├── .gitignore
├── .mvn
│   └── wrapper
│       ├── MavenWrapperDownloader.java
│       └── maven-wrapper.properties
├── mvnw
├── mvnw.cmd
├── parsex-client
│   ├── pom.xml
│   └── src
│       ├── main
│       │   └── java
│       │       └── com
│       │           └── sucx
│       │               ├── App.java
│       │               ├── controller
│       │               │   └── HelloController.java
│       │               └── util
│       │                   └── HttpUtils.java
│       └── test
│           └── java
│               └── com
│                   └── sucx
│                       ├── AppTest.java
│                       └── util
│                           ├── HeraTest.java
│                           └── PrestoHttpTest.java
├── parsex-common
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── com
│                   └── sucx
│                       └── common
│                           ├── Constants.java
│                           ├── enums
│                           │   ├── OperatorType.java
│                           │   └── SqlEnum.java
│                           ├── exceptions
│                           │   └── SqlParseException.java
│                           ├── model
│                           │   ├── Result.java
│                           │   └── TableInfo.java
│                           └── util
│                               ├── Pair.java
│                               └── StringUtils.java
├── parsex-core
│   ├── pom.xml
│   └── src
│       ├── main
│       │   ├── java
│       │   │   └── com
│       │   │       └── sucx
│       │   │           └── core
│       │   │               ├── AbstractSqlParse.java
│       │   │               ├── HiveSQLParse.java
│       │   │               ├── PrestoSqlParse.java
│       │   │               ├── SqlParse.java
│       │   │               └── SqlParseUtil.java
│       │   ├── resources
│       │   │   └── log4j2.xml
│       │   └── scala
│       │       └── com
│       │           └── sucx
│       │               └── core
│       │                   └── SparkSQLParse.scala
│       └── test
│           └── java
│               ├── com
│               │   └── sucx
│               │       └── core
│               │           └── parse
│               │               └── presto
│               │                   └── SqlBaseParserTest.java
│               └── org
│                   └── apache
│                       └── spark
│                           └── sql
│                               └── catalyst
│                                   └── expressions
│                                       └── parse.java
├── pom.xml
└── readme.md
/.gitignore:
--------------------------------------------------------------------------------
1 | HELP.md
2 | target/
3 | !.mvn/wrapper/maven-wrapper.jar
4 | !**/src/main/**
5 | !**/src/test/**
6 |
7 |
8 | *SqlParseTest*
9 | ### STS ###
10 | .apt_generated
11 | .classpath
12 | .factorypath
13 | .project
14 | .settings
15 | .springBeans
16 | .sts4-cache
17 |
18 | ### IntelliJ IDEA ###
19 | .idea
20 | *.iws
21 | *.iml
22 | *.ipr
23 |
24 | ### NetBeans ###
25 | /nbproject/private/
26 | /nbbuild/
27 | /dist/
28 | /nbdist/
29 | /.nb-gradle/
30 | build/
31 |
32 | ### VS Code ###
33 | .vscode/
34 | ### Java template
35 | # Compiled class file
36 | *.class
37 |
38 | # Log file
39 | *.log
40 |
41 | # BlueJ files
42 | *.ctxt
43 |
44 | # Mobile Tools for Java (J2ME)
45 | .mtj.tmp/
46 |
47 | # Package Files #
48 | *.jar
49 | *.war
50 | *.nar
51 | *.ear
52 | *.zip
53 | *.tar.gz
54 | *.rar
55 |
56 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
57 | hs_err_pid*
58 |
59 |
--------------------------------------------------------------------------------
/.mvn/wrapper/MavenWrapperDownloader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2007-present the original author or authors.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * https://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import java.net.*;
18 | import java.io.*;
19 | import java.nio.channels.*;
20 | import java.util.Properties;
21 |
public class MavenWrapperDownloader {

    private static final String WRAPPER_VERSION = "0.5.6";

    /**
     * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
     */
    private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
            + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";

    /**
     * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
     * use instead of the default one.
     */
    private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
            ".mvn/wrapper/maven-wrapper.properties";

    /**
     * Path where the maven-wrapper.jar will be saved to.
     */
    private static final String MAVEN_WRAPPER_JAR_PATH =
            ".mvn/wrapper/maven-wrapper.jar";

    /**
     * Name of the property which should be used to override the default download url for the wrapper.
     */
    private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";

    /**
     * Downloads maven-wrapper.jar into the project's .mvn/wrapper directory.
     *
     * @param args args[0] is the project base directory
     */
    public static void main(String[] args) {
        System.out.println("- Downloader started");
        File baseDirectory = new File(args[0]);
        System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());

        // If the maven-wrapper.properties exists, read it and check if it contains a custom
        // wrapperUrl parameter.
        File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
        String url = DEFAULT_DOWNLOAD_URL;
        if (mavenWrapperPropertyFile.exists()) {
            // try-with-resources closes the stream on every path; the original used a
            // manual finally block whose close() failure was silently swallowed.
            try (FileInputStream mavenWrapperPropertyFileInputStream =
                         new FileInputStream(mavenWrapperPropertyFile)) {
                Properties mavenWrapperProperties = new Properties();
                mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
                url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
            } catch (IOException e) {
                // Non-fatal: fall back to the default download URL.
                System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
            }
        }
        System.out.println("- Downloading from: " + url);

        File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
        if (!outputFile.getParentFile().exists()) {
            if (!outputFile.getParentFile().mkdirs()) {
                System.out.println(
                        "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
            }
        }
        System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
        try {
            downloadFileFromURL(url, outputFile);
            System.out.println("Done");
            System.exit(0);
        } catch (Throwable e) {
            System.out.println("- Error downloading");
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Streams the file at {@code urlString} into {@code destination}.
     *
     * <p>Honors the MVNW_USERNAME/MVNW_PASSWORD environment variables for
     * repositories that require HTTP basic authentication.
     *
     * @param urlString   URL to download from
     * @param destination file to write the downloaded bytes to
     * @throws Exception on any network or I/O failure
     */
    private static void downloadFileFromURL(String urlString, File destination) throws Exception {
        if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
            String username = System.getenv("MVNW_USERNAME");
            char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
            Authenticator.setDefault(new Authenticator() {
                @Override
                protected PasswordAuthentication getPasswordAuthentication() {
                    return new PasswordAuthentication(username, password);
                }
            });
        }
        URL website = new URL(urlString);
        // try-with-resources: the original leaked both the channel and the output
        // stream when the transfer threw part-way through.
        try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
             FileOutputStream fos = new FileOutputStream(destination)) {
            fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
        }
    }

}
119 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip
2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
3 |
--------------------------------------------------------------------------------
/mvnw:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # ----------------------------------------------------------------------------
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | # ----------------------------------------------------------------------------
20 |
21 | # ----------------------------------------------------------------------------
22 | # Maven Start Up Batch script
23 | #
24 | # Required ENV vars:
25 | # ------------------
26 | # JAVA_HOME - location of a JDK home dir
27 | #
28 | # Optional ENV vars
29 | # -----------------
30 | # M2_HOME - location of maven2's installed home dir
31 | # MAVEN_OPTS - parameters passed to the Java VM when running Maven
32 | # e.g. to debug Maven itself, use
33 | # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
34 | # MAVEN_SKIP_RC - flag to disable loading of mavenrc files
35 | # ----------------------------------------------------------------------------
36 |
37 | if [ -z "$MAVEN_SKIP_RC" ] ; then
38 |
39 | if [ -f /etc/mavenrc ] ; then
40 | . /etc/mavenrc
41 | fi
42 |
43 | if [ -f "$HOME/.mavenrc" ] ; then
44 | . "$HOME/.mavenrc"
45 | fi
46 |
47 | fi
48 |
49 | # OS specific support. $var _must_ be set to either true or false.
50 | cygwin=false;
51 | darwin=false;
52 | mingw=false
53 | case "`uname`" in
54 | CYGWIN*) cygwin=true ;;
55 | MINGW*) mingw=true;;
56 | Darwin*) darwin=true
57 | # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
58 | # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
59 | if [ -z "$JAVA_HOME" ]; then
60 | if [ -x "/usr/libexec/java_home" ]; then
61 | export JAVA_HOME="`/usr/libexec/java_home`"
62 | else
63 | export JAVA_HOME="/Library/Java/Home"
64 | fi
65 | fi
66 | ;;
67 | esac
68 |
69 | if [ -z "$JAVA_HOME" ] ; then
70 | if [ -r /etc/gentoo-release ] ; then
71 | JAVA_HOME=`java-config --jre-home`
72 | fi
73 | fi
74 |
75 | if [ -z "$M2_HOME" ] ; then
76 | ## resolve links - $0 may be a link to maven's home
77 | PRG="$0"
78 |
79 | # need this for relative symlinks
80 | while [ -h "$PRG" ] ; do
81 | ls=`ls -ld "$PRG"`
82 | link=`expr "$ls" : '.*-> \(.*\)$'`
83 | if expr "$link" : '/.*' > /dev/null; then
84 | PRG="$link"
85 | else
86 | PRG="`dirname "$PRG"`/$link"
87 | fi
88 | done
89 |
90 | saveddir=`pwd`
91 |
92 | M2_HOME=`dirname "$PRG"`/..
93 |
94 | # make it fully qualified
95 | M2_HOME=`cd "$M2_HOME" && pwd`
96 |
97 | cd "$saveddir"
98 | # echo Using m2 at $M2_HOME
99 | fi
100 |
101 | # For Cygwin, ensure paths are in UNIX format before anything is touched
102 | if $cygwin ; then
103 | [ -n "$M2_HOME" ] &&
104 | M2_HOME=`cygpath --unix "$M2_HOME"`
105 | [ -n "$JAVA_HOME" ] &&
106 | JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
107 | [ -n "$CLASSPATH" ] &&
108 | CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
109 | fi
110 |
111 | # For Mingw, ensure paths are in UNIX format before anything is touched
112 | if $mingw ; then
113 | [ -n "$M2_HOME" ] &&
114 | M2_HOME="`(cd "$M2_HOME"; pwd)`"
115 | [ -n "$JAVA_HOME" ] &&
116 | JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
117 | fi
118 |
119 | if [ -z "$JAVA_HOME" ]; then
120 | javaExecutable="`which javac`"
121 | if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
122 | # readlink(1) is not available as standard on Solaris 10.
123 | readLink=`which readlink`
124 | if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
125 | if $darwin ; then
126 | javaHome="`dirname \"$javaExecutable\"`"
127 | javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
128 | else
129 | javaExecutable="`readlink -f \"$javaExecutable\"`"
130 | fi
131 | javaHome="`dirname \"$javaExecutable\"`"
132 | javaHome=`expr "$javaHome" : '\(.*\)/bin'`
133 | JAVA_HOME="$javaHome"
134 | export JAVA_HOME
135 | fi
136 | fi
137 | fi
138 |
139 | if [ -z "$JAVACMD" ] ; then
140 | if [ -n "$JAVA_HOME" ] ; then
141 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
142 | # IBM's JDK on AIX uses strange locations for the executables
143 | JAVACMD="$JAVA_HOME/jre/sh/java"
144 | else
145 | JAVACMD="$JAVA_HOME/bin/java"
146 | fi
147 | else
148 | JAVACMD="`which java`"
149 | fi
150 | fi
151 |
152 | if [ ! -x "$JAVACMD" ] ; then
153 | echo "Error: JAVA_HOME is not defined correctly." >&2
154 | echo " We cannot execute $JAVACMD" >&2
155 | exit 1
156 | fi
157 |
158 | if [ -z "$JAVA_HOME" ] ; then
159 | echo "Warning: JAVA_HOME environment variable is not set."
160 | fi
161 |
162 | CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
163 |
# traverses directory structure from process work directory to filesystem root
# first directory with .mvn subdirectory is considered project base directory
find_maven_basedir() {

  if [ -z "$1" ]
  then
    echo "Path not specified to find_maven_basedir"
    return 1
  fi

  basedir="$1"
  wdir="$1"
  # walk upwards until the filesystem root, stopping at the first dir
  # that contains a .mvn subdirectory
  while [ "$wdir" != '/' ] ; do
    if [ -d "$wdir"/.mvn ] ; then
      basedir=$wdir
      break
    fi
    # workaround for JBEAP-8937 (on Solaris 10/Sparc)
    if [ -d "${wdir}" ]; then
      wdir=`cd "$wdir/.."; pwd`
    fi
    # end of workaround
  done
  # if no .mvn was found anywhere up the tree, this falls back to the
  # starting directory ($1)
  echo "${basedir}"
}
189 |
# concatenates all lines of a file
# (used to fold .mvn/jvm.config into a single string for MAVEN_OPTS;
# tr -s squeezes runs of newlines into single spaces)
concat_lines() {
  if [ -f "$1" ]; then
    echo "$(tr -s '\n' ' ' < "$1")"
  fi
}
196 |
197 | BASE_DIR=`find_maven_basedir "$(pwd)"`
198 | if [ -z "$BASE_DIR" ]; then
199 | exit 1;
200 | fi
201 |
202 | ##########################################################################################
203 | # Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
204 | # This allows using the maven wrapper in projects that prohibit checking in binary data.
205 | ##########################################################################################
206 | if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then
207 | if [ "$MVNW_VERBOSE" = true ]; then
208 | echo "Found .mvn/wrapper/maven-wrapper.jar"
209 | fi
210 | else
211 | if [ "$MVNW_VERBOSE" = true ]; then
212 | echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..."
213 | fi
214 | if [ -n "$MVNW_REPOURL" ]; then
215 | jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
216 | else
217 | jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
218 | fi
219 | while IFS="=" read key value; do
220 | case "$key" in (wrapperUrl) jarUrl="$value"; break ;;
221 | esac
222 | done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties"
223 | if [ "$MVNW_VERBOSE" = true ]; then
224 | echo "Downloading from: $jarUrl"
225 | fi
226 | wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar"
227 | if $cygwin; then
228 | wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"`
229 | fi
230 |
231 | if command -v wget > /dev/null; then
232 | if [ "$MVNW_VERBOSE" = true ]; then
233 | echo "Found wget ... using wget"
234 | fi
235 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
236 | wget "$jarUrl" -O "$wrapperJarPath"
237 | else
238 | wget --http-user=$MVNW_USERNAME --http-password=$MVNW_PASSWORD "$jarUrl" -O "$wrapperJarPath"
239 | fi
240 | elif command -v curl > /dev/null; then
241 | if [ "$MVNW_VERBOSE" = true ]; then
242 | echo "Found curl ... using curl"
243 | fi
244 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
245 | curl -o "$wrapperJarPath" "$jarUrl" -f
246 | else
247 | curl --user $MVNW_USERNAME:$MVNW_PASSWORD -o "$wrapperJarPath" "$jarUrl" -f
248 | fi
249 |
250 | else
251 | if [ "$MVNW_VERBOSE" = true ]; then
252 | echo "Falling back to using Java to download"
253 | fi
254 | javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java"
255 | # For Cygwin, switch paths to Windows format before running javac
256 | if $cygwin; then
257 | javaClass=`cygpath --path --windows "$javaClass"`
258 | fi
259 | if [ -e "$javaClass" ]; then
260 | if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
261 | if [ "$MVNW_VERBOSE" = true ]; then
262 | echo " - Compiling MavenWrapperDownloader.java ..."
263 | fi
264 | # Compiling the Java class
265 | ("$JAVA_HOME/bin/javac" "$javaClass")
266 | fi
267 | if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
268 | # Running the downloader
269 | if [ "$MVNW_VERBOSE" = true ]; then
270 | echo " - Running MavenWrapperDownloader.java ..."
271 | fi
272 | ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$MAVEN_PROJECTBASEDIR")
273 | fi
274 | fi
275 | fi
276 | fi
277 | ##########################################################################################
278 | # End of extension
279 | ##########################################################################################
280 |
281 | export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
282 | if [ "$MVNW_VERBOSE" = true ]; then
283 | echo $MAVEN_PROJECTBASEDIR
284 | fi
285 | MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
286 |
287 | # For Cygwin, switch paths to Windows format before running java
288 | if $cygwin; then
289 | [ -n "$M2_HOME" ] &&
290 | M2_HOME=`cygpath --path --windows "$M2_HOME"`
291 | [ -n "$JAVA_HOME" ] &&
292 | JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
293 | [ -n "$CLASSPATH" ] &&
294 | CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
295 | [ -n "$MAVEN_PROJECTBASEDIR" ] &&
296 | MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
297 | fi
298 |
299 | # Provide a "standardized" way to retrieve the CLI args that will
300 | # work with both Windows and non-Windows executions.
301 | MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@"
302 | export MAVEN_CMD_LINE_ARGS
303 |
304 | WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
305 |
306 | exec "$JAVACMD" \
307 | $MAVEN_OPTS \
308 | -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
309 | "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
310 | ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"
311 |
--------------------------------------------------------------------------------
/mvnw.cmd:
--------------------------------------------------------------------------------
1 | @REM ----------------------------------------------------------------------------
2 | @REM Licensed to the Apache Software Foundation (ASF) under one
3 | @REM or more contributor license agreements. See the NOTICE file
4 | @REM distributed with this work for additional information
5 | @REM regarding copyright ownership. The ASF licenses this file
6 | @REM to you under the Apache License, Version 2.0 (the
7 | @REM "License"); you may not use this file except in compliance
8 | @REM with the License. You may obtain a copy of the License at
9 | @REM
10 | @REM https://www.apache.org/licenses/LICENSE-2.0
11 | @REM
12 | @REM Unless required by applicable law or agreed to in writing,
13 | @REM software distributed under the License is distributed on an
14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | @REM KIND, either express or implied. See the License for the
16 | @REM specific language governing permissions and limitations
17 | @REM under the License.
18 | @REM ----------------------------------------------------------------------------
19 |
20 | @REM ----------------------------------------------------------------------------
21 | @REM Maven Start Up Batch script
22 | @REM
23 | @REM Required ENV vars:
24 | @REM JAVA_HOME - location of a JDK home dir
25 | @REM
26 | @REM Optional ENV vars
27 | @REM M2_HOME - location of maven2's installed home dir
28 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
29 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
30 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
31 | @REM e.g. to debug Maven itself, use
32 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
33 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
34 | @REM ----------------------------------------------------------------------------
35 |
36 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
37 | @echo off
38 | @REM set title of command window
39 | title %0
40 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
41 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
42 |
43 | @REM set %HOME% to equivalent of $HOME
44 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
45 |
46 | @REM Execute a user defined script before this one
47 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
48 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending
49 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
50 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
51 | :skipRcPre
52 |
53 | @setlocal
54 |
55 | set ERROR_CODE=0
56 |
57 | @REM To isolate internal variables from possible post scripts, we use another setlocal
58 | @setlocal
59 |
60 | @REM ==== START VALIDATION ====
61 | if not "%JAVA_HOME%" == "" goto OkJHome
62 |
63 | echo.
64 | echo Error: JAVA_HOME not found in your environment. >&2
65 | echo Please set the JAVA_HOME variable in your environment to match the >&2
66 | echo location of your Java installation. >&2
67 | echo.
68 | goto error
69 |
70 | :OkJHome
71 | if exist "%JAVA_HOME%\bin\java.exe" goto init
72 |
73 | echo.
74 | echo Error: JAVA_HOME is set to an invalid directory. >&2
75 | echo JAVA_HOME = "%JAVA_HOME%" >&2
76 | echo Please set the JAVA_HOME variable in your environment to match the >&2
77 | echo location of your Java installation. >&2
78 | echo.
79 | goto error
80 |
81 | @REM ==== END VALIDATION ====
82 |
83 | :init
84 |
85 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
86 | @REM Fallback to current working directory if not found.
87 |
88 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
89 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
90 |
91 | set EXEC_DIR=%CD%
92 | set WDIR=%EXEC_DIR%
93 | :findBaseDir
94 | IF EXIST "%WDIR%"\.mvn goto baseDirFound
95 | cd ..
96 | IF "%WDIR%"=="%CD%" goto baseDirNotFound
97 | set WDIR=%CD%
98 | goto findBaseDir
99 |
100 | :baseDirFound
101 | set MAVEN_PROJECTBASEDIR=%WDIR%
102 | cd "%EXEC_DIR%"
103 | goto endDetectBaseDir
104 |
105 | :baseDirNotFound
106 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
107 | cd "%EXEC_DIR%"
108 |
109 | :endDetectBaseDir
110 |
111 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
112 |
113 | @setlocal EnableExtensions EnableDelayedExpansion
114 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
115 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
116 |
117 | :endReadAdditionalConfig
118 |
119 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
120 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
121 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
122 |
123 | set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
124 |
125 | FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
126 | IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B
127 | )
128 |
129 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
130 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data.
131 | if exist %WRAPPER_JAR% (
132 | if "%MVNW_VERBOSE%" == "true" (
133 | echo Found %WRAPPER_JAR%
134 | )
135 | ) else (
136 | if not "%MVNW_REPOURL%" == "" (
137 | SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
138 | )
139 | if "%MVNW_VERBOSE%" == "true" (
140 | echo Couldn't find %WRAPPER_JAR%, downloading it ...
141 | echo Downloading from: %DOWNLOAD_URL%
142 | )
143 |
144 | powershell -Command "&{"^
145 | "$webclient = new-object System.Net.WebClient;"^
146 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
147 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
148 | "}"^
149 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^
150 | "}"
151 | if "%MVNW_VERBOSE%" == "true" (
152 | echo Finished downloading %WRAPPER_JAR%
153 | )
154 | )
155 | @REM End of extension
156 |
157 | @REM Provide a "standardized" way to retrieve the CLI args that will
158 | @REM work with both Windows and non-Windows executions.
159 | set MAVEN_CMD_LINE_ARGS=%*
160 |
161 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
162 | if ERRORLEVEL 1 goto error
163 | goto end
164 |
165 | :error
166 | set ERROR_CODE=1
167 |
168 | :end
169 | @endlocal & set ERROR_CODE=%ERROR_CODE%
170 |
171 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
172 | @REM check for post script, once with legacy .bat ending and once with .cmd ending
173 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
174 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
175 | :skipRcPost
176 |
177 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
178 | if "%MAVEN_BATCH_PAUSE%" == "on" pause
179 |
180 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%
181 |
182 | exit /B %ERROR_CODE%
183 |
--------------------------------------------------------------------------------
/parsex-client/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 | parsex
7 | com.sucx.bigdata
8 | 0.0.1-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | parsex-client
13 | 0.0.1-SNAPSHOT
14 |
15 |
16 |
17 | org.apache.maven.plugins
18 | maven-compiler-plugin
19 |
20 | 8
21 | 8
22 |
23 |
24 |
25 |
26 |
27 | parsex-client
28 |
29 |
30 |
31 |
32 |
33 | org.datanucleus
34 | javax.jdo
35 | 3.2.0-m3
36 |
37 |
38 |
39 |
40 | org.datanucleus
41 | datanucleus-rdbms
42 | 4.1.19
43 |
44 |
45 |
46 | mysql
47 | mysql-connector-java
48 | 8.0.19
49 |
50 |
51 |
52 |
53 | org.datanucleus
54 | datanucleus-api-jdo
55 | 4.2.4
56 |
57 |
58 |
59 | org.datanucleus
60 | datanucleus-core
61 | 4.1.17
62 |
63 |
64 |
65 | org.jooq
66 | joor-java-8
67 |
68 |
69 | junit
70 | junit
71 | 4.11
72 | test
73 |
74 |
75 | com.alibaba
76 | fastjson
77 |
78 |
79 | org.apache.httpcomponents
80 | httpclient
81 |
82 |
83 |
84 | com.sucx.bigdata
85 | parsex-core
86 | 0.0.1-SNAPSHOT
87 |
88 |
89 |
90 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/parsex-client/src/main/java/com/sucx/App.java:
--------------------------------------------------------------------------------
1 | package com.sucx;
2 |
3 | /**
4 | * Hello world!
5 | */
/**
 * Application entry point for the parsex client module.
 */
public class App {

    /**
     * Prints a greeting to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";
        System.out.println(greeting);
    }
}
11 |
--------------------------------------------------------------------------------
/parsex-client/src/main/java/com/sucx/controller/HelloController.java:
--------------------------------------------------------------------------------
1 | package com.sucx.controller;
2 |
/**
 * Placeholder controller for the parsex client module; no endpoints are
 * implemented yet.
 *
 * @author scx
 * @create 2020/02/26
 */
public class HelloController {
}
13 |
--------------------------------------------------------------------------------
/parsex-client/src/main/java/com/sucx/util/HttpUtils.java:
--------------------------------------------------------------------------------
1 | package com.sucx.util;
2 |
3 | import org.apache.http.HttpResponse;
4 | import org.apache.http.client.HttpClient;
5 | import org.apache.http.client.methods.HttpGet;
6 | import org.apache.http.client.methods.HttpPost;
7 | import org.apache.http.client.methods.HttpUriRequest;
8 | import org.apache.http.conn.ssl.NoopHostnameVerifier;
9 | import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
10 | import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
11 | import org.apache.http.impl.client.HttpClients;
12 | import org.apache.http.message.BasicHeader;
13 | import org.apache.http.ssl.SSLContexts;
14 | import org.apache.http.util.EntityUtils;
15 |
16 | import java.io.IOException;
17 | import java.net.MalformedURLException;
18 | import java.net.URI;
19 | import java.net.URISyntaxException;
20 | import java.net.URL;
21 | import java.security.KeyManagementException;
22 | import java.security.KeyStoreException;
23 | import java.security.NoSuchAlgorithmException;
24 | import java.util.List;
25 |
26 | /**
27 | * desc:
28 | *
29 | * @author scx
30 | * @create 2020/02/26
31 | */
32 | public class HttpUtils {
33 |
34 |
35 | public static String doGet(String urlStr, List headers) {
36 | return doExecute(urlStr, new HttpGet(check(urlStr)), headers);
37 | }
38 |
39 | public static String doPost(String urlStr, List headers) {
40 | return doExecute(urlStr, new HttpPost(check(urlStr)), headers);
41 | }
42 |
43 | private static String doExecute(String url, HttpUriRequest request, List headers) {
44 | if (headers != null && headers.size() > 0) {
45 | for (BasicHeader header : headers) {
46 | request.setHeader(header);
47 | }
48 | }
49 | SSLConnectionSocketFactory scsf = null;
50 | try {
51 | scsf = new SSLConnectionSocketFactory(
52 | SSLContexts.custom().loadTrustMaterial(null, new TrustSelfSignedStrategy()).build(),
53 | NoopHostnameVerifier.INSTANCE);
54 | } catch (NoSuchAlgorithmException | KeyManagementException | KeyStoreException e) {
55 | e.printStackTrace();
56 | }
57 | try {
58 | HttpClient httpClient = HttpClients.custom().setSSLSocketFactory(scsf).build();
59 | HttpResponse response = httpClient.execute(request);
60 | int code = response.getStatusLine().getStatusCode();
61 | if (code == 200) {
62 | return EntityUtils.toString(response.getEntity());
63 | } else {
64 | throw new RuntimeException(url + " http请求异常:" + response.getStatusLine().getStatusCode() + response.getEntity().toString());
65 | }
66 | } catch (IOException e) {
67 | throw new RuntimeException("发送http请求失败", e);
68 | }
69 | }
70 |
71 |
72 | private static URI check(String urlStr) {
73 | URI uri = null;
74 | try {
75 | URL url = new URL(urlStr);
76 | uri = new URI(url.getProtocol(), url.getHost() + ":" + url.getPort(), url.getPath(), url.getQuery(), null);
77 | return uri;
78 | } catch (URISyntaxException | MalformedURLException e) {
79 | return null;
80 | }
81 |
82 | }
83 |
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/parsex-client/src/test/java/com/sucx/AppTest.java:
--------------------------------------------------------------------------------
1 | package com.sucx;
2 |
3 | import org.apache.hadoop.hive.conf.HiveConf;
4 | import org.apache.hadoop.hive.metastore.api.Schema;
5 | import org.apache.hadoop.hive.ql.Context;
6 | import org.apache.hadoop.hive.ql.Driver;
7 | import org.apache.hadoop.hive.ql.QueryPlan;
8 | import org.apache.hadoop.hive.ql.QueryState;
9 | import org.apache.hadoop.hive.ql.hooks.HookContext;
10 | import org.apache.hadoop.hive.ql.hooks.LineageLogger;
11 | import org.apache.hadoop.hive.ql.parse.ASTNode;
12 | import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
13 | import org.apache.hadoop.hive.ql.parse.ParseUtils;
14 | import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
15 | import org.apache.hadoop.hive.ql.session.SessionState;
16 | import org.apache.hadoop.security.UserGroupInformation;
17 | import org.joor.Reflect;
18 | import org.junit.Test;
19 |
20 | import java.util.HashMap;
21 |
/**
 * Exploratory test that drives Hive's parser, semantic analyzer and the
 * {@link LineageLogger} hook directly, without a full Driver.run() cycle.
 */
public class AppTest {
    /**
     * Parses an INSERT ... SELECT statement, runs semantic analysis and feeds
     * the resulting QueryPlan into LineageLogger so lineage edges are logged.
     *
     * NOTE(review): connection URLs and HDFS addresses below are
     * developer-machine specific — this test only runs with that local setup.
     */
    @Test
    public void shouldAnswerWithTrue() throws Exception {

        LineageLogger logger = new LineageLogger();

        HiveConf hiveConf = new HiveConf();


        // Point the metastore at a local MySQL and HDFS at a local namenode.
        // The third argument is only a label recording where the setting
        // "came from".
        hiveConf.set("javax.jdo.option.ConnectionURL", "jdbc:mysql://localhost/metastore", "hive-conf.xml");
        hiveConf.set("javax.jdo.option.ConnectionDriverName", "com.mysql.jdbc.Driver", "hive-conf.xml");
        hiveConf.set("javax.jdo.option.ConnectionUserName", "root", "hive-conf.xml");
        hiveConf.set("javax.jdo.option.ConnectionPassword", "moye", "hive-conf.xml");
        hiveConf.set("fs.defaultFS", "hdfs://127.0.0.1:8020", "hdfs-site.xml");
        hiveConf.set("_hive.hdfs.session.path", "hdfs://127.0.0.1:8020/tmp", "hive-conf.xml");
        hiveConf.set("_hive.local.session.path", "hdfs://127.0.0.1:8020/tmp", "hive-conf.xml");
        hiveConf.set("hive.in.test", "true", "hive-conf.xml");


        String sql = "insert overwrite table sucx.test select * from sucx.test2";
        QueryState queryState = new QueryState(hiveConf);

        Context context = new Context(hiveConf);

        // Semantic analysis requires a thread-local SessionState to be set.
        SessionState sessionState = new SessionState(hiveConf);

        SessionState.setCurrentSessionState(sessionState);

        // Parse the SQL text into an AST, then analyze it.
        ASTNode astNode = ParseUtils.parse(sql, context);

        BaseSemanticAnalyzer analyzer = SemanticAnalyzerFactory.get(queryState, astNode);

        analyzer.analyze(astNode, context);

        // Driver.getSchema(...) is not publicly accessible, so invoke it
        // reflectively via jOOR.
        Schema schema = Reflect.onClass(Driver.class).call("getSchema", analyzer, hiveConf).get();

        QueryPlan queryPlan = new QueryPlan(sql, analyzer, 0L, null, queryState.getHiveOperation(), schema);



        // Build the post-execution hook context the lineage logger expects.
        HookContext hookContext = new HookContext(queryPlan, queryState,
                new HashMap<>(), "sucx", "",
                "", "", "", "",
                true, null);

        hookContext.setUgi(UserGroupInformation.getCurrentUser());
        logger.run(hookContext);


    }

}
80 |
--------------------------------------------------------------------------------
/parsex-client/src/test/java/com/sucx/util/HeraTest.java:
--------------------------------------------------------------------------------
1 | package com.sucx.util;
2 |
3 | import com.alibaba.fastjson.JSONObject;
4 | import org.junit.Test;
5 |
6 | import java.io.BufferedWriter;
7 | import java.io.File;
8 | import java.io.FileWriter;
9 | import java.io.IOException;
10 | import java.util.Random;
11 |
12 | /**
13 | * desc:
14 | *
15 | * @author scx
16 | * @create 2020/03/02
17 | */
18 | public class HeraTest {
19 |
20 |
21 | @Test
22 | public void buildJson() throws IOException {
23 | //创建测试文件
24 | File file = new File("/Users/scx/Desktop/reportLog.txt");
25 | if (!file.exists()) {
26 | if (file.createNewFile()) {
27 | throw new IOException("新建文件失败:" + file.getAbsolutePath());
28 | }
29 | }
30 | BufferedWriter writer = new BufferedWriter(new FileWriter(file));
31 | // 100W的设备数量
32 | int devSize = 10000 * 100;
33 | // 上报类型
34 | String[] typeArr = {"OFFLINE", "ONLINE", "RESET", "ACTIVE"};
35 | // 10天的日期
36 | String[] dateArr =
37 | {"2020-02-01", "2020-02-02", "2020-02-03", "2020-02-04", "2020-02-05",
38 | "2020-02-06", "2020-02-07", "2020-02-08", "2020-02-09", "2020-02-10"};
39 | Random random = new Random(99999);
40 | String type;
41 | for (String date : dateArr) {
42 | int activeCount = 0;
43 | for (int i = 1; i <= devSize; i++) {
44 | JSONObject json = new JSONObject();
45 | type = typeArr[random.nextInt(typeArr.length)];
46 | if ("ONLINE".equals(type) || "ACTIVE".equals(type)) {
47 | activeCount++;
48 | }
49 | json.put("id", i);
50 | //随机赋予一种上报类型
51 | json.put("type", type);
52 | json.put("date", date);
53 | writer.write(json.toJSONString());
54 | writer.newLine();
55 | }
56 | System.out.println(String.format("日志:%s,活跃数:%d", date, activeCount));
57 | }
58 | writer.flush();
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/parsex-client/src/test/java/com/sucx/util/PrestoHttpTest.java:
--------------------------------------------------------------------------------
1 | package com.sucx.util;
2 |
3 | import com.alibaba.fastjson.JSONArray;
4 | import com.alibaba.fastjson.JSONObject;
5 | import com.facebook.presto.sql.parser.ParsingException;
6 | import com.sucx.util.HttpUtils;
7 | import com.sucx.core.PrestoSqlParse;
8 | import com.sucx.common.exceptions.SqlParseException;
9 | import com.sucx.common.model.Result;
10 | import com.sucx.core.SqlParseUtil;
11 | import org.apache.http.message.BasicHeader;
12 | import org.junit.Test;
13 |
14 | import java.util.ArrayList;
15 | import java.util.List;
16 | import java.util.stream.Collectors;
17 |
18 | /**
19 | * desc:
20 | *
21 | * @author scx
22 | * @create 2020/03/10
23 | */
24 | public class PrestoHttpTest {
25 | ArrayList headers = new ArrayList<>();
26 |
27 | {
28 | headers.add(new BasicHeader("cookie",
29 | "_ga=GA1.2.1045647750.1571648344; 7ce0ff06556c05363a176b03dfdd5680=1160; a608ea7c4cbd1919ce039822a2e5d753=01160; cd1f6c4c522c03e21ad83ee2d7b0c515=%E8%8B%8F%E6%89%BF%E7%A5%A5%EF%BC%88%E8%8E%AB%E9%82%AA%EF%BC%89; e255ad9b8262a02d28bca48235a96357=1346; SSO_USER_TOKEN=p_19daf9e8b43332801f3d479b164cecfb"
30 | ));
31 | }
32 |
33 | @Test
34 | public void get() throws SqlParseException {
35 |
36 | String s = HttpUtils.doGet("https://prestonew-presto.bigdata-cn.xx-inc.top:7799/v1/query", headers);
37 |
38 | JSONArray array = JSONArray.parseArray(s);
39 |
40 | int size = array.size();
41 | System.out.println(size);
42 |
43 | PrestoSqlParse sqlParse = new PrestoSqlParse();
44 | for (int i = 0; i < size; i++) {
45 | JSONObject object = array.getJSONObject(i);
46 | String query = object.getString("query");
47 | System.out.println(query);
48 |
49 | Result parse = null;
50 | try {
51 | parse = sqlParse.parse(query);
52 | } catch (SqlParseException e) {
53 | if (e.getCause() instanceof ParsingException) {
54 | System.out.println("sql解析异常:" + e.getMessage());
55 | } else {
56 | throw new SqlParseException(e);
57 | }
58 | }
59 | SqlParseUtil.print(parse);
60 | }
61 | }
62 |
63 | public void test() {
64 |
65 |
66 | ArrayList test = new ArrayList<>();
67 |
68 |
69 | List collect = test.stream().map(col -> {
70 | return col + "1";
71 | }).collect(Collectors.toList());
72 |
73 | for (String s : collect) {
74 | System.out.println(s);
75 | }
76 | }
77 |
78 | @Test
79 | public void replace() {
80 |
81 | String text = "10.1.1 ";
82 |
83 |
84 | System.out.println(text.replaceAll("(.*)", ""));
85 |
86 | }
87 |
88 | }
89 |
--------------------------------------------------------------------------------
/parsex-common/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>parsex</artifactId>
        <groupId>com.sucx.bigdata</groupId>
        <version>0.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>parsex-common</artifactId>

</project>
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/Constants.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common;
2 |
/**
 * Shared literal constants used across the SQL-parsing modules.
 *
 * @author scx
 * @create 2020/03/02
 */
public class Constants {

    /** Statement separator used when splitting multi-statement SQL text. */
    public static final String SEMICOLON = ";";
    /** Separator between database and table (or table and column) names. */
    public static final String POINT = ".";

    /** Static constant holder — not instantiable. */
    private Constants() {
    }
}
16 |
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/enums/OperatorType.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common.enums;
2 |
/**
 * Kind of operation a statement performs on a table or database.
 */
public enum OperatorType {

    /** The table or database is read from. */
    READ,
    /** The table is written to. */
    WRITE,
    /** The table or database is created. */
    CREATE,
    /** The table or database definition is modified. */
    ALTER,
    /** The table or database is removed. */
    DROP

}
8 |
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/enums/SqlEnum.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common.enums;
2 |
/**
 * SQL dialects supported by the parser implementations.
 */
public enum SqlEnum {

    /** Hive SQL. */
    HIVE,
    /** Spark SQL. */
    SPARK,
    /** Presto SQL. */
    PRESTO
}
7 |
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/exceptions/SqlParseException.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common.exceptions;
2 |
/**
 * Checked exception raised when a SQL statement cannot be parsed.
 *
 * @author scx
 * @create 2020/02/29
 */
public class SqlParseException extends Exception {

    /**
     * Wraps the underlying parser failure as the cause.
     *
     * @param e the original parser exception
     */
    public SqlParseException(Exception e) {
        super(e);
    }

    /**
     * Creates an exception carrying only a description.
     *
     * @param e the failure description
     */
    public SqlParseException(String e) {
        super(e);
    }

    /**
     * Creates an exception with both a description and a cause.
     *
     * @param message the failure description
     * @param cause   the underlying failure
     */
    public SqlParseException(String message, Throwable cause) {
        super(message, cause);
    }
}
22 |
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/model/Result.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common.model;
2 |
3 | import java.util.Set;
4 |
/**
 * Outcome of parsing one SQL script: which tables are read, written and
 * created temporarily, plus whether any join occurs.
 *
 * NOTE(review): element type parameters appear to have been lost in this copy
 * of the file (raw Set) — presumably the elements are TableInfo.
 *
 * @author scx
 * @create 2020/02/26
 */
public class Result {

    /**
     * Input (read) tables.
     */
    private Set inputSets;
    /**
     * Output (written) tables.
     */
    private Set outputSets;

    /**
     * Temporary (CTE/WITH) tables.
     */
    private Set tempSets;

    /**
     * Whether the script contains a join.
     */
    private boolean join;

    public Result(){}

    public Result(Set inputSets, Set outputSets, Set tempSets) {
        this(inputSets, outputSets, tempSets, false);
    }


    public Result(Set inputSets, Set outputSets, Set tempSets, boolean join) {
        this.inputSets = inputSets;
        this.outputSets = outputSets;
        this.tempSets = tempSets;
        this.join = join;
    }


    @Override
    public String toString() {
        StringBuilder inputStr = new StringBuilder("*************************\n输入表为:\n");
        StringBuilder outputStr = new StringBuilder("输出表为:\n");
        StringBuilder tempStr = new StringBuilder("临时表为:\n");

        // BUGFIX: guard against null sets — the no-arg constructor leaves all
        // three fields null and the original dereferenced them unconditionally.
        if (inputSets != null) {
            inputSets.forEach(input -> inputStr.append(input.toString()).append(" ").append("\n"));
        }
        if (outputSets != null) {
            outputSets.forEach(input -> outputStr.append(input.toString()).append(" "));
        }
        if (tempSets != null) {
            tempSets.forEach(input -> tempStr.append(input.toString()).append(" "));
        }

        return inputStr.append(outputStr).append(tempStr).toString();
    }


    public boolean isJoin() {
        return join;
    }

    public Set getTempSets() {
        return tempSets;
    }

    public Set getInputSets() {
        return inputSets;
    }

    public void setInputSets(Set inputSets) {
        this.inputSets = inputSets;
    }

    public Set getOutputSets() {
        return outputSets;
    }

    public void setOutputSets(Set outputSets) {
        this.outputSets = outputSets;
    }
}
85 |
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/model/TableInfo.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common.model;
2 |
3 | import com.sucx.common.Constants;
4 | import com.sucx.common.enums.OperatorType;
5 | import com.sucx.common.util.Pair;
6 | import com.sucx.common.util.StringUtils;
7 |
8 | import java.util.HashSet;
9 | import java.util.Set;
10 | import java.util.stream.Collectors;
11 |
12 | /**
13 | * desc:
14 | *
15 | * @author scx
16 | * @create 2020/02/26
17 | */
18 | public class TableInfo {
19 |
20 | /**
21 | * 表名
22 | */
23 | private String name;
24 |
25 | /**
26 | * 库名
27 | */
28 | private String dbName;
29 |
30 | private OperatorType type;
31 |
32 | private Set columns;
33 |
34 | private String limit;
35 |
36 | private boolean selectAll;
37 |
38 | private boolean isDb;
39 |
40 |
41 | public TableInfo(){}
42 |
43 |
44 | public TableInfo(String dbName, OperatorType type) {
45 | this.dbName = dbName;
46 | this.type = type;
47 | this.isDb = true;
48 | }
49 |
50 | public TableInfo(String name, String dbName, OperatorType type, HashSet columns) {
51 | this.name = name;
52 | this.dbName = dbName;
53 | this.type = type;
54 | this.columns = new HashSet<>(columns);
55 | columns.clear();
56 | optimizeColumn();
57 | }
58 |
59 | public TableInfo(String dbAndTableName, OperatorType type, String defaultDb, HashSet columns) {
60 | if (dbAndTableName.contains(Constants.POINT)) {
61 | Pair pair = StringUtils.getPointPair(dbAndTableName);
62 | this.name = pair.getRight();
63 | this.dbName = pair.getLeft();
64 | } else {
65 | this.name = dbAndTableName;
66 | this.dbName = defaultDb;
67 | }
68 | this.columns = new HashSet<>(columns);
69 | this.type = type;
70 | columns.clear();
71 | optimizeColumn();
72 | }
73 |
74 |
75 | public Set getColumns() {
76 | return columns;
77 | }
78 |
79 | private void optimizeColumn() {
80 | String dbAndName = this.dbName + Constants.POINT + this.name;
81 | this.columns = this.columns.stream().map(column -> {
82 | if (!selectAll && column.endsWith("*")) {
83 | selectAll = true;
84 | }
85 | if (column.contains(Constants.POINT)) {
86 | Pair pair = StringUtils.getLastPointPair(column);
87 | if (pair.getLeft().equals(dbAndName)) {
88 | return pair.getRight();
89 | }
90 | }
91 | return column;
92 | }).collect(Collectors.toSet());
93 | }
94 |
95 |
96 | public boolean isDb() {
97 | return isDb;
98 | }
99 |
100 | public OperatorType getType() {
101 | return type;
102 | }
103 |
104 | public String getName() {
105 | return name;
106 | }
107 |
108 |
109 | public String getDbName() {
110 | return dbName;
111 | }
112 |
113 | public String getLimit() {
114 | return limit;
115 | }
116 |
117 | public void setLimit(String limit) {
118 | this.limit = limit;
119 | }
120 |
121 |
122 | public boolean isSelectAll() {
123 | return selectAll;
124 | }
125 |
126 | @Override
127 | public String toString() {
128 | StringBuilder str = new StringBuilder();
129 | if (isDb) {
130 | str.append("[库]").append(dbName).append("[").append(type.name()).append("]");
131 | } else {
132 | str.append("[表]").append(dbName).append(Constants.POINT).append(name).append("[").append(type.name()).append("]");
133 | }
134 |
135 | if (this.columns != null && this.columns.size() > 0) {
136 | str.append(" column[ ");
137 | this.columns.forEach(columns -> str.append(columns).append(" "));
138 | str.append("]");
139 | }
140 | if (limit != null) {
141 | str.append(" limit[ ").append(limit).append(" ]");
142 | }
143 | return str.toString();
144 | }
145 |
146 |
147 | @Override
148 | public boolean equals(Object obj) {
149 | if (!(obj instanceof TableInfo)) {
150 | return false;
151 | }
152 |
153 | TableInfo info = (TableInfo) obj;
154 | return this.dbName.equals(info.dbName) && this.name.equals(info.name) && this.type == info.type;
155 | }
156 |
157 | @Override
158 | public int hashCode() {
159 | if (this.name != null) {
160 | return this.dbName.hashCode() + this.name.hashCode() + this.type.hashCode();
161 | }
162 | return this.dbName.hashCode() + this.type.hashCode();
163 | }
164 | }
165 |
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/util/Pair.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common.util;
2 |
/**
 * Minimal immutable 2-tuple.
 *
 * BUGFIX: the class-level type parameters had been lost in this copy of the
 * file (fields referenced undeclared L and R) — restored here.
 *
 * @param <L> type of the left element
 * @param <R> type of the right element
 * @author scx
 * @create 2020/06/04
 */
public class Pair<L, R> {


    private final L left;

    private final R right;

    private Pair(L left, R right) {
        this.left = left;
        this.right = right;
    }

    /**
     * Creates a pair of the two given values.
     */
    public static <L, R> Pair<L, R> of(L left, R right) {
        return new Pair<>(left, right);
    }

    public L getLeft() {
        return left;
    }

    public R getRight() {
        return right;
    }


}
35 |
--------------------------------------------------------------------------------
/parsex-common/src/main/java/com/sucx/common/util/StringUtils.java:
--------------------------------------------------------------------------------
1 | package com.sucx.common.util;
2 |
3 |
4 | import com.sucx.common.Constants;
5 |
6 | /**
7 | * desc:
8 | *
9 | * @author scx
10 | * @create 2020/03/12
11 | */
12 | public class StringUtils {
13 |
14 |
15 | public static Pair getPointPair(String content) {
16 | return getPair(Constants.POINT, content, false);
17 | }
18 |
19 | public static Pair getLastPointPair(String content) {
20 | return getPair(Constants.POINT, content, true);
21 | }
22 |
23 |
24 | private static Pair getPair(String split, String content, boolean dir) {
25 | int index;
26 | if (dir) {
27 | index = content.lastIndexOf(Constants.POINT);
28 | } else {
29 | index = content.indexOf(Constants.POINT);
30 | }
31 | if (index == -1) {
32 | throw new RuntimeException("not contain . character:" + content);
33 | }
34 | String left = content.substring(0, index);
35 | String right = content.substring(index + 1);
36 | return Pair.of(left, right);
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/parsex-core/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | parsex
7 | com.sucx.bigdata
8 | 0.0.1-SNAPSHOT
9 |
10 | 4.0.0
11 | 0.0.1-SNAPSHOT
12 |
13 | parsex-core
14 |
15 |
16 | junit
17 | junit
18 | test
19 |
20 |
21 |
22 | com.sucx.bigdata
23 | parsex-common
24 | 0.0.1-SNAPSHOT
25 |
26 |
27 | com.facebook.presto
28 | presto-parser
29 |
30 |
31 | org.apache.hive
32 | hive-exec
33 |
34 |
35 | org.eclipse.jetty.orbit
36 | javax.servlet
37 |
38 |
39 | org.apache.hive
40 | hive-service-rpc
41 |
42 |
43 |
44 |
45 | org.scala-lang
46 | scala-library
47 |
48 |
49 |
50 | org.antlr
51 | antlr4
52 |
53 |
54 | org.apache.spark
55 | spark-catalyst_${scala.binary.version}
56 |
57 |
58 | javax.servlet
59 | javax.servlet-api
60 |
61 |
62 |
63 |
64 |
65 | org.apache.spark
66 | spark-sql_${scala.binary.version}
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | org.apache.maven.plugins
76 | maven-shade-plugin
77 | 3.0.0
78 |
79 | false
80 | false
81 | true
82 | false
83 | false
84 |
85 |
86 |
87 | package
88 |
89 | shade
90 |
91 |
92 |
93 |
94 | org.apache.hive:hive-exec
95 |
96 |
97 |
98 |
99 | *:*
100 |
101 | META-INF/*.SF
102 | META-INF/*.DSA
103 | META-INF/*.RSA
104 |
105 |
106 |
107 |
108 |
109 | com.google.guava
110 | com.medata.google.guava
111 |
112 |
113 | com.google.common
114 | com.medata.google.common
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 | net.alchim31.maven
125 | scala-maven-plugin
126 | 3.3.1
127 |
128 |
129 | scala-compile-first
130 | process-resources
131 |
132 | add-source
133 | compile
134 |
135 |
136 |
137 | compile
138 |
139 | compile
140 | testCompile
141 |
142 |
143 |
144 |
145 | ${scala.binary.version}
146 |
147 |
148 |
149 |
150 |
151 |
--------------------------------------------------------------------------------
/parsex-core/src/main/java/com/sucx/core/AbstractSqlParse.java:
--------------------------------------------------------------------------------
1 | package com.sucx.core;
2 |
3 | import com.sucx.common.Constants;
4 | import com.sucx.common.enums.OperatorType;
5 | import com.sucx.common.exceptions.SqlParseException;
6 | import com.sucx.common.model.Result;
7 | import com.sucx.common.model.TableInfo;
8 | import com.sucx.common.util.Pair;
9 | import com.sucx.common.util.StringUtils;
10 | import org.apache.log4j.Logger;
11 | import scala.Tuple3;
12 |
13 | import java.util.*;
14 | import java.util.stream.Collectors;
15 |
16 | /**
17 | * desc:
18 | *
19 | * @author scx
20 | * @create 2020/02/29
21 | */
22 | public abstract class AbstractSqlParse implements SqlParse {
23 |
24 |
25 | private static Logger log = Logger.getLogger(AbstractSqlParse.class);
26 |
27 |
28 | protected final String columnSplit = ",";
29 | protected Map tableAliaMap;
30 | protected Stack> columnsStack;
31 | protected Stack limitStack;
32 |
33 | protected boolean hasJoin;
34 |
35 | protected String currentDb;
36 |
37 |
38 | protected HashSet splitColumn(Set columns, Map tableMap) {
39 | return (HashSet) columns.stream()
40 | .flatMap(column -> Arrays.stream(column.split(columnSplit)))
41 | .collect(Collectors.toSet())
42 | .stream()
43 | .map(column -> {
44 | if (column.contains(Constants.POINT)) {
45 | Pair pair = StringUtils.getPointPair(column);
46 | String aDefault = tableMap.getOrDefault(pair.getLeft(), pair.getLeft());
47 | return aDefault + Constants.POINT + pair.getRight();
48 | }
49 | return column;
50 | }).collect(Collectors.toSet());
51 | }
52 |
53 |
54 | protected HashSet getColumnsTop() {
55 | if (columnsStack.isEmpty()) {
56 | return new HashSet<>(0);
57 | }
58 | return columnsStack.pop();
59 | }
60 |
61 |
62 | protected String getLimitTop() {
63 | if (limitStack.isEmpty()) {
64 | return null;
65 | }
66 | return limitStack.pop();
67 | }
68 |
69 |
70 | protected TableInfo buildTableInfo(String name, String db, OperatorType type) {
71 | TableInfo info = new TableInfo(name, db, type, splitColumn(getColumnsTop(), tableAliaMap));
72 | info.setLimit(getLimitTop());
73 | return info;
74 | }
75 |
76 | protected TableInfo buildTableInfo(String dbAndTable, OperatorType type) {
77 | TableInfo info = new TableInfo(dbAndTable, type, currentDb, splitColumn(getColumnsTop(), tableAliaMap));
78 | info.setLimit(getLimitTop());
79 | return info;
80 | }
81 |
82 | /**
83 | * 替换sql注释
84 | *
85 | * @param sqlText sql
86 | * @return 替换后的sl
87 | */
88 | protected String replaceNotes(String sqlText) {
89 | StringBuilder newSql = new StringBuilder();
90 | String lineBreak = "\n";
91 | String empty = "";
92 | String trimLine;
93 | for (String line : sqlText.split(lineBreak)) {
94 | trimLine = line.trim();
95 | if (!trimLine.startsWith("--") && !trimLine.startsWith("download")) {
96 | //过滤掉行内注释
97 | line = line.replaceAll("/\\*.*\\*/", empty);
98 | if (org.apache.commons.lang3.StringUtils.isNotBlank(line)) {
99 | newSql.append(line).append(lineBreak);
100 | }
101 | }
102 | }
103 | return newSql.toString();
104 | }
105 |
106 |
107 | /**
108 | * ;分割多段sql
109 | *
110 | * @param sqlText sql
111 | * @return
112 | */
113 | protected ArrayList splitSql(String sqlText) {
114 | String[] sqlArray = sqlText.split(Constants.SEMICOLON);
115 | ArrayList newSqlArray = new ArrayList<>(sqlArray.length);
116 | String command = "";
117 | int arrayLen = sqlArray.length;
118 | String oneCmd;
119 | for (int i = 0; i < arrayLen; i++) {
120 | oneCmd = sqlArray[i];
121 | boolean keepSemicolon = (oneCmd.endsWith("'") && i + 1 < arrayLen && sqlArray[i + 1].startsWith("'"))
122 | || (oneCmd.endsWith("\"") && i + 1 < arrayLen && sqlArray[i + 1].startsWith("\""));
123 | if (oneCmd.endsWith("\\")) {
124 | command += org.apache.commons.lang.StringUtils.chop(oneCmd) + Constants.SEMICOLON;
125 | continue;
126 | } else if (keepSemicolon) {
127 | command += oneCmd + Constants.SEMICOLON;
128 | continue;
129 | } else {
130 | command += oneCmd;
131 | }
132 | if (org.apache.commons.lang3.StringUtils.isBlank(command)) {
133 | continue;
134 | }
135 | newSqlArray.add(command);
136 | command = "";
137 | }
138 | return newSqlArray;
139 | }
140 |
141 |
142 | @Override
143 | public Result parse(String sqlText) throws SqlParseException {
144 |
145 | ArrayList sqlArray = this.splitSql(this.replaceNotes(sqlText));
146 | HashSet inputTables = new HashSet<>();
147 | HashSet outputTables = new HashSet<>();
148 | HashSet tempTables = new HashSet<>();
149 |
150 | columnsStack = new Stack<>();
151 | tableAliaMap = new HashMap<>();
152 | limitStack = new Stack<>();
153 | currentDb = "default";
154 | hasJoin = false;
155 | for (String sql : sqlArray) {
156 | if (sql.charAt(sql.length() - 1) == ';') {
157 | sql = sql.substring(0, sql.length() - 1);
158 | }
159 | if (org.apache.commons.lang3.StringUtils.isBlank(sql)) {
160 | continue;
161 | }
162 | columnsStack.clear();
163 | limitStack.clear();
164 | Tuple3, HashSet, HashSet> subTuple = this.parseInternal(sql);
165 | inputTables.addAll(subTuple._1());
166 | outputTables.addAll(subTuple._2());
167 | tempTables.addAll(subTuple._3());
168 | }
169 |
170 | tempTables.forEach(table -> {
171 | Iterator iterator = inputTables.iterator();
172 | while (iterator.hasNext()) {
173 | TableInfo checkTable = iterator.next();
174 | if (checkTable.getName().equals(table.getName())) {
175 | iterator.remove();
176 | break;
177 | }
178 | }
179 | });
180 |
181 | return new Result(inputTables, outputTables, tempTables, hasJoin);
182 | }
183 |
184 | /**
185 | * 抽象解析
186 | *
187 | * @param sqlText sql
188 | * @return tuple4
189 | * @throws SqlParseException
190 | */
191 | protected abstract Tuple3, HashSet, HashSet> parseInternal(String sqlText) throws SqlParseException;
192 |
193 | protected void print(String plan) {
194 | log.info("************ignore plan******\n" + plan);
195 | }
196 | }
197 |
--------------------------------------------------------------------------------
/parsex-core/src/main/java/com/sucx/core/HiveSQLParse.java:
--------------------------------------------------------------------------------
1 | package com.sucx.core;
2 |
3 | import com.sucx.common.Constants;
4 | import com.sucx.common.enums.OperatorType;
5 | import com.sucx.common.exceptions.SqlParseException;
6 | import com.sucx.common.model.TableInfo;
7 | import org.apache.hadoop.hive.ql.lib.*;
8 | import org.apache.hadoop.hive.ql.parse.*;
9 | import scala.Tuple3;
10 |
11 | import java.util.*;
12 |
13 | /**
14 | * desc:
15 | *
16 | * @author scx
17 | * @create 2020/02/29
18 | */
19 | public class HiveSQLParse extends AbstractSqlParse implements NodeProcessor {
20 |
21 | /**
22 | * 临时输入表
23 | */
24 | private HashSet inputTableList;
25 |
26 | /**
27 | * 临时输出表
28 | */
29 | private HashSet outputTableList;
30 |
31 | /**
32 | * 临时表
33 | */
34 | private HashSet withTableList;
35 |
36 |
37 | @Override
38 | public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) {
39 | ASTNode ast = (ASTNode) nd;
40 | switch (ast.getToken().getType()) {
41 | //create语句
42 | case HiveParser.TOK_CREATETABLE: {
43 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0));
44 | outputTableList.add(new TableInfo(tableName, OperatorType.CREATE, currentDb, new HashSet<>()));
45 | break;
46 | }
47 | //insert语句
48 | case HiveParser.TOK_TAB: {
49 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0));
50 | outputTableList.add(new TableInfo(tableName, OperatorType.WRITE, currentDb, new HashSet<>()));
51 | break;
52 | }
53 | //from语句
54 | case HiveParser.TOK_TABREF: {
55 | ASTNode tabTree = (ASTNode) ast.getChild(0);
56 | String tableName = (tabTree.getChildCount() == 1) ? BaseSemanticAnalyzer.getUnescapedName((ASTNode) tabTree.getChild(0)) : BaseSemanticAnalyzer.getUnescapedName((ASTNode) tabTree.getChild(0)) + "." + tabTree.getChild(1);
57 | inputTableList.add(new TableInfo(tableName, OperatorType.READ, currentDb, new HashSet<>()));
58 | break;
59 | }
60 | // with.....语句
61 | case HiveParser.TOK_CTE: {
62 | for (int i = 0; i < ast.getChildCount(); i++) {
63 | ASTNode temp = (ASTNode) ast.getChild(i);
64 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) temp.getChild(1));
65 | withTableList.add(new TableInfo(tableName, OperatorType.READ, "temp", new HashSet<>()));
66 | }
67 | break;
68 | }
69 | //ALTER 语句
70 | case HiveParser.TOK_ALTERTABLE: {
71 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0));
72 | inputTableList.add(new TableInfo(tableName, OperatorType.ALTER, currentDb, new HashSet<>()));
73 | break;
74 | }
75 | case HiveParser.TOK_SWITCHDATABASE: {
76 | this.currentDb = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText());
77 | break;
78 | }
79 | case HiveParser.TOK_CREATEDATABASE: {
80 | String dbName = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText());
81 | inputTableList.add(new TableInfo(dbName, OperatorType.CREATE));
82 | break;
83 | }
84 | case HiveParser.TOK_DROPDATABASE: {
85 | String dbName = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText());
86 | inputTableList.add(new TableInfo(dbName, OperatorType.DROP));
87 | break;
88 | }
89 | case HiveParser.TOK_ALTERDATABASE_OWNER:
90 | case HiveParser.TOK_ALTERDATABASE_PROPERTIES: {
91 | String dbName = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText());
92 | inputTableList.add(new TableInfo(dbName, OperatorType.ALTER));
93 | break;
94 | }
95 | default: {
96 | return null;
97 | }
98 | }
99 | return null;
100 | }
101 |
102 | @Override
103 | protected String replaceNotes(String sqlText) {
104 | StringBuilder builder = new StringBuilder();
105 | String lineBreak = "\n";
106 | for (String line : sqlText.split(lineBreak)) {
107 | //udf 添加的去掉,目前无法解析,会抛异常
108 | if (line.toLowerCase().startsWith("add jar") || line.toLowerCase().startsWith("set")) {
109 | int splitIndex = line.indexOf(Constants.SEMICOLON);
110 | if (splitIndex != -1 && splitIndex + 1 != line.length()) {
111 | builder.append(line.substring(splitIndex + 1)).append(lineBreak);
112 | }
113 | } else {
114 | builder.append(line).append(lineBreak);
115 | }
116 | }
117 | return super.replaceNotes(builder.toString());
118 | }
119 |
120 | @Override
121 | protected Tuple3, HashSet, HashSet> parseInternal(String sqlText) throws SqlParseException {
122 | ParseDriver pd = new ParseDriver();
123 | ASTNode tree;
124 | try {
125 | tree = pd.parse(sqlText);
126 | } catch (ParseException e) {
127 | throw new SqlParseException(e);
128 | }
129 | while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
130 | tree = (ASTNode) tree.getChild(0);
131 | }
132 | inputTableList = new HashSet<>();
133 | outputTableList = new HashSet<>();
134 | withTableList = new HashSet<>();
135 | Map rules = new LinkedHashMap<>();
136 |
137 | GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(this, rules, null));
138 |
139 | ArrayList topNodes = new ArrayList<>();
140 | topNodes.add(tree);
141 | try {
142 | ogw.startWalking(topNodes, null);
143 | } catch (SemanticException e) {
144 | throw new RuntimeException(e);
145 | }
146 | return new Tuple3<>(inputTableList, outputTableList, withTableList);
147 | }
148 | }
149 |
--------------------------------------------------------------------------------
/parsex-core/src/main/java/com/sucx/core/PrestoSqlParse.java:
--------------------------------------------------------------------------------
1 | package com.sucx.core;
2 |
3 | import com.facebook.presto.sql.parser.ParsingException;
4 | import com.facebook.presto.sql.parser.ParsingOptions;
5 | import com.facebook.presto.sql.parser.SqlParser;
6 | import com.facebook.presto.sql.tree.*;
7 | import com.sucx.common.Constants;
8 | import com.sucx.common.enums.OperatorType;
9 | import com.sucx.common.exceptions.SqlParseException;
10 | import com.sucx.common.model.TableInfo;
11 | import org.apache.log4j.Logger;
12 | import scala.Tuple3;
13 |
14 | import java.util.ArrayList;
15 | import java.util.Arrays;
16 | import java.util.HashSet;
17 | import java.util.List;
18 | import java.util.stream.Collectors;
19 |
20 | /**
21 | * 目前presto 仅仅是用来查询,目前还没解析输入表
22 | * desc: 解析presto sql的输入表、字段
23 | *
24 | * @author scx
25 | * @create 2020/03/09
26 | */
27 |
28 | public class PrestoSqlParse extends AbstractSqlParse {
29 |
30 | private static Logger log = Logger.getLogger(PrestoSqlParse.class);
31 |
32 | private HashSet inputTables;
33 | private HashSet outputTables;
34 | private HashSet tempTables;
35 |
36 | /**
37 | * select 字段表达式中获取字段
38 | *
39 | * @param expression
40 | * @return
41 | */
42 | private String getColumn(Expression expression) throws SqlParseException {
43 | if (expression instanceof IfExpression) {
44 | IfExpression ifExpression = (IfExpression) expression;
45 | List list = new ArrayList<>();
46 | list.add(ifExpression.getCondition());
47 | list.add(ifExpression.getTrueValue());
48 | ifExpression.getFalseValue().ifPresent(list::add);
49 | return getString(list);
50 | } else if (expression instanceof Identifier) {
51 | Identifier identifier = (Identifier) expression;
52 | return identifier.getValue();
53 | } else if (expression instanceof FunctionCall) {
54 | FunctionCall call = (FunctionCall) expression;
55 | StringBuilder columns = new StringBuilder();
56 | List arguments = call.getArguments();
57 | int size = arguments.size();
58 | for (int i = 0; i < size; i++) {
59 | Expression exp = arguments.get(i);
60 | if (i == 0) {
61 | columns.append(getColumn(exp));
62 | } else {
63 | columns.append(getColumn(exp)).append(columnSplit);
64 | }
65 | }
66 | return columns.toString();
67 | } else if (expression instanceof ComparisonExpression) {
68 | ComparisonExpression compare = (ComparisonExpression) expression;
69 | return getString(compare.getLeft(), compare.getRight());
70 | } else if (expression instanceof Literal || expression instanceof ArithmeticUnaryExpression) {
71 | return "";
72 | } else if (expression instanceof Cast) {
73 | Cast cast = (Cast) expression;
74 | return getColumn(cast.getExpression());
75 | } else if (expression instanceof DereferenceExpression) {
76 | DereferenceExpression reference = (DereferenceExpression) expression;
77 | return reference.toString();
78 | } else if (expression instanceof ArithmeticBinaryExpression) {
79 | ArithmeticBinaryExpression binaryExpression = (ArithmeticBinaryExpression) expression;
80 | return getString(binaryExpression.getLeft(), binaryExpression.getRight());
81 | } else if (expression instanceof SearchedCaseExpression) {
82 | SearchedCaseExpression caseExpression = (SearchedCaseExpression) expression;
83 | List exps = caseExpression.getWhenClauses().stream().map(whenClause -> (Expression) whenClause).collect(Collectors.toList());
84 | caseExpression.getDefaultValue().ifPresent(exps::add);
85 | return getString(exps);
86 | } else if (expression instanceof WhenClause) {
87 | WhenClause whenClause = (WhenClause) expression;
88 | return getString(whenClause.getOperand(), whenClause.getResult());
89 | } else if (expression instanceof LikePredicate) {
90 | LikePredicate likePredicate = (LikePredicate) expression;
91 | return likePredicate.getValue().toString();
92 | } else if (expression instanceof InPredicate) {
93 | InPredicate predicate = (InPredicate) expression;
94 | return predicate.getValue().toString();
95 | } else if (expression instanceof SubscriptExpression) {
96 | SubscriptExpression subscriptExpression = (SubscriptExpression) expression;
97 | return getColumn(subscriptExpression.getBase());
98 | } else if (expression instanceof LogicalBinaryExpression) {
99 | LogicalBinaryExpression logicExp = (LogicalBinaryExpression) expression;
100 | return getString(logicExp.getLeft(), logicExp.getRight());
101 | } else if (expression instanceof IsNullPredicate) {
102 | IsNullPredicate isNullExp = (IsNullPredicate) expression;
103 | return getColumn(isNullExp.getValue());
104 | } else if (expression instanceof IsNotNullPredicate) {
105 | IsNotNullPredicate notNull = (IsNotNullPredicate) expression;
106 | return getColumn(notNull.getValue());
107 | } else if (expression instanceof CoalesceExpression) {
108 | CoalesceExpression coalesce = (CoalesceExpression) expression;
109 | return getString(coalesce.getOperands());
110 | }
111 | throw new SqlParseException("无法识别的表达式:" + expression.getClass().getName());
112 | // return expression.toString();
113 | }
114 |
115 |
116 | private String getString(Expression... exps) throws SqlParseException {
117 | return getString(Arrays.stream(exps).collect(Collectors.toList()));
118 | }
119 |
120 | private String getString(List exps) throws SqlParseException {
121 | StringBuilder builder = new StringBuilder();
122 | for (Expression exp : exps) {
123 | builder.append(getColumn(exp)).append(columnSplit);
124 | }
125 | return builder.toString();
126 | }
127 |
128 |
129 | /**
130 | * node 节点的遍历
131 | *
132 | * @param node
133 | */
134 | private void checkNode(Node node) throws SqlParseException {
135 | if (node instanceof QuerySpecification) {
136 | QuerySpecification query = (QuerySpecification) node;
137 | query.getLimit().ifPresent(limit -> limitStack.push(limit));
138 | loopNode(query.getChildren());
139 | } else if (node instanceof TableSubquery) {
140 | loopNode(node.getChildren());
141 | } else if (node instanceof AliasedRelation) {
142 | AliasedRelation alias = (AliasedRelation) node;
143 | String value = alias.getAlias().getValue();
144 | if (alias.getChildren().size() == 1 && alias.getChildren().get(0) instanceof Table) {
145 | Table table = (Table) alias.getChildren().get(0);
146 | tableAliaMap.put(value, table.getName().toString());
147 | } else {
148 | tempTables.add(buildTableInfo(value, OperatorType.READ));
149 | }
150 | loopNode(node.getChildren());
151 | } else if (node instanceof Query || node instanceof SubqueryExpression
152 | || node instanceof Union || node instanceof With
153 | || node instanceof LogicalBinaryExpression || node instanceof InPredicate) {
154 | loopNode(node.getChildren());
155 |
156 | } else if (node instanceof Join) {
157 | hasJoin = true;
158 | loopNode(node.getChildren());
159 | }
160 | //基本都是where条件,过滤掉,如果需要,可以调用getColumn解析字段
161 | else if (node instanceof LikePredicate || node instanceof NotExpression
162 | || node instanceof IfExpression
163 | || node instanceof ComparisonExpression || node instanceof GroupBy
164 | || node instanceof OrderBy || node instanceof Identifier
165 | || node instanceof InListExpression || node instanceof DereferenceExpression
166 | || node instanceof IsNotNullPredicate || node instanceof IsNullPredicate
167 | || node instanceof FunctionCall) {
168 | print(node.getClass().getName());
169 |
170 | } else if (node instanceof WithQuery) {
171 | WithQuery withQuery = (WithQuery) node;
172 | tempTables.add(buildTableInfo(withQuery.getName().getValue(), OperatorType.READ));
173 | loopNode(withQuery.getChildren());
174 | } else if (node instanceof Table) {
175 | Table table = (Table) node;
176 | inputTables.add(buildTableInfo(table.getName().toString(), OperatorType.READ));
177 | loopNode(table.getChildren());
178 | } else if (node instanceof Select) {
179 | Select select = (Select) node;
180 | List selectItems = select.getSelectItems();
181 | HashSet columns = new HashSet<>();
182 | for (SelectItem item : selectItems) {
183 | if (item instanceof SingleColumn) {
184 | columns.add(getColumn(((SingleColumn) item).getExpression()));
185 | } else if (item instanceof AllColumns) {
186 | columns.add(item.toString());
187 | } else {
188 | throw new SqlParseException("unknow column type:" + item.getClass().getName());
189 | }
190 | }
191 | columnsStack.push(columns);
192 |
193 | } else {
194 | throw new SqlParseException("unknow node type:" + node.getClass().getName());
195 | }
196 | }
197 |
198 |
199 | private void loopNode(List extends Node> children) throws SqlParseException {
200 | for (Node node : children) {
201 | this.checkNode(node);
202 | }
203 | }
204 |
205 | /**
206 | * statement 过滤 只识别select 语句
207 | *
208 | * @param statement
209 | * @throws SqlParseException
210 | */
211 | private void check(Statement statement) throws SqlParseException {
212 | if (statement instanceof Query) {
213 | Query query = (Query) statement;
214 | List children = query.getChildren();
215 | for (Node child : children) {
216 | checkNode(child);
217 | }
218 | } else if (statement instanceof Use) {
219 | Use use = (Use) statement;
220 | this.currentDb = use.getSchema().getValue();
221 | } else if (statement instanceof ShowColumns) {
222 | ShowColumns show = (ShowColumns) statement;
223 | String allName = show.getTable().toString().replace("hive.", "");
224 | inputTables.add(buildTableInfo(allName, OperatorType.READ));
225 | } else if (statement instanceof ShowTables) {
226 | ShowTables show = (ShowTables) statement;
227 | QualifiedName qualifiedName = show.getSchema().orElseThrow(() -> new SqlParseException("unkonw table name or db name" + statement.toString()));
228 | String allName = qualifiedName.toString().replace("hive.", "");
229 | if (allName.contains(Constants.POINT)) {
230 | allName += Constants.POINT + "*";
231 | }
232 | inputTables.add(buildTableInfo(allName, OperatorType.READ));
233 |
234 | } else {
235 | throw new SqlParseException("sorry,only support read statement,unSupport statement:" + statement.getClass().getName());
236 | }
237 | }
238 |
239 |
240 | @Override
241 | protected Tuple3, HashSet, HashSet> parseInternal(String sqlText) throws SqlParseException {
242 | this.inputTables = new HashSet<>();
243 | this.outputTables = new HashSet<>();
244 | this.tempTables = new HashSet<>();
245 | try {
246 | check(new SqlParser().createStatement(sqlText, new ParsingOptions(ParsingOptions.DecimalLiteralTreatment.AS_DECIMAL)));
247 | } catch (ParsingException e) {
248 | throw new SqlParseException("parse sql exception:" + e.getMessage(), e);
249 | }
250 | return new Tuple3<>(inputTables, outputTables, tempTables);
251 | }
252 | }
253 |
--------------------------------------------------------------------------------
/parsex-core/src/main/java/com/sucx/core/SqlParse.java:
--------------------------------------------------------------------------------
1 | package com.sucx.core;
2 |
3 | import com.sucx.common.exceptions.SqlParseException;
4 | import com.sucx.common.model.Result;
5 |
/**
 * Common entry point implemented by every engine-specific SQL lineage parser
 * (Spark / Hive / Presto).
 *
 * @author scx
 * @create 2020/02/29
 */
public interface SqlParse {


    /**
     * Parses the given SQL text and extracts its lineage
     * (input / output / temporary tables).
     *
     * @param sqlText the SQL text to analyse
     * @return Result the collected lineage information
     * @throws SqlParseException if the SQL cannot be parsed
     */
    Result parse(String sqlText) throws SqlParseException;

}
24 |
--------------------------------------------------------------------------------
/parsex-core/src/main/java/com/sucx/core/SqlParseUtil.java:
--------------------------------------------------------------------------------
1 | package com.sucx.core;
2 |
3 | import com.sucx.common.enums.SqlEnum;
4 | import com.sucx.common.exceptions.SqlParseException;
5 | import com.sucx.common.model.Result;
6 | import com.sucx.common.model.TableInfo;
7 | import org.apache.commons.logging.Log;
8 | import org.apache.commons.logging.LogFactory;
9 | import scala.Tuple3;
10 |
11 | import java.util.HashSet;
12 | import java.util.Map;
13 | import java.util.Set;
14 | import java.util.concurrent.ConcurrentHashMap;
15 |
16 | /**
17 | * desc:
18 | *
19 | * @author scx
20 | * @create 2020/02/26
21 | */
22 | public class SqlParseUtil {
23 |
24 | private static final Log LOG = LogFactory.getLog(SqlParseUtil.class);
25 |
26 |
27 | private static Map sqlParseMap = new ConcurrentHashMap<>(3);
28 |
29 |
30 | public static Result parsePrestoSql(String sqlText) throws SqlParseException {
31 | return parse(SqlEnum.PRESTO, sqlText);
32 | }
33 |
34 | public static Result parseHiveSql(String sqlText) throws SqlParseException {
35 | return parse(SqlEnum.HIVE, sqlText);
36 | }
37 |
38 | public static Result parseSparkSql(String sqlText) throws SqlParseException {
39 | return parse(SqlEnum.SPARK, sqlText);
40 | }
41 |
42 | /**
43 | * 解析sql入口
44 | *
45 | * @param sqlEnum sql类型
46 | * @param sqlText sql内容
47 | * @return Result
48 | * @throws SqlParseException 解析异常
49 | */
50 | private static Result parse(SqlEnum sqlEnum, String sqlText) throws SqlParseException {
51 | Result result;
52 | switch (sqlEnum) {
53 | case SPARK:
54 | try {
55 | result = getSqlParse(sqlEnum).parse(sqlText);
56 | } catch (Exception e) {
57 | LOG.error("spark引擎解析异常,准备使用hive引擎解析:" + sqlText);
58 | try {
59 | result = getSqlParse(SqlEnum.HIVE).parse(sqlText);
60 | } catch (Exception e1) {
61 | throw new SqlParseException(e);
62 | }
63 | }
64 | return result;
65 | case HIVE:
66 | try {
67 | result = getSqlParse(sqlEnum).parse(sqlText);
68 | } catch (Exception e) {
69 | LOG.error("hive引擎解析异常,准备使用spark引擎解析:" + sqlText);
70 | try {
71 | result = getSqlParse(SqlEnum.SPARK).parse(sqlText);
72 | } catch (Exception e1) {
73 | throw new SqlParseException(e);
74 | }
75 | }
76 | return result;
77 |
78 | case PRESTO:
79 | result = getSqlParse(sqlEnum).parse(sqlText);
80 | return result;
81 | default:
82 | throw new IllegalArgumentException("not support sqlEnum type :" + sqlEnum.name());
83 |
84 | }
85 | }
86 |
87 |
88 | private static SqlParse getSqlParse(SqlEnum sqlEnum) {
89 | SqlParse sqlParse = sqlParseMap.get(sqlEnum);
90 | if (sqlParse == null) {
91 | synchronized (SqlParseUtil.class) {
92 | sqlParse = sqlParseMap.get(sqlEnum);
93 | if (sqlParse == null) {
94 | switch (sqlEnum) {
95 | case PRESTO:
96 | sqlParse = new PrestoSqlParse();
97 | break;
98 | case SPARK:
99 | sqlParse = new SparkSQLParse();
100 | break;
101 | case HIVE:
102 | sqlParse = new HiveSQLParse();
103 | break;
104 | default:
105 | throw new IllegalArgumentException("not support sqlEnum type :" + sqlEnum.name());
106 |
107 | }
108 | sqlParseMap.put(sqlEnum, sqlParse);
109 | }
110 | }
111 | }
112 | return sqlParse;
113 | }
114 |
115 | public static void print(Tuple3, HashSet, HashSet> tuple3) {
116 | if (tuple3 == null) {
117 | return;
118 | }
119 | print(tuple3._2(), tuple3._2(), tuple3._3(), false);
120 | }
121 |
122 | public static void print(Result result) {
123 | if (result == null) {
124 | return;
125 | }
126 | print(result.getInputSets(), result.getOutputSets(), result.getTempSets(), result.isJoin());
127 |
128 | }
129 |
130 | private static void print(Set inputTable, Set outputTable, Set tempTable, boolean join) {
131 |
132 | LOG.info("是否包含join:" + join);
133 | LOG.info("输入表有:");
134 | for (TableInfo table : inputTable) {
135 | LOG.info(table);
136 | }
137 |
138 | LOG.info("输出表有:");
139 |
140 | for (TableInfo table : outputTable) {
141 | LOG.info(table);
142 | }
143 |
144 | LOG.info("临时表:");
145 |
146 | for (TableInfo table : tempTable) {
147 | LOG.info(table);
148 | }
149 |
150 | }
151 |
152 | }
153 |
--------------------------------------------------------------------------------
/parsex-core/src/main/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | %d{YYYY-MM-dd HH:mm:ss} [%t] %-5p %c{1}:%L - %msg%n
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/parsex-core/src/main/scala/com/sucx/core/SparkSQLParse.scala:
--------------------------------------------------------------------------------
1 | package com.sucx.core
2 |
3 | import java.util.{HashSet => JSet}
4 |
5 | import com.sucx.common.enums.OperatorType
6 | import com.sucx.common.model.TableInfo
7 | import org.apache.spark.sql.catalyst.TableIdentifier
8 | import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
9 | import org.apache.spark.sql.catalyst.catalog.UnresolvedCatalogRelation
10 | import org.apache.spark.sql.catalyst.plans.logical._
11 | import org.apache.spark.sql.execution.SparkSqlParser
12 | import org.apache.spark.sql.execution.command._
13 | import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshTable}
14 | import org.apache.spark.sql.internal.SQLConf
15 |
16 | import scala.collection.JavaConversions._
17 |
class SparkSQLParse extends AbstractSqlParse {

  /**
   * Resolves a whole logical plan, returning the collected
   * (input tables, output tables, temporary tables).
   */
  private[this] def resolveLogicPlan(plan: LogicalPlan) = {
    val inputs = new JSet[TableInfo]()
    val outputs = new JSet[TableInfo]()
    val temps = new JSet[TableInfo]()
    resolveLogic(plan, inputs, outputs, temps)
    Tuple3(inputs, outputs, temps)
  }

  /**
   * Recursively walks one logical-plan node and records the tables it reads
   * (inputTables), writes (outputTables) or defines temporarily (tmpTables).
   * Unknown node types fail fast with a RuntimeException.
   */
  private[this] def resolveLogic(plan: LogicalPlan, inputTables: JSet[TableInfo], outputTables: JSet[TableInfo], tmpTables: JSet[TableInfo]): Unit = {
    plan match {

      // Pass-through operators: nothing to record, just descend into the child(ren).
      case p: Project => resolveLogic(p.child, inputTables, outputTables, tmpTables)
      case u: Union => u.children.foreach(child => resolveLogic(child, inputTables, outputTables, tmpTables))
      case j: Join =>
        resolveLogic(j.left, inputTables, outputTables, tmpTables)
        resolveLogic(j.right, inputTables, outputTables, tmpTables)
      case a: Aggregate => resolveLogic(a.child, inputTables, outputTables, tmpTables)
      case f: Filter => resolveLogic(f.child, inputTables, outputTables, tmpTables)
      case g: Generate => resolveLogic(g.child, inputTables, outputTables, tmpTables)
      case r: RepartitionByExpression => resolveLogic(r.child, inputTables, outputTables, tmpTables)
      case s: SerializeFromObject => resolveLogic(s.child, inputTables, outputTables, tmpTables)
      case m: MapPartitions => resolveLogic(m.child, inputTables, outputTables, tmpTables)
      case d: DeserializeToObject => resolveLogic(d.child, inputTables, outputTables, tmpTables)
      case r: Repartition => resolveLogic(r.child, inputTables, outputTables, tmpTables)
      case d: Deduplicate => resolveLogic(d.child, inputTables, outputTables, tmpTables)
      case w: Window => resolveLogic(w.child, inputTables, outputTables, tmpTables)
      case m: MapElements => resolveLogic(m.child, inputTables, outputTables, tmpTables)
      case t: TypedFilter => resolveLogic(t.child, inputTables, outputTables, tmpTables)
      case d: Distinct => resolveLogic(d.child, inputTables, outputTables, tmpTables)

      case sub: SubqueryAlias =>
        // Resolve the child into private sets so we can tell whether the alias
        // maps onto exactly one concrete table.
        val childInputs = new JSet[TableInfo]()
        val childOutputs = new JSet[TableInfo]()
        resolveLogic(sub.child, childInputs, childOutputs, tmpTables)
        if (childInputs.size() > 1) {
          childInputs.foreach(t => inputTables.add(t))
        } else if (childInputs.size() == 1) {
          val info: TableInfo = childInputs.iterator().next()
          tableAliaMap.put(sub.alias, info.getDbName + "." + info.getName)
          inputTables.add(new TableInfo(info.getName, info.getDbName, info.getType, splitColumn(info.getColumns, tableAliaMap)))
        }
        tmpTables.add(buildTableInfo(sub.alias, this.currentDb, OperatorType.READ))

      case rel: UnresolvedCatalogRelation =>
        val id: TableIdentifier = rel.tableMeta.identifier
        inputTables.add(buildTableInfo(id.table, id.database.getOrElse(this.currentDb), OperatorType.READ))

      case rel: UnresolvedRelation =>
        inputTables.add(buildTableInfo(rel.tableIdentifier.table, rel.tableIdentifier.database.getOrElse(this.currentDb), OperatorType.READ))

      case insert: InsertIntoTable =>
        insert.table match {
          case target: UnresolvedRelation =>
            val id: TableIdentifier = target.tableIdentifier
            outputTables.add(buildTableInfo(id.table, id.database.getOrElse(this.currentDb), OperatorType.WRITE))
          case _ =>
            throw new RuntimeException("无法解析的插入逻辑语法树:" + insert.table)
        }
        resolveLogic(insert.query, inputTables, outputTables, tmpTables)

      case ct: CreateTable =>
        // CTAS: the source query contributes inputs; the created table is an output.
        if (ct.query.isDefined) {
          resolveLogic(ct.query.get, inputTables, outputTables, tmpTables)
        }
        val columnsSet = new JSet[String]()
        ct.tableDesc.schema.fields.foreach(field => columnsSet.add(field.name))
        columnsStack.push(columnsSet)
        val id: TableIdentifier = ct.tableDesc.identifier
        outputTables.add(buildTableInfo(id.table, id.database.getOrElse(this.currentDb), OperatorType.CREATE))

      case l: GlobalLimit => resolveLogic(l.child, inputTables, outputTables, tmpTables)
      case l: LocalLimit => resolveLogic(l.child, inputTables, outputTables, tmpTables)

      case w: With =>
        // Every CTE is a temporary table; its defining query still contributes inputs.
        w.cteRelations.foreach { case (name, cte) =>
          tmpTables.add(buildTableInfo(name, "temp", OperatorType.READ))
          resolveLogic(cte, inputTables, outputTables, tmpTables)
        }
        resolveLogic(w.child, inputTables, outputTables, tmpTables)

      case s: Sort => resolveLogic(s.child, inputTables, outputTables, tmpTables)

      // Commands that touch no tables: echo and ignore.
      case ignored: SetCommand => print(ignored.toString())
      case ignored: AddJarCommand => print(ignored.toString())
      case ignored: CreateFunctionCommand => print(ignored.toString())
      case ignored: SetDatabaseCommand => print(ignored.toString())
      case ignored: OneRowRelation => print(ignored.toString())
      case ignored: DropFunctionCommand => print(ignored.toString())

      case alter: AlterTableAddPartitionCommand =>
        outputTables.add(buildTableInfo(alter.tableName.table, alter.tableName.database.getOrElse(this.currentDb), OperatorType.ALTER))

      case alter: AlterTableDropPartitionCommand =>
        outputTables.add(buildTableInfo(alter.tableName.table, alter.tableName.database.getOrElse(this.currentDb), OperatorType.ALTER))

      case alter: AlterTableAddColumnsCommand =>
        outputTables.add(buildTableInfo(alter.table.table, alter.table.database.getOrElse(this.currentDb), OperatorType.ALTER))

      case like: CreateTableLikeCommand =>
        inputTables.add(buildTableInfo(like.sourceTable.table, like.sourceTable.database.getOrElse(this.currentDb), OperatorType.READ))
        outputTables.add(buildTableInfo(like.targetTable.table, like.targetTable.database.getOrElse(this.currentDb), OperatorType.CREATE))

      case drop: DropTableCommand =>
        outputTables.add(buildTableInfo(drop.tableName.table, drop.tableName.database.getOrElse(this.currentDb), OperatorType.DROP))

      case recover: AlterTableRecoverPartitionsCommand =>
        outputTables.add(buildTableInfo(recover.tableName.table, recover.tableName.database.getOrElse(this.currentDb), OperatorType.ALTER))

      case g: GroupingSets => resolveLogic(g.child, inputTables, outputTables, tmpTables)

      case db: CreateDatabaseCommand =>
        inputTables.add(new TableInfo(db.databaseName, OperatorType.CREATE))

      case db: DropDatabaseCommand =>
        inputTables.add(new TableInfo(db.databaseName, OperatorType.DROP))

      case db: AlterDatabasePropertiesCommand =>
        inputTables.add(new TableInfo(db.databaseName, OperatorType.ALTER))

      case show: ShowCreateTableCommand =>
        // NOTE(review): recorded in outputTables with READ by the original code — confirm intent.
        outputTables.add(buildTableInfo(show.table.table, show.table.database.getOrElse(this.currentDb), OperatorType.READ))

      case refresh: RefreshTable =>
        inputTables.add(buildTableInfo(refresh.tableIdent.table, refresh.tableIdent.database.getOrElse(this.currentDb), OperatorType.READ))

      case other =>
        throw new RuntimeException("******child plan******:\n" + other.getClass.getName + "\n" + other)
    }
  }

  override protected def parseInternal(sqlText: String): (JSet[TableInfo], JSet[TableInfo], JSet[TableInfo]) = {
    val parser = new SparkSqlParser(new SQLConf)

    val logicalPlan: LogicalPlan = parser.parsePlan(sqlText)
    logicalPlan match {
      case command: SetDatabaseCommand =>
        // "USE db" only switches the current database; no lineage to collect.
        this.currentDb = command.databaseName
        return Tuple3(new JSet[TableInfo](0), new JSet[TableInfo](0), new JSet[TableInfo](0))
      case _ =>
    }
    this.resolveLogicPlan(logicalPlan)
  }
}
312 |
--------------------------------------------------------------------------------
/parsex-core/src/test/java/com/sucx/core/parse/presto/SqlBaseParserTest.java:
--------------------------------------------------------------------------------
1 | package com.sucx.core.parse.presto;
2 |
3 | import org.junit.Test;
4 |
/**
 * Placeholder test class for the generated Presto SqlBase parser.
 * No test cases have been implemented yet.
 */
public class SqlBaseParserTest {


    // TODO: add real assertions exercising the Presto SqlBase parser.
    @Test
    public void test1() {



    }

}
16 |
--------------------------------------------------------------------------------
/parsex-core/src/test/java/org/apache/spark/sql/catalyst/expressions/parse.java:
--------------------------------------------------------------------------------
1 | package org.apache.spark.sql.catalyst.expressions;
2 |
3 | import com.sucx.core.SparkSQLParse;
4 | import com.sucx.core.SqlParse;
5 | import com.sucx.common.exceptions.SqlParseException;
6 | import com.sucx.common.model.Result;
7 | import com.sucx.core.SqlParseUtil;
8 | import org.apache.hadoop.conf.Configuration;
9 | import org.apache.hadoop.fs.FileSystem;
10 | import org.apache.hadoop.fs.Path;
11 | import org.apache.hadoop.io.IOUtils;
12 | import org.junit.Assert;
13 | import org.junit.Test;
14 |
15 | import java.io.BufferedReader;
16 | import java.io.IOException;
17 | import java.io.InputStreamReader;
18 | import java.util.Arrays;
19 |
20 | /**
21 | * desc:
22 | *
23 | * @author scx
24 | * @create 2020/02/24
25 | */
26 | public class parse {
27 |
28 | String sql = "";
29 |
30 | @Test
31 | public void parse() throws SqlParseException {
32 |
33 | SqlParse sqlParse = new SparkSQLParse();
34 |
35 | Result tuple3 = sqlParse.parse(sql);
36 |
37 | SqlParseUtil.print(tuple3);
38 |
39 | }
40 |
41 |
42 | @Test
43 | public void splitSql() throws IOException {
44 | // deal(sql);
45 |
46 | readFile("/Users/scx/Desktop/test.hive");
47 | }
48 |
49 | public void readFile(String fileName) throws IOException {
50 | Configuration conf = new Configuration();
51 | Path path = new Path(fileName);
52 | FileSystem fs;
53 | if (!path.toUri().isAbsolute()) {
54 | fs = FileSystem.getLocal(conf);
55 | path = fs.makeQualified(path);
56 | } else {
57 | fs = FileSystem.get(path.toUri(), conf);
58 | }
59 | BufferedReader bufferReader = null;
60 | int rc = 0;
61 | try {
62 | bufferReader = new BufferedReader(new InputStreamReader(fs.open(path)));
63 | processReader(bufferReader);
64 | } finally {
65 | IOUtils.closeStream(bufferReader);
66 | }
67 | }
68 |
69 | public void processReader(BufferedReader r) throws IOException {
70 | String line;
71 | StringBuilder qsb = new StringBuilder();
72 |
73 | while ((line = r.readLine()) != null) {
74 | // Skipping through comments
75 | if (!line.startsWith("--")) {
76 | qsb.append(line + "\n");
77 | }
78 | }
79 | deal(qsb.toString());
80 | }
81 |
82 | public void deal(String line) {
83 |
84 | String command = "";
85 | for (String oneCmd : line.split(";")) {
86 | if (org.apache.commons.lang.StringUtils.endsWith(oneCmd, "\\")) {
87 | command += org.apache.commons.lang.StringUtils.chop(oneCmd) + ";";
88 | continue;
89 | } else {
90 | command += oneCmd;
91 | }
92 | if (org.apache.commons.lang.StringUtils.isBlank(command)) {
93 | continue;
94 | }
95 | System.out.println(command);
96 | System.out.println("=======");
97 | command = "";
98 | }
99 | }
100 |
101 |
102 | @Test
103 | public void split() {
104 | String sql = "abc; abcd;\n absc;\tabcde ';'; abcde\';\";";
105 |
106 | String[] split = sql.split("[\\s]*(?!'|\");(?!'|\")[\\s]*");
107 | for (String s : split) {
108 | System.out.println(s);
109 | }
110 | Assert.assertEquals(5, split.length);
111 |
112 | sql = "--我是注释 \n -------注释来了 \n -- 我也是注释\n select * from table 1 \n 哈哈--我的注释在后面";
113 |
114 |
115 | split = sql.split("\n");
116 |
117 | Arrays.stream(split).forEach(s -> {
118 |
119 | // System.out.println(s);
120 | s = s.replaceAll("\\s*-+.*", "");
121 | System.out.println(s);
122 |
123 |
124 | });
125 |
126 |
127 | }
128 |
129 | @Test
130 | public void lineSplit() {
131 |
132 | String regex = "\\s*-+.*\n$";
133 | String blank = "";
134 | Assert.assertEquals("--\n".replaceAll(regex, blank), blank);
135 | Assert.assertEquals("abc--\n".replaceAll(regex, blank), "abc");
136 | Assert.assertEquals("---注释\n".replaceAll(regex, blank), blank);
137 | Assert.assertEquals(" \t -----注释\n".replaceAll(regex, blank), blank);
138 |
139 |
140 | Assert.assertEquals("SELECT /*+ REPARTITION(1) */ md5".replaceAll("/\\*.*\\*/", blank), "SELECT md5");
141 | Assert.assertEquals("SELECT /****/ md5".replaceAll("/\\*.*\\*/", blank), "SELECT md5");
142 | Assert.assertEquals("SELECT /**/ md5".replaceAll("/\\*.*\\*/", blank), "SELECT md5");
143 | System.out.println("; \n".matches(";[ ]*\n+"));
144 |
145 | }
146 |
147 |
    @Test
    public void prestoTest() {
        // TODO(review): empty placeholder — add Presto SQL parsing assertions
        // or delete this test; as-is it always passes and verifies nothing.

    }
153 |
154 | }
155 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 | pom
6 |
7 | parsex-client
8 | parsex-core
9 | parsex-common
10 |
11 |
12 | org.springframework.boot
13 | spring-boot-starter-parent
14 | 2.2.4.RELEASE
15 |
16 |
17 | com.sucx.bigdata
18 | parsex
19 | 0.0.1-SNAPSHOT
20 | parsex
21 | Demo project for Spring Boot
22 |
23 |
24 | 1.8
25 | 4.8
26 | 4.11
27 | 1.7.16
28 | 1.2.17
29 | 2.4.0
30 | 2.11
31 |
32 |
33 |
34 |
35 |
36 |
37 | junit
38 | junit
39 | ${test.version}
40 | test
41 |
42 |
43 |
44 | com.google.guava
45 | guava
46 | 26.0-jre
47 |
48 |
49 | com.facebook.presto
50 | presto-parser
51 | 0.215
52 |
53 |
54 | org.jooq
55 | joor-java-8
56 | 0.9.12
57 |
58 |
59 | org.antlr
60 | antlr4
61 | ${antlr4.version}
62 |
63 |
64 | org.apache.hive
65 | hive-exec
66 | 2.3.4
67 |
68 |
69 |
70 | org.springframework.boot
71 | spring-boot-starter-web
72 |
73 |
74 |
75 | com.alibaba
76 | fastjson
77 | 1.2.70
78 |
79 |
80 |
81 | org.springframework.boot
82 | spring-boot-starter-test
83 | test
84 |
85 |
86 | org.junit.vintage
87 | junit-vintage-engine
88 |
89 |
90 |
91 |
92 | org.apache.httpcomponents
93 | httpclient
94 | 4.5.3
95 |
96 |
97 |
98 | org.slf4j
99 | slf4j-api
100 | ${slf4j.version}
101 | ${hadoop.deps.scope}
102 |
103 |
104 | org.slf4j
105 | slf4j-log4j12
106 | ${slf4j.version}
107 | ${hadoop.deps.scope}
108 |
109 |
110 | org.json4s
111 | json4s-jackson_${scala.binary.version}
112 | 3.5.3
113 |
114 |
115 | com.fasterxml.jackson.core
116 | *
117 |
118 |
119 |
120 |
121 | org.apache.spark
122 | spark-core_${scala.binary.version}
123 | ${spark.version}
124 |
125 |
126 |
127 | org.apache.spark
128 | spark-sql_${scala.binary.version}
129 | ${spark.version}
130 |
131 |
132 |
133 | org.apache.spark
134 | spark-catalyst_${scala.binary.version}
135 | ${spark.version}
136 |
137 |
138 |
139 | org.slf4j
140 | jul-to-slf4j
141 | ${slf4j.version}
142 |
143 |
144 | org.slf4j
145 | jcl-over-slf4j
146 | ${slf4j.version}
147 |
148 |
149 |
150 | log4j
151 | log4j
152 | ${log4j.version}
153 |
154 |
155 | org.scala-lang.modules
156 | scala-parser-combinators_${scala.binary.version}
157 | 1.1.0
158 |
159 |
160 | org.scala-lang
161 | scala-library
162 | 2.11.8
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | 主要是一个sql解析的小工具
2 | 可以搭配[分布式任务调度系统Hera](https://github.com/scxwhite/hera)或者其它调度使用。解析hive、spark sql的输入、输出表。达到自动依赖任务的目的
3 | 直接使用[SqlParseUtil](https://github.com/scxwhite/parseX/blob/master/parsex-core/src/main/java/com/sucx/core/SqlParseUtil.java) 类中的静态方法调用
4 |
--------------------------------------------------------------------------------