├── .gitignore ├── .mvn └── wrapper │ ├── MavenWrapperDownloader.java │ └── maven-wrapper.properties ├── mvnw ├── mvnw.cmd ├── parsex-client ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── sucx │ │ ├── App.java │ │ ├── controller │ │ └── HelloController.java │ │ └── util │ │ └── HttpUtils.java │ └── test │ └── java │ └── com │ └── sucx │ ├── AppTest.java │ └── util │ ├── HeraTest.java │ └── PrestoHttpTest.java ├── parsex-common ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── sucx │ └── common │ ├── Constants.java │ ├── enums │ ├── OperatorType.java │ └── SqlEnum.java │ ├── exceptions │ └── SqlParseException.java │ ├── model │ ├── Result.java │ └── TableInfo.java │ └── util │ ├── Pair.java │ └── StringUtils.java ├── parsex-core ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── sucx │ │ │ └── core │ │ │ ├── AbstractSqlParse.java │ │ │ ├── HiveSQLParse.java │ │ │ ├── PrestoSqlParse.java │ │ │ ├── SqlParse.java │ │ │ └── SqlParseUtil.java │ ├── resources │ │ └── log4j2.xml │ └── scala │ │ └── com │ │ └── sucx │ │ └── core │ │ └── SparkSQLParse.scala │ └── test │ └── java │ ├── com │ └── sucx │ │ └── core │ │ └── parse │ │ └── presto │ │ └── SqlBaseParserTest.java │ └── org │ └── apache │ └── spark │ └── sql │ └── catalyst │ └── expressions │ └── parse.java ├── pom.xml └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | target/ 3 | !.mvn/wrapper/maven-wrapper.jar 4 | !**/src/main/** 5 | !**/src/test/** 6 | 7 | 8 | *SqlParseTest* 9 | ### STS ### 10 | .apt_generated 11 | .classpath 12 | .factorypath 13 | .project 14 | .settings 15 | .springBeans 16 | .sts4-cache 17 | 18 | ### IntelliJ IDEA ### 19 | .idea 20 | *.iws 21 | *.iml 22 | *.ipr 23 | 24 | ### NetBeans ### 25 | /nbproject/private/ 26 | /nbbuild/ 27 | /dist/ 28 | /nbdist/ 29 | /.nb-gradle/ 30 | build/ 31 | 32 | ### VS Code ### 33 | .vscode/ 34 | ### Java template 35 | # Compiled class file 36 | *.class 37 | 
# Log file
*.log

# BlueJ files
*.ctxt

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

--------------------------------------------------------------------------------
/.mvn/wrapper/MavenWrapperDownloader.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2007-present the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.net.*;
import java.io.*;
import java.nio.channels.*;
import java.util.Properties;

/**
 * Bootstrap helper that downloads {@code maven-wrapper.jar} so the wrapper can run in
 * repositories that prohibit checking in binaries. Invoked by mvnw/mvnw.cmd with the
 * project base directory as {@code args[0]}; exits 0 on success, 1 on download failure.
 */
public class MavenWrapperDownloader {

    private static final String WRAPPER_VERSION = "0.5.6";

    /**
     * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
     */
    private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
            + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";

    /**
     * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
     * use instead of the default one.
     */
    private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
            ".mvn/wrapper/maven-wrapper.properties";

    /**
     * Path where the maven-wrapper.jar will be saved to.
     */
    private static final String MAVEN_WRAPPER_JAR_PATH =
            ".mvn/wrapper/maven-wrapper.jar";

    /**
     * Name of the property which should be used to override the default download url for the wrapper.
     */
    private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";

    public static void main(String[] args) {
        System.out.println("- Downloader started");
        File baseDirectory = new File(args[0]);
        System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());

        // If the maven-wrapper.properties exists, read it and check if it contains a custom
        // wrapperUrl parameter.
        File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
        String url = DEFAULT_DOWNLOAD_URL;
        if (mavenWrapperPropertyFile.exists()) {
            // try-with-resources replaces the original manual null-check/finally close
            // and guarantees the stream is released even if Properties.load throws.
            try (FileInputStream mavenWrapperPropertyFileInputStream =
                         new FileInputStream(mavenWrapperPropertyFile)) {
                Properties mavenWrapperProperties = new Properties();
                mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
                url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
            } catch (IOException e) {
                // Best effort: fall back to the default download URL.
                System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
            }
        }
        System.out.println("- Downloading from: " + url);

        File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
        if (!outputFile.getParentFile().exists()) {
            if (!outputFile.getParentFile().mkdirs()) {
                System.out.println(
                        "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
            }
        }
        System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
        try {
            downloadFileFromURL(url, outputFile);
            System.out.println("Done");
            System.exit(0);
        } catch (Throwable e) {
            System.out.println("- Error downloading");
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Streams the wrapper jar from {@code urlString} into {@code destination}, using
     * MVNW_USERNAME/MVNW_PASSWORD for HTTP basic auth when both are set.
     *
     * @throws Exception on any network or file error (caller exits with status 1)
     */
    private static void downloadFileFromURL(String urlString, File destination) throws Exception {
        if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
            String username = System.getenv("MVNW_USERNAME");
            char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
            Authenticator.setDefault(new Authenticator() {
                @Override
                protected PasswordAuthentication getPasswordAuthentication() {
                    return new PasswordAuthentication(username, password);
                }
            });
        }
        URL website = new URL(urlString);
        // FIX: the original closed fos/rbc only on the success path, leaking both the
        // socket-backed channel and the file handle if transferFrom threw.
        try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
             FileOutputStream fos = new FileOutputStream(destination)) {
            fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
        }
    }

}
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar 3 | -------------------------------------------------------------------------------- /mvnw: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ---------------------------------------------------------------------------- 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # https://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # ---------------------------------------------------------------------------- 20 | 21 | # ---------------------------------------------------------------------------- 22 | # Maven Start Up Batch script 23 | # 24 | # Required ENV vars: 25 | # ------------------ 26 | # JAVA_HOME - location of a JDK home dir 27 | # 28 | # Optional ENV vars 29 | # ----------------- 30 | # M2_HOME - location of maven2's installed home dir 31 | # MAVEN_OPTS - parameters passed to the Java VM when running Maven 32 | # e.g. 
to debug Maven itself, use 33 | # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 34 | # MAVEN_SKIP_RC - flag to disable loading of mavenrc files 35 | # ---------------------------------------------------------------------------- 36 | 37 | if [ -z "$MAVEN_SKIP_RC" ] ; then 38 | 39 | if [ -f /etc/mavenrc ] ; then 40 | . /etc/mavenrc 41 | fi 42 | 43 | if [ -f "$HOME/.mavenrc" ] ; then 44 | . "$HOME/.mavenrc" 45 | fi 46 | 47 | fi 48 | 49 | # OS specific support. $var _must_ be set to either true or false. 50 | cygwin=false; 51 | darwin=false; 52 | mingw=false 53 | case "`uname`" in 54 | CYGWIN*) cygwin=true ;; 55 | MINGW*) mingw=true;; 56 | Darwin*) darwin=true 57 | # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home 58 | # See https://developer.apple.com/library/mac/qa/qa1170/_index.html 59 | if [ -z "$JAVA_HOME" ]; then 60 | if [ -x "/usr/libexec/java_home" ]; then 61 | export JAVA_HOME="`/usr/libexec/java_home`" 62 | else 63 | export JAVA_HOME="/Library/Java/Home" 64 | fi 65 | fi 66 | ;; 67 | esac 68 | 69 | if [ -z "$JAVA_HOME" ] ; then 70 | if [ -r /etc/gentoo-release ] ; then 71 | JAVA_HOME=`java-config --jre-home` 72 | fi 73 | fi 74 | 75 | if [ -z "$M2_HOME" ] ; then 76 | ## resolve links - $0 may be a link to maven's home 77 | PRG="$0" 78 | 79 | # need this for relative symlinks 80 | while [ -h "$PRG" ] ; do 81 | ls=`ls -ld "$PRG"` 82 | link=`expr "$ls" : '.*-> \(.*\)$'` 83 | if expr "$link" : '/.*' > /dev/null; then 84 | PRG="$link" 85 | else 86 | PRG="`dirname "$PRG"`/$link" 87 | fi 88 | done 89 | 90 | saveddir=`pwd` 91 | 92 | M2_HOME=`dirname "$PRG"`/.. 
93 | 94 | # make it fully qualified 95 | M2_HOME=`cd "$M2_HOME" && pwd` 96 | 97 | cd "$saveddir" 98 | # echo Using m2 at $M2_HOME 99 | fi 100 | 101 | # For Cygwin, ensure paths are in UNIX format before anything is touched 102 | if $cygwin ; then 103 | [ -n "$M2_HOME" ] && 104 | M2_HOME=`cygpath --unix "$M2_HOME"` 105 | [ -n "$JAVA_HOME" ] && 106 | JAVA_HOME=`cygpath --unix "$JAVA_HOME"` 107 | [ -n "$CLASSPATH" ] && 108 | CLASSPATH=`cygpath --path --unix "$CLASSPATH"` 109 | fi 110 | 111 | # For Mingw, ensure paths are in UNIX format before anything is touched 112 | if $mingw ; then 113 | [ -n "$M2_HOME" ] && 114 | M2_HOME="`(cd "$M2_HOME"; pwd)`" 115 | [ -n "$JAVA_HOME" ] && 116 | JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" 117 | fi 118 | 119 | if [ -z "$JAVA_HOME" ]; then 120 | javaExecutable="`which javac`" 121 | if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then 122 | # readlink(1) is not available as standard on Solaris 10. 123 | readLink=`which readlink` 124 | if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then 125 | if $darwin ; then 126 | javaHome="`dirname \"$javaExecutable\"`" 127 | javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" 128 | else 129 | javaExecutable="`readlink -f \"$javaExecutable\"`" 130 | fi 131 | javaHome="`dirname \"$javaExecutable\"`" 132 | javaHome=`expr "$javaHome" : '\(.*\)/bin'` 133 | JAVA_HOME="$javaHome" 134 | export JAVA_HOME 135 | fi 136 | fi 137 | fi 138 | 139 | if [ -z "$JAVACMD" ] ; then 140 | if [ -n "$JAVA_HOME" ] ; then 141 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 142 | # IBM's JDK on AIX uses strange locations for the executables 143 | JAVACMD="$JAVA_HOME/jre/sh/java" 144 | else 145 | JAVACMD="$JAVA_HOME/bin/java" 146 | fi 147 | else 148 | JAVACMD="`which java`" 149 | fi 150 | fi 151 | 152 | if [ ! -x "$JAVACMD" ] ; then 153 | echo "Error: JAVA_HOME is not defined correctly." 
>&2 154 | echo " We cannot execute $JAVACMD" >&2 155 | exit 1 156 | fi 157 | 158 | if [ -z "$JAVA_HOME" ] ; then 159 | echo "Warning: JAVA_HOME environment variable is not set." 160 | fi 161 | 162 | CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher 163 | 164 | # traverses directory structure from process work directory to filesystem root 165 | # first directory with .mvn subdirectory is considered project base directory 166 | find_maven_basedir() { 167 | 168 | if [ -z "$1" ] 169 | then 170 | echo "Path not specified to find_maven_basedir" 171 | return 1 172 | fi 173 | 174 | basedir="$1" 175 | wdir="$1" 176 | while [ "$wdir" != '/' ] ; do 177 | if [ -d "$wdir"/.mvn ] ; then 178 | basedir=$wdir 179 | break 180 | fi 181 | # workaround for JBEAP-8937 (on Solaris 10/Sparc) 182 | if [ -d "${wdir}" ]; then 183 | wdir=`cd "$wdir/.."; pwd` 184 | fi 185 | # end of workaround 186 | done 187 | echo "${basedir}" 188 | } 189 | 190 | # concatenates all lines of a file 191 | concat_lines() { 192 | if [ -f "$1" ]; then 193 | echo "$(tr -s '\n' ' ' < "$1")" 194 | fi 195 | } 196 | 197 | BASE_DIR=`find_maven_basedir "$(pwd)"` 198 | if [ -z "$BASE_DIR" ]; then 199 | exit 1; 200 | fi 201 | 202 | ########################################################################################## 203 | # Extension to allow automatically downloading the maven-wrapper.jar from Maven-central 204 | # This allows using the maven wrapper in projects that prohibit checking in binary data. 205 | ########################################################################################## 206 | if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then 207 | if [ "$MVNW_VERBOSE" = true ]; then 208 | echo "Found .mvn/wrapper/maven-wrapper.jar" 209 | fi 210 | else 211 | if [ "$MVNW_VERBOSE" = true ]; then 212 | echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..." 
213 | fi 214 | if [ -n "$MVNW_REPOURL" ]; then 215 | jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 216 | else 217 | jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 218 | fi 219 | while IFS="=" read key value; do 220 | case "$key" in (wrapperUrl) jarUrl="$value"; break ;; 221 | esac 222 | done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties" 223 | if [ "$MVNW_VERBOSE" = true ]; then 224 | echo "Downloading from: $jarUrl" 225 | fi 226 | wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" 227 | if $cygwin; then 228 | wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"` 229 | fi 230 | 231 | if command -v wget > /dev/null; then 232 | if [ "$MVNW_VERBOSE" = true ]; then 233 | echo "Found wget ... using wget" 234 | fi 235 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then 236 | wget "$jarUrl" -O "$wrapperJarPath" 237 | else 238 | wget --http-user=$MVNW_USERNAME --http-password=$MVNW_PASSWORD "$jarUrl" -O "$wrapperJarPath" 239 | fi 240 | elif command -v curl > /dev/null; then 241 | if [ "$MVNW_VERBOSE" = true ]; then 242 | echo "Found curl ... using curl" 243 | fi 244 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then 245 | curl -o "$wrapperJarPath" "$jarUrl" -f 246 | else 247 | curl --user $MVNW_USERNAME:$MVNW_PASSWORD -o "$wrapperJarPath" "$jarUrl" -f 248 | fi 249 | 250 | else 251 | if [ "$MVNW_VERBOSE" = true ]; then 252 | echo "Falling back to using Java to download" 253 | fi 254 | javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java" 255 | # For Cygwin, switch paths to Windows format before running javac 256 | if $cygwin; then 257 | javaClass=`cygpath --path --windows "$javaClass"` 258 | fi 259 | if [ -e "$javaClass" ]; then 260 | if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then 261 | if [ "$MVNW_VERBOSE" = true ]; then 262 | echo " - Compiling MavenWrapperDownloader.java ..." 
263 | fi 264 | # Compiling the Java class 265 | ("$JAVA_HOME/bin/javac" "$javaClass") 266 | fi 267 | if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then 268 | # Running the downloader 269 | if [ "$MVNW_VERBOSE" = true ]; then 270 | echo " - Running MavenWrapperDownloader.java ..." 271 | fi 272 | ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$MAVEN_PROJECTBASEDIR") 273 | fi 274 | fi 275 | fi 276 | fi 277 | ########################################################################################## 278 | # End of extension 279 | ########################################################################################## 280 | 281 | export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} 282 | if [ "$MVNW_VERBOSE" = true ]; then 283 | echo $MAVEN_PROJECTBASEDIR 284 | fi 285 | MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" 286 | 287 | # For Cygwin, switch paths to Windows format before running java 288 | if $cygwin; then 289 | [ -n "$M2_HOME" ] && 290 | M2_HOME=`cygpath --path --windows "$M2_HOME"` 291 | [ -n "$JAVA_HOME" ] && 292 | JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` 293 | [ -n "$CLASSPATH" ] && 294 | CLASSPATH=`cygpath --path --windows "$CLASSPATH"` 295 | [ -n "$MAVEN_PROJECTBASEDIR" ] && 296 | MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` 297 | fi 298 | 299 | # Provide a "standardized" way to retrieve the CLI args that will 300 | # work with both Windows and non-Windows executions. 
301 | MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@" 302 | export MAVEN_CMD_LINE_ARGS 303 | 304 | WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 305 | 306 | exec "$JAVACMD" \ 307 | $MAVEN_OPTS \ 308 | -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ 309 | "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ 310 | ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" 311 | -------------------------------------------------------------------------------- /mvnw.cmd: -------------------------------------------------------------------------------- 1 | @REM ---------------------------------------------------------------------------- 2 | @REM Licensed to the Apache Software Foundation (ASF) under one 3 | @REM or more contributor license agreements. See the NOTICE file 4 | @REM distributed with this work for additional information 5 | @REM regarding copyright ownership. The ASF licenses this file 6 | @REM to you under the Apache License, Version 2.0 (the 7 | @REM "License"); you may not use this file except in compliance 8 | @REM with the License. You may obtain a copy of the License at 9 | @REM 10 | @REM https://www.apache.org/licenses/LICENSE-2.0 11 | @REM 12 | @REM Unless required by applicable law or agreed to in writing, 13 | @REM software distributed under the License is distributed on an 14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | @REM KIND, either express or implied. See the License for the 16 | @REM specific language governing permissions and limitations 17 | @REM under the License. 
18 | @REM ---------------------------------------------------------------------------- 19 | 20 | @REM ---------------------------------------------------------------------------- 21 | @REM Maven Start Up Batch script 22 | @REM 23 | @REM Required ENV vars: 24 | @REM JAVA_HOME - location of a JDK home dir 25 | @REM 26 | @REM Optional ENV vars 27 | @REM M2_HOME - location of maven2's installed home dir 28 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands 29 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending 30 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven 31 | @REM e.g. to debug Maven itself, use 32 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 33 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files 34 | @REM ---------------------------------------------------------------------------- 35 | 36 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' 37 | @echo off 38 | @REM set title of command window 39 | title %0 40 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' 41 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% 42 | 43 | @REM set %HOME% to equivalent of $HOME 44 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") 45 | 46 | @REM Execute a user defined script before this one 47 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre 48 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending 49 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" 50 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" 51 | :skipRcPre 52 | 53 | @setlocal 54 | 55 | set ERROR_CODE=0 56 | 57 | @REM To isolate internal variables from possible post scripts, we use another setlocal 58 | @setlocal 59 | 60 | @REM ==== START VALIDATION ==== 61 | if not "%JAVA_HOME%" == "" goto OkJHome 62 | 63 | echo. 64 | echo Error: JAVA_HOME not found in your environment. 
>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the >&2 66 | echo location of your Java installation. >&2 67 | echo. 68 | goto error 69 | 70 | :OkJHome 71 | if exist "%JAVA_HOME%\bin\java.exe" goto init 72 | 73 | echo. 74 | echo Error: JAVA_HOME is set to an invalid directory. >&2 75 | echo JAVA_HOME = "%JAVA_HOME%" >&2 76 | echo Please set the JAVA_HOME variable in your environment to match the >&2 77 | echo location of your Java installation. >&2 78 | echo. 79 | goto error 80 | 81 | @REM ==== END VALIDATION ==== 82 | 83 | :init 84 | 85 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn". 86 | @REM Fallback to current working directory if not found. 87 | 88 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% 89 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir 90 | 91 | set EXEC_DIR=%CD% 92 | set WDIR=%EXEC_DIR% 93 | :findBaseDir 94 | IF EXIST "%WDIR%"\.mvn goto baseDirFound 95 | cd .. 96 | IF "%WDIR%"=="%CD%" goto baseDirNotFound 97 | set WDIR=%CD% 98 | goto findBaseDir 99 | 100 | :baseDirFound 101 | set MAVEN_PROJECTBASEDIR=%WDIR% 102 | cd "%EXEC_DIR%" 103 | goto endDetectBaseDir 104 | 105 | :baseDirNotFound 106 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR% 107 | cd "%EXEC_DIR%" 108 | 109 | :endDetectBaseDir 110 | 111 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig 112 | 113 | @setlocal EnableExtensions EnableDelayedExpansion 114 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! 
%%a 115 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% 116 | 117 | :endReadAdditionalConfig 118 | 119 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" 120 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" 121 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 122 | 123 | set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 124 | 125 | FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( 126 | IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B 127 | ) 128 | 129 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central 130 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data. 131 | if exist %WRAPPER_JAR% ( 132 | if "%MVNW_VERBOSE%" == "true" ( 133 | echo Found %WRAPPER_JAR% 134 | ) 135 | ) else ( 136 | if not "%MVNW_REPOURL%" == "" ( 137 | SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 138 | ) 139 | if "%MVNW_VERBOSE%" == "true" ( 140 | echo Couldn't find %WRAPPER_JAR%, downloading it ... 141 | echo Downloading from: %DOWNLOAD_URL% 142 | ) 143 | 144 | powershell -Command "&{"^ 145 | "$webclient = new-object System.Net.WebClient;"^ 146 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ 147 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ 148 | "}"^ 149 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^ 150 | "}" 151 | if "%MVNW_VERBOSE%" == "true" ( 152 | echo Finished downloading %WRAPPER_JAR% 153 | ) 154 | ) 155 | @REM End of extension 156 | 157 | @REM Provide a "standardized" way to retrieve the CLI args that will 158 | @REM work with both Windows and non-Windows executions. 
159 | set MAVEN_CMD_LINE_ARGS=%* 160 | 161 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* 162 | if ERRORLEVEL 1 goto error 163 | goto end 164 | 165 | :error 166 | set ERROR_CODE=1 167 | 168 | :end 169 | @endlocal & set ERROR_CODE=%ERROR_CODE% 170 | 171 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost 172 | @REM check for post script, once with legacy .bat ending and once with .cmd ending 173 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" 174 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" 175 | :skipRcPost 176 | 177 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' 178 | if "%MAVEN_BATCH_PAUSE%" == "on" pause 179 | 180 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% 181 | 182 | exit /B %ERROR_CODE% 183 | -------------------------------------------------------------------------------- /parsex-client/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | parsex 7 | com.sucx.bigdata 8 | 0.0.1-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | parsex-client 13 | 0.0.1-SNAPSHOT 14 | 15 | 16 | 17 | org.apache.maven.plugins 18 | maven-compiler-plugin 19 | 20 | 8 21 | 8 22 | 23 | 24 | 25 | 26 | 27 | parsex-client 28 | 29 | 30 | 31 | 32 | 33 | org.datanucleus 34 | javax.jdo 35 | 3.2.0-m3 36 | 37 | 38 | 39 | 40 | org.datanucleus 41 | datanucleus-rdbms 42 | 4.1.19 43 | 44 | 45 | 46 | mysql 47 | mysql-connector-java 48 | 8.0.19 49 | 50 | 51 | 52 | 53 | org.datanucleus 54 | datanucleus-api-jdo 55 | 4.2.4 56 | 57 | 58 | 59 | org.datanucleus 60 | datanucleus-core 61 | 4.1.17 62 | 63 | 64 | 65 | org.jooq 66 | joor-java-8 67 | 68 | 69 | junit 70 | junit 71 | 4.11 72 | test 73 | 74 | 75 | com.alibaba 76 | fastjson 77 | 78 | 79 | org.apache.httpcomponents 80 | httpclient 81 | 82 | 83 | 84 | com.sucx.bigdata 85 | parsex-core 86 | 
0.0.1-SNAPSHOT

--------------------------------------------------------------------------------
/parsex-client/src/main/java/com/sucx/App.java:
--------------------------------------------------------------------------------
package com.sucx;

/**
 * Hello world!
 */
public class App {
    public static void main(String[] args) {
        System.out.println("Hello World!");
    }
}
--------------------------------------------------------------------------------
/parsex-client/src/main/java/com/sucx/controller/HelloController.java:
--------------------------------------------------------------------------------
package com.sucx.controller;

/**
 * desc:
 *
 * @author scx
 * @create 2020/02/26
 */

public class HelloController {

}
--------------------------------------------------------------------------------
/parsex-client/src/main/java/com/sucx/util/HttpUtils.java:
--------------------------------------------------------------------------------
package com.sucx.util;

import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.util.List;

/**
 * Minimal HTTP helper that issues GET/POST requests over a trust-all-self-signed
 * SSL client and returns the response body of 2xx==200 responses as a String.
 *
 * @author scx
 * @create 2020/02/26
 */
public class HttpUtils {


    /**
     * Issues an HTTP GET against {@code urlStr}.
     *
     * @param urlStr  absolute URL (scheme + host [+ port] + path [+ query])
     * @param headers optional request headers, may be null or empty
     * @return the response entity as a String when the status code is 200
     * @throws RuntimeException on non-200 status or any I/O failure
     */
    public static String doGet(String urlStr, List<BasicHeader> headers) {
        return doExecute(urlStr, new HttpGet(check(urlStr)), headers);
    }

    /**
     * Issues an HTTP POST against {@code urlStr}; same contract as {@link #doGet}.
     */
    public static String doPost(String urlStr, List<BasicHeader> headers) {
        return doExecute(urlStr, new HttpPost(check(urlStr)), headers);
    }

    private static String doExecute(String url, HttpUriRequest request, List<BasicHeader> headers) {
        if (headers != null && headers.size() > 0) {
            for (BasicHeader header : headers) {
                request.setHeader(header);
            }
        }
        SSLConnectionSocketFactory scsf = null;
        try {
            // Trust self-signed certificates and skip hostname verification
            // (internal endpoints); on failure fall back to the default factory.
            scsf = new SSLConnectionSocketFactory(
                    SSLContexts.custom().loadTrustMaterial(null, new TrustSelfSignedStrategy()).build(),
                    NoopHostnameVerifier.INSTANCE);
        } catch (NoSuchAlgorithmException | KeyManagementException | KeyStoreException e) {
            e.printStackTrace();
        }
        try {
            HttpClient httpClient = HttpClients.custom().setSSLSocketFactory(scsf).build();
            HttpResponse response = httpClient.execute(request);
            int code = response.getStatusLine().getStatusCode();
            if (code == 200) {
                return EntityUtils.toString(response.getEntity());
            } else {
                throw new RuntimeException(url + " http请求异常:" + response.getStatusLine().getStatusCode() + response.getEntity().toString());
            }
        } catch (IOException e) {
            throw new RuntimeException("发送http请求失败", e);
        }
    }


    /**
     * Normalizes {@code urlStr} into a URI, re-encoding path and query.
     *
     * @throws IllegalArgumentException if the URL cannot be parsed
     */
    private static URI check(String urlStr) {
        try {
            URL url = new URL(urlStr);
            // FIX: the original built the authority as host + ":" + getPort(), which
            // produced an invalid "host:-1" when no explicit port was present. The
            // host/port constructor overload handles port == -1 correctly.
            return new URI(url.getProtocol(), null, url.getHost(), url.getPort(), url.getPath(), url.getQuery(), null);
        } catch (URISyntaxException | MalformedURLException e) {
            // FIX: the original returned null here, which surfaced later as an
            // opaque NPE inside HttpGet/HttpPost. Fail fast with the cause instead.
            throw new IllegalArgumentException("Invalid url: " + urlStr, e);
        }

    }


}

-------------------------------------------------------------------------------- /parsex-client/src/test/java/com/sucx/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.sucx; 2 | 3 | import org.apache.hadoop.hive.conf.HiveConf; 4 | import org.apache.hadoop.hive.metastore.api.Schema; 5 | import org.apache.hadoop.hive.ql.Context; 6 | import org.apache.hadoop.hive.ql.Driver; 7 | import org.apache.hadoop.hive.ql.QueryPlan; 8 | import org.apache.hadoop.hive.ql.QueryState; 9 | import org.apache.hadoop.hive.ql.hooks.HookContext; 10 | import org.apache.hadoop.hive.ql.hooks.LineageLogger; 11 | import org.apache.hadoop.hive.ql.parse.ASTNode; 12 | import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; 13 | import org.apache.hadoop.hive.ql.parse.ParseUtils; 14 | import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; 15 | import org.apache.hadoop.hive.ql.session.SessionState; 16 | import org.apache.hadoop.security.UserGroupInformation; 17 | import org.joor.Reflect; 18 | import org.junit.Test; 19 | 20 | import java.util.HashMap; 21 | 22 | /** 23 | * Unit test for simple App. 
 */
public class AppTest {
    /**
     * Rigorous Test :-)
     *
     * Drives Hive's compile pipeline by hand (parse -> semantic analysis ->
     * QueryPlan) and feeds the resulting plan into LineageLogger via a
     * HookContext, so the lineage hook can be exercised without running a query.
     * NOTE(review): requires a local MySQL metastore on localhost and HDFS on
     * 127.0.0.1:8020 — this is an integration test, not a unit test; confirm
     * the environment before relying on it in CI.
     */
    @Test
    public void shouldAnswerWithTrue() throws Exception {

        LineageLogger logger = new LineageLogger();

        HiveConf hiveConf = new HiveConf();

        // Point the metastore at a local MySQL instance; the third argument is
        // only the "source" label HiveConf records for the setting.
        hiveConf.set("javax.jdo.option.ConnectionURL", "jdbc:mysql://localhost/metastore", "hive-conf.xml");
        hiveConf.set("javax.jdo.option.ConnectionDriverName", "com.mysql.jdbc.Driver", "hive-conf.xml");
        hiveConf.set("javax.jdo.option.ConnectionUserName", "root", "hive-conf.xml");
        hiveConf.set("javax.jdo.option.ConnectionPassword", "moye", "hive-conf.xml");
        hiveConf.set("fs.defaultFS", "hdfs://127.0.0.1:8020", "hdfs-site.xml");
        // Session scratch dirs; "hive.in.test" relaxes checks for test runs.
        hiveConf.set("_hive.hdfs.session.path", "hdfs://127.0.0.1:8020/tmp", "hive-conf.xml");
        hiveConf.set("_hive.local.session.path", "hdfs://127.0.0.1:8020/tmp", "hive-conf.xml");
        hiveConf.set("hive.in.test", "true", "hive-conf.xml");


        String sql = "insert overwrite table sucx.test select * from sucx.test2";
        QueryState queryState = new QueryState(hiveConf);

        Context context = new Context(hiveConf);

        // A current SessionState must be installed before semantic analysis.
        SessionState sessionState = new SessionState(hiveConf);

        SessionState.setCurrentSessionState(sessionState);

        // Parse to an AST, then run the statement-appropriate analyzer on it.
        ASTNode astNode = ParseUtils.parse(sql, context);

        BaseSemanticAnalyzer analyzer = SemanticAnalyzerFactory.get(queryState, astNode);

        analyzer.analyze(astNode, context);

        // Driver.getSchema is private; invoked reflectively via jOOR.
        Schema schema = Reflect.onClass(Driver.class).call("getSchema", analyzer, hiveConf).get();

        QueryPlan queryPlan = new QueryPlan(sql, analyzer, 0L, null, queryState.getHiveOperation(), schema);


        // Assemble the hook context the lineage hook would normally receive
        // from the Driver after query compilation.
        HookContext hookContext = new HookContext(queryPlan, queryState,
                new HashMap<>(), "sucx", "",
                "", "", "", "",
                true, null);

        hookContext.setUgi(UserGroupInformation.getCurrentUser());
        // Runs the lineage hook; presumably it logs lineage edges — verify output.
        logger.run(hookContext);


    }

}
--------------------------------------------------------------------------------
/parsex-client/src/test/java/com/sucx/util/HeraTest.java:
--------------------------------------------------------------------------------
package com.sucx.util;

import com.alibaba.fastjson.JSONObject;
import org.junit.Test;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;

/**
 * desc: generates a synthetic device report log (one JSON object per line) for
 * 1,000,000 devices across 10 dates, printing the per-day active count.
 *
 * @author scx
 * @create 2020/03/02
 */
public class HeraTest {


    @Test
    public void buildJson() throws IOException {
        // Create the test fixture file if it does not exist yet.
        File file = new File("/Users/scx/Desktop/reportLog.txt");
        if (!file.exists()) {
            // BUG FIX: File.createNewFile() returns true when the file WAS
            // created; the original threw on success. Fail only on false.
            if (!file.createNewFile()) {
                throw new IOException("新建文件失败:" + file.getAbsolutePath());
            }
        }
        // try-with-resources: the original never closed the writer, so the tail
        // of the buffer could be lost if flush() was skipped by an exception.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            // 1,000,000 devices
            int devSize = 10000 * 100;
            // report types
            String[] typeArr = {"OFFLINE", "ONLINE", "RESET", "ACTIVE"};
            // 10 days of dates
            String[] dateArr =
                    {"2020-02-01", "2020-02-02", "2020-02-03", "2020-02-04", "2020-02-05",
                            "2020-02-06", "2020-02-07", "2020-02-08", "2020-02-09", "2020-02-10"};
            // Fixed seed so repeated runs produce the same log.
            Random random = new Random(99999);
            String type;
            for (String date : dateArr) {
                int activeCount = 0;
                for (int i = 1; i <= devSize; i++) {
                    JSONObject json = new JSONObject();
                    // Assign a random report type; ONLINE/ACTIVE count as active.
                    type = typeArr[random.nextInt(typeArr.length)];
                    if ("ONLINE".equals(type) || "ACTIVE".equals(type)) {
                        activeCount++;
                    }
                    json.put("id", i);
                    json.put("type", type);
                    json.put("date", date);
                    writer.write(json.toJSONString());
                    writer.newLine();
                }
                System.out.println(String.format("日志:%s,活跃数:%d", date, activeCount));
            }
            writer.flush();
        }
    }

}
--------------------------------------------------------------------------------
/parsex-client/src/test/java/com/sucx/util/PrestoHttpTest.java: -------------------------------------------------------------------------------- 1 | package com.sucx.util; 2 | 3 | import com.alibaba.fastjson.JSONArray; 4 | import com.alibaba.fastjson.JSONObject; 5 | import com.facebook.presto.sql.parser.ParsingException; 6 | import com.sucx.util.HttpUtils; 7 | import com.sucx.core.PrestoSqlParse; 8 | import com.sucx.common.exceptions.SqlParseException; 9 | import com.sucx.common.model.Result; 10 | import com.sucx.core.SqlParseUtil; 11 | import org.apache.http.message.BasicHeader; 12 | import org.junit.Test; 13 | 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | import java.util.stream.Collectors; 17 | 18 | /** 19 | * desc: 20 | * 21 | * @author scx 22 | * @create 2020/03/10 23 | */ 24 | public class PrestoHttpTest { 25 | ArrayList headers = new ArrayList<>(); 26 | 27 | { 28 | headers.add(new BasicHeader("cookie", 29 | "_ga=GA1.2.1045647750.1571648344; 7ce0ff06556c05363a176b03dfdd5680=1160; a608ea7c4cbd1919ce039822a2e5d753=01160; cd1f6c4c522c03e21ad83ee2d7b0c515=%E8%8B%8F%E6%89%BF%E7%A5%A5%EF%BC%88%E8%8E%AB%E9%82%AA%EF%BC%89; e255ad9b8262a02d28bca48235a96357=1346; SSO_USER_TOKEN=p_19daf9e8b43332801f3d479b164cecfb" 30 | )); 31 | } 32 | 33 | @Test 34 | public void get() throws SqlParseException { 35 | 36 | String s = HttpUtils.doGet("https://prestonew-presto.bigdata-cn.xx-inc.top:7799/v1/query", headers); 37 | 38 | JSONArray array = JSONArray.parseArray(s); 39 | 40 | int size = array.size(); 41 | System.out.println(size); 42 | 43 | PrestoSqlParse sqlParse = new PrestoSqlParse(); 44 | for (int i = 0; i < size; i++) { 45 | JSONObject object = array.getJSONObject(i); 46 | String query = object.getString("query"); 47 | System.out.println(query); 48 | 49 | Result parse = null; 50 | try { 51 | parse = sqlParse.parse(query); 52 | } catch (SqlParseException e) { 53 | if (e.getCause() instanceof ParsingException) { 54 | System.out.println("sql解析异常:" + 
e.getMessage()); 55 | } else { 56 | throw new SqlParseException(e); 57 | } 58 | } 59 | SqlParseUtil.print(parse); 60 | } 61 | } 62 | 63 | public void test() { 64 | 65 | 66 | ArrayList test = new ArrayList<>(); 67 | 68 | 69 | List collect = test.stream().map(col -> { 70 | return col + "1"; 71 | }).collect(Collectors.toList()); 72 | 73 | for (String s : collect) { 74 | System.out.println(s); 75 | } 76 | } 77 | 78 | @Test 79 | public void replace() { 80 | 81 | String text = "10.1.1 "; 82 | 83 | 84 | System.out.println(text.replaceAll("(.*)", "")); 85 | 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /parsex-common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | parsex 7 | com.sucx.bigdata 8 | 0.0.1-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | parsex-common 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/Constants.java: -------------------------------------------------------------------------------- 1 | package com.sucx.common; 2 | 3 | /** 4 | * desc: 5 | * 6 | * @author scx 7 | * @create 2020/03/02 8 | */ 9 | public class Constants { 10 | 11 | public static final String SEMICOLON = ";"; 12 | public static final String POINT = "."; 13 | 14 | 15 | } 16 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/enums/OperatorType.java: -------------------------------------------------------------------------------- 1 | package com.sucx.common.enums; 2 | 3 | public enum OperatorType { 4 | 5 | READ, WRITE, CREATE, ALTER,DROP 6 | 7 | } 8 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/enums/SqlEnum.java: -------------------------------------------------------------------------------- 1 | package com.sucx.common.enums; 2 | 3 | 
public enum SqlEnum { 4 | 5 | HIVE, SPARK, PRESTO 6 | } 7 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/exceptions/SqlParseException.java: -------------------------------------------------------------------------------- 1 | package com.sucx.common.exceptions; 2 | 3 | /** 4 | * desc: 5 | * 6 | * @author scx 7 | * @create 2020/02/29 8 | */ 9 | public class SqlParseException extends Exception { 10 | 11 | public SqlParseException(Exception e) { 12 | super(e); 13 | } 14 | 15 | public SqlParseException(String e) { 16 | super(e); 17 | } 18 | public SqlParseException(String message, Throwable cause) { 19 | super(message, cause); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/model/Result.java: -------------------------------------------------------------------------------- 1 | package com.sucx.common.model; 2 | 3 | import java.util.Set; 4 | 5 | /** 6 | * desc: 7 | * 8 | * @author scx 9 | * @create 2020/02/26 10 | */ 11 | public class Result { 12 | 13 | /** 14 | * 输入表 15 | */ 16 | private Set inputSets; 17 | /** 18 | * 输出表 19 | */ 20 | private Set outputSets; 21 | 22 | /** 23 | * 临时表 24 | */ 25 | private Set tempSets; 26 | 27 | /** 28 | * 是否包含join操作 29 | */ 30 | private boolean join; 31 | 32 | public Result(){} 33 | 34 | public Result(Set inputSets, Set outputSets, Set tempSets) { 35 | this(inputSets, outputSets, tempSets, false); 36 | } 37 | 38 | 39 | public Result(Set inputSets, Set outputSets, Set tempSets, boolean join) { 40 | this.inputSets = inputSets; 41 | this.outputSets = outputSets; 42 | this.tempSets = tempSets; 43 | this.join = join; 44 | } 45 | 46 | 47 | @Override 48 | public String toString() { 49 | StringBuilder inputStr = new StringBuilder("*************************\n输入表为:\n"); 50 | StringBuilder outputStr = new StringBuilder("输出表为:\n"); 51 | StringBuilder tempStr = new 
StringBuilder("临时表为:\n"); 52 | 53 | inputSets.forEach(input -> inputStr.append(input.toString()).append(" ").append("\n")); 54 | outputSets.forEach(input -> outputStr.append(input.toString()).append(" ")); 55 | tempSets.forEach(input -> tempStr.append(input.toString()).append(" ")); 56 | 57 | return inputStr.append(outputStr).append(tempStr).toString(); 58 | } 59 | 60 | 61 | public boolean isJoin() { 62 | return join; 63 | } 64 | 65 | public Set getTempSets() { 66 | return tempSets; 67 | } 68 | 69 | public Set getInputSets() { 70 | return inputSets; 71 | } 72 | 73 | public void setInputSets(Set inputSets) { 74 | this.inputSets = inputSets; 75 | } 76 | 77 | public Set getOutputSets() { 78 | return outputSets; 79 | } 80 | 81 | public void setOutputSets(Set outputSets) { 82 | this.outputSets = outputSets; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/model/TableInfo.java: -------------------------------------------------------------------------------- 1 | package com.sucx.common.model; 2 | 3 | import com.sucx.common.Constants; 4 | import com.sucx.common.enums.OperatorType; 5 | import com.sucx.common.util.Pair; 6 | import com.sucx.common.util.StringUtils; 7 | 8 | import java.util.HashSet; 9 | import java.util.Set; 10 | import java.util.stream.Collectors; 11 | 12 | /** 13 | * desc: 14 | * 15 | * @author scx 16 | * @create 2020/02/26 17 | */ 18 | public class TableInfo { 19 | 20 | /** 21 | * 表名 22 | */ 23 | private String name; 24 | 25 | /** 26 | * 库名 27 | */ 28 | private String dbName; 29 | 30 | private OperatorType type; 31 | 32 | private Set columns; 33 | 34 | private String limit; 35 | 36 | private boolean selectAll; 37 | 38 | private boolean isDb; 39 | 40 | 41 | public TableInfo(){} 42 | 43 | 44 | public TableInfo(String dbName, OperatorType type) { 45 | this.dbName = dbName; 46 | this.type = type; 47 | this.isDb = true; 48 | } 49 | 50 | public TableInfo(String name, 
String dbName, OperatorType type, HashSet columns) { 51 | this.name = name; 52 | this.dbName = dbName; 53 | this.type = type; 54 | this.columns = new HashSet<>(columns); 55 | columns.clear(); 56 | optimizeColumn(); 57 | } 58 | 59 | public TableInfo(String dbAndTableName, OperatorType type, String defaultDb, HashSet columns) { 60 | if (dbAndTableName.contains(Constants.POINT)) { 61 | Pair pair = StringUtils.getPointPair(dbAndTableName); 62 | this.name = pair.getRight(); 63 | this.dbName = pair.getLeft(); 64 | } else { 65 | this.name = dbAndTableName; 66 | this.dbName = defaultDb; 67 | } 68 | this.columns = new HashSet<>(columns); 69 | this.type = type; 70 | columns.clear(); 71 | optimizeColumn(); 72 | } 73 | 74 | 75 | public Set getColumns() { 76 | return columns; 77 | } 78 | 79 | private void optimizeColumn() { 80 | String dbAndName = this.dbName + Constants.POINT + this.name; 81 | this.columns = this.columns.stream().map(column -> { 82 | if (!selectAll && column.endsWith("*")) { 83 | selectAll = true; 84 | } 85 | if (column.contains(Constants.POINT)) { 86 | Pair pair = StringUtils.getLastPointPair(column); 87 | if (pair.getLeft().equals(dbAndName)) { 88 | return pair.getRight(); 89 | } 90 | } 91 | return column; 92 | }).collect(Collectors.toSet()); 93 | } 94 | 95 | 96 | public boolean isDb() { 97 | return isDb; 98 | } 99 | 100 | public OperatorType getType() { 101 | return type; 102 | } 103 | 104 | public String getName() { 105 | return name; 106 | } 107 | 108 | 109 | public String getDbName() { 110 | return dbName; 111 | } 112 | 113 | public String getLimit() { 114 | return limit; 115 | } 116 | 117 | public void setLimit(String limit) { 118 | this.limit = limit; 119 | } 120 | 121 | 122 | public boolean isSelectAll() { 123 | return selectAll; 124 | } 125 | 126 | @Override 127 | public String toString() { 128 | StringBuilder str = new StringBuilder(); 129 | if (isDb) { 130 | str.append("[库]").append(dbName).append("[").append(type.name()).append("]"); 131 | } else 
{ 132 | str.append("[表]").append(dbName).append(Constants.POINT).append(name).append("[").append(type.name()).append("]"); 133 | } 134 | 135 | if (this.columns != null && this.columns.size() > 0) { 136 | str.append(" column[ "); 137 | this.columns.forEach(columns -> str.append(columns).append(" ")); 138 | str.append("]"); 139 | } 140 | if (limit != null) { 141 | str.append(" limit[ ").append(limit).append(" ]"); 142 | } 143 | return str.toString(); 144 | } 145 | 146 | 147 | @Override 148 | public boolean equals(Object obj) { 149 | if (!(obj instanceof TableInfo)) { 150 | return false; 151 | } 152 | 153 | TableInfo info = (TableInfo) obj; 154 | return this.dbName.equals(info.dbName) && this.name.equals(info.name) && this.type == info.type; 155 | } 156 | 157 | @Override 158 | public int hashCode() { 159 | if (this.name != null) { 160 | return this.dbName.hashCode() + this.name.hashCode() + this.type.hashCode(); 161 | } 162 | return this.dbName.hashCode() + this.type.hashCode(); 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/util/Pair.java: -------------------------------------------------------------------------------- 1 | package com.sucx.common.util; 2 | 3 | /** 4 | * desc: 5 | * 6 | * @author scx 7 | * @create 2020/06/04 8 | */ 9 | public class Pair { 10 | 11 | 12 | private L left; 13 | 14 | private R right; 15 | 16 | private Pair(L left, R right) { 17 | this.left = left; 18 | this.right = right; 19 | } 20 | 21 | public static Pair of(L left, R right) { 22 | return new Pair<>(left, right); 23 | } 24 | 25 | public L getLeft() { 26 | return left; 27 | } 28 | 29 | public R getRight(){ 30 | return right; 31 | } 32 | 33 | 34 | } 35 | -------------------------------------------------------------------------------- /parsex-common/src/main/java/com/sucx/common/util/StringUtils.java: -------------------------------------------------------------------------------- 1 | 
package com.sucx.common.util; 2 | 3 | 4 | import com.sucx.common.Constants; 5 | 6 | /** 7 | * desc: 8 | * 9 | * @author scx 10 | * @create 2020/03/12 11 | */ 12 | public class StringUtils { 13 | 14 | 15 | public static Pair getPointPair(String content) { 16 | return getPair(Constants.POINT, content, false); 17 | } 18 | 19 | public static Pair getLastPointPair(String content) { 20 | return getPair(Constants.POINT, content, true); 21 | } 22 | 23 | 24 | private static Pair getPair(String split, String content, boolean dir) { 25 | int index; 26 | if (dir) { 27 | index = content.lastIndexOf(Constants.POINT); 28 | } else { 29 | index = content.indexOf(Constants.POINT); 30 | } 31 | if (index == -1) { 32 | throw new RuntimeException("not contain . character:" + content); 33 | } 34 | String left = content.substring(0, index); 35 | String right = content.substring(index + 1); 36 | return Pair.of(left, right); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /parsex-core/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | parsex 7 | com.sucx.bigdata 8 | 0.0.1-SNAPSHOT 9 | 10 | 4.0.0 11 | 0.0.1-SNAPSHOT 12 | 13 | parsex-core 14 | 15 | 16 | junit 17 | junit 18 | test 19 | 20 | 21 | 22 | com.sucx.bigdata 23 | parsex-common 24 | 0.0.1-SNAPSHOT 25 | 26 | 27 | com.facebook.presto 28 | presto-parser 29 | 30 | 31 | org.apache.hive 32 | hive-exec 33 | 34 | 35 | org.eclipse.jetty.orbit 36 | javax.servlet 37 | 38 | 39 | org.apache.hive 40 | hive-service-rpc 41 | 42 | 43 | 44 | 45 | org.scala-lang 46 | scala-library 47 | 48 | 49 | 50 | org.antlr 51 | antlr4 52 | 53 | 54 | org.apache.spark 55 | spark-catalyst_${scala.binary.version} 56 | 57 | 58 | javax.servlet 59 | javax.servlet-api 60 | 61 | 62 | 63 | 64 | 65 | org.apache.spark 66 | spark-sql_${scala.binary.version} 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | org.apache.maven.plugins 76 | maven-shade-plugin 77 | 3.0.0 78 | 79 | 
false 80 | false 81 | true 82 | false 83 | false 84 | 85 | 86 | 87 | package 88 | 89 | shade 90 | 91 | 92 | 93 | 94 | org.apache.hive:hive-exec 95 | 96 | 97 | 98 | 99 | *:* 100 | 101 | META-INF/*.SF 102 | META-INF/*.DSA 103 | META-INF/*.RSA 104 | 105 | 106 | 107 | 108 | 109 | com.google.guava 110 | com.medata.google.guava 111 | 112 | 113 | com.google.common 114 | com.medata.google.common 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | net.alchim31.maven 125 | scala-maven-plugin 126 | 3.3.1 127 | 128 | 129 | scala-compile-first 130 | process-resources 131 | 132 | add-source 133 | compile 134 | 135 | 136 | 137 | compile 138 | 139 | compile 140 | testCompile 141 | 142 | 143 | 144 | 145 | ${scala.binary.version} 146 | 147 | 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /parsex-core/src/main/java/com/sucx/core/AbstractSqlParse.java: -------------------------------------------------------------------------------- 1 | package com.sucx.core; 2 | 3 | import com.sucx.common.Constants; 4 | import com.sucx.common.enums.OperatorType; 5 | import com.sucx.common.exceptions.SqlParseException; 6 | import com.sucx.common.model.Result; 7 | import com.sucx.common.model.TableInfo; 8 | import com.sucx.common.util.Pair; 9 | import com.sucx.common.util.StringUtils; 10 | import org.apache.log4j.Logger; 11 | import scala.Tuple3; 12 | 13 | import java.util.*; 14 | import java.util.stream.Collectors; 15 | 16 | /** 17 | * desc: 18 | * 19 | * @author scx 20 | * @create 2020/02/29 21 | */ 22 | public abstract class AbstractSqlParse implements SqlParse { 23 | 24 | 25 | private static Logger log = Logger.getLogger(AbstractSqlParse.class); 26 | 27 | 28 | protected final String columnSplit = ","; 29 | protected Map tableAliaMap; 30 | protected Stack> columnsStack; 31 | protected Stack limitStack; 32 | 33 | protected boolean hasJoin; 34 | 35 | protected String currentDb; 36 | 37 | 38 | protected HashSet splitColumn(Set columns, 
Map tableMap) { 39 | return (HashSet) columns.stream() 40 | .flatMap(column -> Arrays.stream(column.split(columnSplit))) 41 | .collect(Collectors.toSet()) 42 | .stream() 43 | .map(column -> { 44 | if (column.contains(Constants.POINT)) { 45 | Pair pair = StringUtils.getPointPair(column); 46 | String aDefault = tableMap.getOrDefault(pair.getLeft(), pair.getLeft()); 47 | return aDefault + Constants.POINT + pair.getRight(); 48 | } 49 | return column; 50 | }).collect(Collectors.toSet()); 51 | } 52 | 53 | 54 | protected HashSet getColumnsTop() { 55 | if (columnsStack.isEmpty()) { 56 | return new HashSet<>(0); 57 | } 58 | return columnsStack.pop(); 59 | } 60 | 61 | 62 | protected String getLimitTop() { 63 | if (limitStack.isEmpty()) { 64 | return null; 65 | } 66 | return limitStack.pop(); 67 | } 68 | 69 | 70 | protected TableInfo buildTableInfo(String name, String db, OperatorType type) { 71 | TableInfo info = new TableInfo(name, db, type, splitColumn(getColumnsTop(), tableAliaMap)); 72 | info.setLimit(getLimitTop()); 73 | return info; 74 | } 75 | 76 | protected TableInfo buildTableInfo(String dbAndTable, OperatorType type) { 77 | TableInfo info = new TableInfo(dbAndTable, type, currentDb, splitColumn(getColumnsTop(), tableAliaMap)); 78 | info.setLimit(getLimitTop()); 79 | return info; 80 | } 81 | 82 | /** 83 | * 替换sql注释 84 | * 85 | * @param sqlText sql 86 | * @return 替换后的sl 87 | */ 88 | protected String replaceNotes(String sqlText) { 89 | StringBuilder newSql = new StringBuilder(); 90 | String lineBreak = "\n"; 91 | String empty = ""; 92 | String trimLine; 93 | for (String line : sqlText.split(lineBreak)) { 94 | trimLine = line.trim(); 95 | if (!trimLine.startsWith("--") && !trimLine.startsWith("download")) { 96 | //过滤掉行内注释 97 | line = line.replaceAll("/\\*.*\\*/", empty); 98 | if (org.apache.commons.lang3.StringUtils.isNotBlank(line)) { 99 | newSql.append(line).append(lineBreak); 100 | } 101 | } 102 | } 103 | return newSql.toString(); 104 | } 105 | 106 | 107 | /** 108 | 
* ;分割多段sql 109 | * 110 | * @param sqlText sql 111 | * @return 112 | */ 113 | protected ArrayList splitSql(String sqlText) { 114 | String[] sqlArray = sqlText.split(Constants.SEMICOLON); 115 | ArrayList newSqlArray = new ArrayList<>(sqlArray.length); 116 | String command = ""; 117 | int arrayLen = sqlArray.length; 118 | String oneCmd; 119 | for (int i = 0; i < arrayLen; i++) { 120 | oneCmd = sqlArray[i]; 121 | boolean keepSemicolon = (oneCmd.endsWith("'") && i + 1 < arrayLen && sqlArray[i + 1].startsWith("'")) 122 | || (oneCmd.endsWith("\"") && i + 1 < arrayLen && sqlArray[i + 1].startsWith("\"")); 123 | if (oneCmd.endsWith("\\")) { 124 | command += org.apache.commons.lang.StringUtils.chop(oneCmd) + Constants.SEMICOLON; 125 | continue; 126 | } else if (keepSemicolon) { 127 | command += oneCmd + Constants.SEMICOLON; 128 | continue; 129 | } else { 130 | command += oneCmd; 131 | } 132 | if (org.apache.commons.lang3.StringUtils.isBlank(command)) { 133 | continue; 134 | } 135 | newSqlArray.add(command); 136 | command = ""; 137 | } 138 | return newSqlArray; 139 | } 140 | 141 | 142 | @Override 143 | public Result parse(String sqlText) throws SqlParseException { 144 | 145 | ArrayList sqlArray = this.splitSql(this.replaceNotes(sqlText)); 146 | HashSet inputTables = new HashSet<>(); 147 | HashSet outputTables = new HashSet<>(); 148 | HashSet tempTables = new HashSet<>(); 149 | 150 | columnsStack = new Stack<>(); 151 | tableAliaMap = new HashMap<>(); 152 | limitStack = new Stack<>(); 153 | currentDb = "default"; 154 | hasJoin = false; 155 | for (String sql : sqlArray) { 156 | if (sql.charAt(sql.length() - 1) == ';') { 157 | sql = sql.substring(0, sql.length() - 1); 158 | } 159 | if (org.apache.commons.lang3.StringUtils.isBlank(sql)) { 160 | continue; 161 | } 162 | columnsStack.clear(); 163 | limitStack.clear(); 164 | Tuple3, HashSet, HashSet> subTuple = this.parseInternal(sql); 165 | inputTables.addAll(subTuple._1()); 166 | outputTables.addAll(subTuple._2()); 167 | 
tempTables.addAll(subTuple._3()); 168 | } 169 | 170 | tempTables.forEach(table -> { 171 | Iterator iterator = inputTables.iterator(); 172 | while (iterator.hasNext()) { 173 | TableInfo checkTable = iterator.next(); 174 | if (checkTable.getName().equals(table.getName())) { 175 | iterator.remove(); 176 | break; 177 | } 178 | } 179 | }); 180 | 181 | return new Result(inputTables, outputTables, tempTables, hasJoin); 182 | } 183 | 184 | /** 185 | * 抽象解析 186 | * 187 | * @param sqlText sql 188 | * @return tuple4 189 | * @throws SqlParseException 190 | */ 191 | protected abstract Tuple3, HashSet, HashSet> parseInternal(String sqlText) throws SqlParseException; 192 | 193 | protected void print(String plan) { 194 | log.info("************ignore plan******\n" + plan); 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /parsex-core/src/main/java/com/sucx/core/HiveSQLParse.java: -------------------------------------------------------------------------------- 1 | package com.sucx.core; 2 | 3 | import com.sucx.common.Constants; 4 | import com.sucx.common.enums.OperatorType; 5 | import com.sucx.common.exceptions.SqlParseException; 6 | import com.sucx.common.model.TableInfo; 7 | import org.apache.hadoop.hive.ql.lib.*; 8 | import org.apache.hadoop.hive.ql.parse.*; 9 | import scala.Tuple3; 10 | 11 | import java.util.*; 12 | 13 | /** 14 | * desc: 15 | * 16 | * @author scx 17 | * @create 2020/02/29 18 | */ 19 | public class HiveSQLParse extends AbstractSqlParse implements NodeProcessor { 20 | 21 | /** 22 | * 临时输入表 23 | */ 24 | private HashSet inputTableList; 25 | 26 | /** 27 | * 临时输出表 28 | */ 29 | private HashSet outputTableList; 30 | 31 | /** 32 | * 临时表 33 | */ 34 | private HashSet withTableList; 35 | 36 | 37 | @Override 38 | public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) { 39 | ASTNode ast = (ASTNode) nd; 40 | switch (ast.getToken().getType()) { 41 | //create语句 42 | case HiveParser.TOK_CREATETABLE: { 43 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); 44 | outputTableList.add(new TableInfo(tableName, OperatorType.CREATE, currentDb, new HashSet<>())); 45 | break; 46 | } 47 | //insert语句 48 | case HiveParser.TOK_TAB: { 49 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); 50 | outputTableList.add(new TableInfo(tableName, OperatorType.WRITE, currentDb, new HashSet<>())); 51 | break; 52 | } 53 | //from语句 54 | case HiveParser.TOK_TABREF: { 55 | ASTNode tabTree = (ASTNode) ast.getChild(0); 56 | String tableName = (tabTree.getChildCount() == 1) ? BaseSemanticAnalyzer.getUnescapedName((ASTNode) tabTree.getChild(0)) : BaseSemanticAnalyzer.getUnescapedName((ASTNode) tabTree.getChild(0)) + "." + tabTree.getChild(1); 57 | inputTableList.add(new TableInfo(tableName, OperatorType.READ, currentDb, new HashSet<>())); 58 | break; 59 | } 60 | // with.....语句 61 | case HiveParser.TOK_CTE: { 62 | for (int i = 0; i < ast.getChildCount(); i++) { 63 | ASTNode temp = (ASTNode) ast.getChild(i); 64 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) temp.getChild(1)); 65 | withTableList.add(new TableInfo(tableName, OperatorType.READ, "temp", new HashSet<>())); 66 | } 67 | break; 68 | } 69 | //ALTER 语句 70 | case HiveParser.TOK_ALTERTABLE: { 71 | String tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); 72 | inputTableList.add(new TableInfo(tableName, OperatorType.ALTER, currentDb, new HashSet<>())); 73 | break; 74 | } 75 | case HiveParser.TOK_SWITCHDATABASE: { 76 | this.currentDb = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText()); 77 | break; 78 | } 79 | case HiveParser.TOK_CREATEDATABASE: { 80 | String dbName = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText()); 81 | inputTableList.add(new 
TableInfo(dbName, OperatorType.CREATE)); 82 | break; 83 | } 84 | case HiveParser.TOK_DROPDATABASE: { 85 | String dbName = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText()); 86 | inputTableList.add(new TableInfo(dbName, OperatorType.DROP)); 87 | break; 88 | } 89 | case HiveParser.TOK_ALTERDATABASE_OWNER: 90 | case HiveParser.TOK_ALTERDATABASE_PROPERTIES: { 91 | String dbName = BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText()); 92 | inputTableList.add(new TableInfo(dbName, OperatorType.ALTER)); 93 | break; 94 | } 95 | default: { 96 | return null; 97 | } 98 | } 99 | return null; 100 | } 101 | 102 | @Override 103 | protected String replaceNotes(String sqlText) { 104 | StringBuilder builder = new StringBuilder(); 105 | String lineBreak = "\n"; 106 | for (String line : sqlText.split(lineBreak)) { 107 | //udf 添加的去掉,目前无法解析,会抛异常 108 | if (line.toLowerCase().startsWith("add jar") || line.toLowerCase().startsWith("set")) { 109 | int splitIndex = line.indexOf(Constants.SEMICOLON); 110 | if (splitIndex != -1 && splitIndex + 1 != line.length()) { 111 | builder.append(line.substring(splitIndex + 1)).append(lineBreak); 112 | } 113 | } else { 114 | builder.append(line).append(lineBreak); 115 | } 116 | } 117 | return super.replaceNotes(builder.toString()); 118 | } 119 | 120 | @Override 121 | protected Tuple3, HashSet, HashSet> parseInternal(String sqlText) throws SqlParseException { 122 | ParseDriver pd = new ParseDriver(); 123 | ASTNode tree; 124 | try { 125 | tree = pd.parse(sqlText); 126 | } catch (ParseException e) { 127 | throw new SqlParseException(e); 128 | } 129 | while ((tree.getToken() == null) && (tree.getChildCount() > 0)) { 130 | tree = (ASTNode) tree.getChild(0); 131 | } 132 | inputTableList = new HashSet<>(); 133 | outputTableList = new HashSet<>(); 134 | withTableList = new HashSet<>(); 135 | Map rules = new LinkedHashMap<>(); 136 | 137 | GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(this, rules, null)); 138 | 
139 | ArrayList topNodes = new ArrayList<>(); 140 | topNodes.add(tree); 141 | try { 142 | ogw.startWalking(topNodes, null); 143 | } catch (SemanticException e) { 144 | throw new RuntimeException(e); 145 | } 146 | return new Tuple3<>(inputTableList, outputTableList, withTableList); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /parsex-core/src/main/java/com/sucx/core/PrestoSqlParse.java: -------------------------------------------------------------------------------- 1 | package com.sucx.core; 2 | 3 | import com.facebook.presto.sql.parser.ParsingException; 4 | import com.facebook.presto.sql.parser.ParsingOptions; 5 | import com.facebook.presto.sql.parser.SqlParser; 6 | import com.facebook.presto.sql.tree.*; 7 | import com.sucx.common.Constants; 8 | import com.sucx.common.enums.OperatorType; 9 | import com.sucx.common.exceptions.SqlParseException; 10 | import com.sucx.common.model.TableInfo; 11 | import org.apache.log4j.Logger; 12 | import scala.Tuple3; 13 | 14 | import java.util.ArrayList; 15 | import java.util.Arrays; 16 | import java.util.HashSet; 17 | import java.util.List; 18 | import java.util.stream.Collectors; 19 | 20 | /** 21 | * 目前presto 仅仅是用来查询,目前还没解析输入表 22 | * desc: 解析presto sql的输入表、字段 23 | * 24 | * @author scx 25 | * @create 2020/03/09 26 | */ 27 | 28 | public class PrestoSqlParse extends AbstractSqlParse { 29 | 30 | private static Logger log = Logger.getLogger(PrestoSqlParse.class); 31 | 32 | private HashSet inputTables; 33 | private HashSet outputTables; 34 | private HashSet tempTables; 35 | 36 | /** 37 | * select 字段表达式中获取字段 38 | * 39 | * @param expression 40 | * @return 41 | */ 42 | private String getColumn(Expression expression) throws SqlParseException { 43 | if (expression instanceof IfExpression) { 44 | IfExpression ifExpression = (IfExpression) expression; 45 | List list = new ArrayList<>(); 46 | list.add(ifExpression.getCondition()); 47 | list.add(ifExpression.getTrueValue()); 48 | 
ifExpression.getFalseValue().ifPresent(list::add); 49 | return getString(list); 50 | } else if (expression instanceof Identifier) { 51 | Identifier identifier = (Identifier) expression; 52 | return identifier.getValue(); 53 | } else if (expression instanceof FunctionCall) { 54 | FunctionCall call = (FunctionCall) expression; 55 | StringBuilder columns = new StringBuilder(); 56 | List arguments = call.getArguments(); 57 | int size = arguments.size(); 58 | for (int i = 0; i < size; i++) { 59 | Expression exp = arguments.get(i); 60 | if (i == 0) { 61 | columns.append(getColumn(exp)); 62 | } else { 63 | columns.append(getColumn(exp)).append(columnSplit); 64 | } 65 | } 66 | return columns.toString(); 67 | } else if (expression instanceof ComparisonExpression) { 68 | ComparisonExpression compare = (ComparisonExpression) expression; 69 | return getString(compare.getLeft(), compare.getRight()); 70 | } else if (expression instanceof Literal || expression instanceof ArithmeticUnaryExpression) { 71 | return ""; 72 | } else if (expression instanceof Cast) { 73 | Cast cast = (Cast) expression; 74 | return getColumn(cast.getExpression()); 75 | } else if (expression instanceof DereferenceExpression) { 76 | DereferenceExpression reference = (DereferenceExpression) expression; 77 | return reference.toString(); 78 | } else if (expression instanceof ArithmeticBinaryExpression) { 79 | ArithmeticBinaryExpression binaryExpression = (ArithmeticBinaryExpression) expression; 80 | return getString(binaryExpression.getLeft(), binaryExpression.getRight()); 81 | } else if (expression instanceof SearchedCaseExpression) { 82 | SearchedCaseExpression caseExpression = (SearchedCaseExpression) expression; 83 | List exps = caseExpression.getWhenClauses().stream().map(whenClause -> (Expression) whenClause).collect(Collectors.toList()); 84 | caseExpression.getDefaultValue().ifPresent(exps::add); 85 | return getString(exps); 86 | } else if (expression instanceof WhenClause) { 87 | WhenClause whenClause 
= (WhenClause) expression; 88 | return getString(whenClause.getOperand(), whenClause.getResult()); 89 | } else if (expression instanceof LikePredicate) { 90 | LikePredicate likePredicate = (LikePredicate) expression; 91 | return likePredicate.getValue().toString(); 92 | } else if (expression instanceof InPredicate) { 93 | InPredicate predicate = (InPredicate) expression; 94 | return predicate.getValue().toString(); 95 | } else if (expression instanceof SubscriptExpression) { 96 | SubscriptExpression subscriptExpression = (SubscriptExpression) expression; 97 | return getColumn(subscriptExpression.getBase()); 98 | } else if (expression instanceof LogicalBinaryExpression) { 99 | LogicalBinaryExpression logicExp = (LogicalBinaryExpression) expression; 100 | return getString(logicExp.getLeft(), logicExp.getRight()); 101 | } else if (expression instanceof IsNullPredicate) { 102 | IsNullPredicate isNullExp = (IsNullPredicate) expression; 103 | return getColumn(isNullExp.getValue()); 104 | } else if (expression instanceof IsNotNullPredicate) { 105 | IsNotNullPredicate notNull = (IsNotNullPredicate) expression; 106 | return getColumn(notNull.getValue()); 107 | } else if (expression instanceof CoalesceExpression) { 108 | CoalesceExpression coalesce = (CoalesceExpression) expression; 109 | return getString(coalesce.getOperands()); 110 | } 111 | throw new SqlParseException("无法识别的表达式:" + expression.getClass().getName()); 112 | // return expression.toString(); 113 | } 114 | 115 | 116 | private String getString(Expression... 
exps) throws SqlParseException { 117 | return getString(Arrays.stream(exps).collect(Collectors.toList())); 118 | } 119 | 120 | private String getString(List exps) throws SqlParseException { 121 | StringBuilder builder = new StringBuilder(); 122 | for (Expression exp : exps) { 123 | builder.append(getColumn(exp)).append(columnSplit); 124 | } 125 | return builder.toString(); 126 | } 127 | 128 | 129 | /** 130 | * node 节点的遍历 131 | * 132 | * @param node 133 | */ 134 | private void checkNode(Node node) throws SqlParseException { 135 | if (node instanceof QuerySpecification) { 136 | QuerySpecification query = (QuerySpecification) node; 137 | query.getLimit().ifPresent(limit -> limitStack.push(limit)); 138 | loopNode(query.getChildren()); 139 | } else if (node instanceof TableSubquery) { 140 | loopNode(node.getChildren()); 141 | } else if (node instanceof AliasedRelation) { 142 | AliasedRelation alias = (AliasedRelation) node; 143 | String value = alias.getAlias().getValue(); 144 | if (alias.getChildren().size() == 1 && alias.getChildren().get(0) instanceof Table) { 145 | Table table = (Table) alias.getChildren().get(0); 146 | tableAliaMap.put(value, table.getName().toString()); 147 | } else { 148 | tempTables.add(buildTableInfo(value, OperatorType.READ)); 149 | } 150 | loopNode(node.getChildren()); 151 | } else if (node instanceof Query || node instanceof SubqueryExpression 152 | || node instanceof Union || node instanceof With 153 | || node instanceof LogicalBinaryExpression || node instanceof InPredicate) { 154 | loopNode(node.getChildren()); 155 | 156 | } else if (node instanceof Join) { 157 | hasJoin = true; 158 | loopNode(node.getChildren()); 159 | } 160 | //基本都是where条件,过滤掉,如果需要,可以调用getColumn解析字段 161 | else if (node instanceof LikePredicate || node instanceof NotExpression 162 | || node instanceof IfExpression 163 | || node instanceof ComparisonExpression || node instanceof GroupBy 164 | || node instanceof OrderBy || node instanceof Identifier 165 | || node 
instanceof InListExpression || node instanceof DereferenceExpression 166 | || node instanceof IsNotNullPredicate || node instanceof IsNullPredicate 167 | || node instanceof FunctionCall) { 168 | print(node.getClass().getName()); 169 | 170 | } else if (node instanceof WithQuery) { 171 | WithQuery withQuery = (WithQuery) node; 172 | tempTables.add(buildTableInfo(withQuery.getName().getValue(), OperatorType.READ)); 173 | loopNode(withQuery.getChildren()); 174 | } else if (node instanceof Table) { 175 | Table table = (Table) node; 176 | inputTables.add(buildTableInfo(table.getName().toString(), OperatorType.READ)); 177 | loopNode(table.getChildren()); 178 | } else if (node instanceof Select) { 179 | Select select = (Select) node; 180 | List selectItems = select.getSelectItems(); 181 | HashSet columns = new HashSet<>(); 182 | for (SelectItem item : selectItems) { 183 | if (item instanceof SingleColumn) { 184 | columns.add(getColumn(((SingleColumn) item).getExpression())); 185 | } else if (item instanceof AllColumns) { 186 | columns.add(item.toString()); 187 | } else { 188 | throw new SqlParseException("unknow column type:" + item.getClass().getName()); 189 | } 190 | } 191 | columnsStack.push(columns); 192 | 193 | } else { 194 | throw new SqlParseException("unknow node type:" + node.getClass().getName()); 195 | } 196 | } 197 | 198 | 199 | private void loopNode(List children) throws SqlParseException { 200 | for (Node node : children) { 201 | this.checkNode(node); 202 | } 203 | } 204 | 205 | /** 206 | * statement 过滤 只识别select 语句 207 | * 208 | * @param statement 209 | * @throws SqlParseException 210 | */ 211 | private void check(Statement statement) throws SqlParseException { 212 | if (statement instanceof Query) { 213 | Query query = (Query) statement; 214 | List children = query.getChildren(); 215 | for (Node child : children) { 216 | checkNode(child); 217 | } 218 | } else if (statement instanceof Use) { 219 | Use use = (Use) statement; 220 | this.currentDb = 
use.getSchema().getValue(); 221 | } else if (statement instanceof ShowColumns) { 222 | ShowColumns show = (ShowColumns) statement; 223 | String allName = show.getTable().toString().replace("hive.", ""); 224 | inputTables.add(buildTableInfo(allName, OperatorType.READ)); 225 | } else if (statement instanceof ShowTables) { 226 | ShowTables show = (ShowTables) statement; 227 | QualifiedName qualifiedName = show.getSchema().orElseThrow(() -> new SqlParseException("unkonw table name or db name" + statement.toString())); 228 | String allName = qualifiedName.toString().replace("hive.", ""); 229 | if (allName.contains(Constants.POINT)) { 230 | allName += Constants.POINT + "*"; 231 | } 232 | inputTables.add(buildTableInfo(allName, OperatorType.READ)); 233 | 234 | } else { 235 | throw new SqlParseException("sorry,only support read statement,unSupport statement:" + statement.getClass().getName()); 236 | } 237 | } 238 | 239 | 240 | @Override 241 | protected Tuple3, HashSet, HashSet> parseInternal(String sqlText) throws SqlParseException { 242 | this.inputTables = new HashSet<>(); 243 | this.outputTables = new HashSet<>(); 244 | this.tempTables = new HashSet<>(); 245 | try { 246 | check(new SqlParser().createStatement(sqlText, new ParsingOptions(ParsingOptions.DecimalLiteralTreatment.AS_DECIMAL))); 247 | } catch (ParsingException e) { 248 | throw new SqlParseException("parse sql exception:" + e.getMessage(), e); 249 | } 250 | return new Tuple3<>(inputTables, outputTables, tempTables); 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /parsex-core/src/main/java/com/sucx/core/SqlParse.java: -------------------------------------------------------------------------------- 1 | package com.sucx.core; 2 | 3 | import com.sucx.common.exceptions.SqlParseException; 4 | import com.sucx.common.model.Result; 5 | 6 | /** 7 | * desc: 8 | * 9 | * @author scx 10 | * @create 2020/02/29 11 | */ 12 | public interface SqlParse { 13 | 14 | 15 | /** 
16 | * 血缘解析入口 17 | * 18 | * @param sqlText sql 19 | * @return Result 结果 20 | */ 21 | Result parse(String sqlText) throws SqlParseException; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /parsex-core/src/main/java/com/sucx/core/SqlParseUtil.java: -------------------------------------------------------------------------------- 1 | package com.sucx.core; 2 | 3 | import com.sucx.common.enums.SqlEnum; 4 | import com.sucx.common.exceptions.SqlParseException; 5 | import com.sucx.common.model.Result; 6 | import com.sucx.common.model.TableInfo; 7 | import org.apache.commons.logging.Log; 8 | import org.apache.commons.logging.LogFactory; 9 | import scala.Tuple3; 10 | 11 | import java.util.HashSet; 12 | import java.util.Map; 13 | import java.util.Set; 14 | import java.util.concurrent.ConcurrentHashMap; 15 | 16 | /** 17 | * desc: 18 | * 19 | * @author scx 20 | * @create 2020/02/26 21 | */ 22 | public class SqlParseUtil { 23 | 24 | private static final Log LOG = LogFactory.getLog(SqlParseUtil.class); 25 | 26 | 27 | private static Map sqlParseMap = new ConcurrentHashMap<>(3); 28 | 29 | 30 | public static Result parsePrestoSql(String sqlText) throws SqlParseException { 31 | return parse(SqlEnum.PRESTO, sqlText); 32 | } 33 | 34 | public static Result parseHiveSql(String sqlText) throws SqlParseException { 35 | return parse(SqlEnum.HIVE, sqlText); 36 | } 37 | 38 | public static Result parseSparkSql(String sqlText) throws SqlParseException { 39 | return parse(SqlEnum.SPARK, sqlText); 40 | } 41 | 42 | /** 43 | * 解析sql入口 44 | * 45 | * @param sqlEnum sql类型 46 | * @param sqlText sql内容 47 | * @return Result 48 | * @throws SqlParseException 解析异常 49 | */ 50 | private static Result parse(SqlEnum sqlEnum, String sqlText) throws SqlParseException { 51 | Result result; 52 | switch (sqlEnum) { 53 | case SPARK: 54 | try { 55 | result = getSqlParse(sqlEnum).parse(sqlText); 56 | } catch (Exception e) { 57 | LOG.error("spark引擎解析异常,准备使用hive引擎解析:" + 
sqlText); 58 | try { 59 | result = getSqlParse(SqlEnum.HIVE).parse(sqlText); 60 | } catch (Exception e1) { 61 | throw new SqlParseException(e); 62 | } 63 | } 64 | return result; 65 | case HIVE: 66 | try { 67 | result = getSqlParse(sqlEnum).parse(sqlText); 68 | } catch (Exception e) { 69 | LOG.error("hive引擎解析异常,准备使用spark引擎解析:" + sqlText); 70 | try { 71 | result = getSqlParse(SqlEnum.SPARK).parse(sqlText); 72 | } catch (Exception e1) { 73 | throw new SqlParseException(e); 74 | } 75 | } 76 | return result; 77 | 78 | case PRESTO: 79 | result = getSqlParse(sqlEnum).parse(sqlText); 80 | return result; 81 | default: 82 | throw new IllegalArgumentException("not support sqlEnum type :" + sqlEnum.name()); 83 | 84 | } 85 | } 86 | 87 | 88 | private static SqlParse getSqlParse(SqlEnum sqlEnum) { 89 | SqlParse sqlParse = sqlParseMap.get(sqlEnum); 90 | if (sqlParse == null) { 91 | synchronized (SqlParseUtil.class) { 92 | sqlParse = sqlParseMap.get(sqlEnum); 93 | if (sqlParse == null) { 94 | switch (sqlEnum) { 95 | case PRESTO: 96 | sqlParse = new PrestoSqlParse(); 97 | break; 98 | case SPARK: 99 | sqlParse = new SparkSQLParse(); 100 | break; 101 | case HIVE: 102 | sqlParse = new HiveSQLParse(); 103 | break; 104 | default: 105 | throw new IllegalArgumentException("not support sqlEnum type :" + sqlEnum.name()); 106 | 107 | } 108 | sqlParseMap.put(sqlEnum, sqlParse); 109 | } 110 | } 111 | } 112 | return sqlParse; 113 | } 114 | 115 | public static void print(Tuple3, HashSet, HashSet> tuple3) { 116 | if (tuple3 == null) { 117 | return; 118 | } 119 | print(tuple3._2(), tuple3._2(), tuple3._3(), false); 120 | } 121 | 122 | public static void print(Result result) { 123 | if (result == null) { 124 | return; 125 | } 126 | print(result.getInputSets(), result.getOutputSets(), result.getTempSets(), result.isJoin()); 127 | 128 | } 129 | 130 | private static void print(Set inputTable, Set outputTable, Set tempTable, boolean join) { 131 | 132 | LOG.info("是否包含join:" + join); 133 | 
LOG.info("输入表有:"); 134 | for (TableInfo table : inputTable) { 135 | LOG.info(table); 136 | } 137 | 138 | LOG.info("输出表有:"); 139 | 140 | for (TableInfo table : outputTable) { 141 | LOG.info(table); 142 | } 143 | 144 | LOG.info("临时表:"); 145 | 146 | for (TableInfo table : tempTable) { 147 | LOG.info(table); 148 | } 149 | 150 | } 151 | 152 | } 153 | -------------------------------------------------------------------------------- /parsex-core/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | %d{YYYY-MM-dd HH:mm:ss} [%t] %-5p %c{1}:%L - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /parsex-core/src/main/scala/com/sucx/core/SparkSQLParse.scala: -------------------------------------------------------------------------------- 1 | package com.sucx.core 2 | 3 | import java.util.{HashSet => JSet} 4 | 5 | import com.sucx.common.enums.OperatorType 6 | import com.sucx.common.model.TableInfo 7 | import org.apache.spark.sql.catalyst.TableIdentifier 8 | import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation 9 | import org.apache.spark.sql.catalyst.catalog.UnresolvedCatalogRelation 10 | import org.apache.spark.sql.catalyst.plans.logical._ 11 | import org.apache.spark.sql.execution.SparkSqlParser 12 | import org.apache.spark.sql.execution.command._ 13 | import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshTable} 14 | import org.apache.spark.sql.internal.SQLConf 15 | 16 | import scala.collection.JavaConversions._ 17 | 18 | class SparkSQLParse extends AbstractSqlParse { 19 | 20 | 21 | private[this] def resolveLogicPlan(plan: LogicalPlan) = { 22 | val inputTables = new JSet[TableInfo]() 23 | val outputTables = new JSet[TableInfo]() 24 | val tmpTables = new JSet[TableInfo]() 25 | resolveLogic(plan, inputTables, outputTables, tmpTables) 26 | Tuple3(inputTables, outputTables, 
tmpTables) 27 | } 28 | 29 | 30 | /* def getColumnAuto(exps: Expression*): String = { 31 | getColumn(exps) 32 | } 33 | 34 | def getColumn(expSeq: Seq[Expression]): String = { 35 | val columns = new StringBuilder 36 | expSeq.foreach(exp => { 37 | columns.append(resolveExp(exp)).append(",") 38 | }) 39 | columns.toString() 40 | } 41 | 42 | private[this] def resolveExp(exp: Expression): String = { 43 | val column = "" 44 | exp match { 45 | case alias: Alias => 46 | return resolveExp(alias.child) 47 | case divide: Divide => 48 | return getColumnAuto(divide.left, divide.right) 49 | case cast: Cast => 50 | return resolveExp(cast.child) 51 | case unresolvedFun: UnresolvedFunction => 52 | return getColumn(unresolvedFun.children) 53 | case unresolvedAttribute: UnresolvedAttribute => 54 | return unresolvedAttribute.name 55 | case literal: Literal => 56 | print(literal.sql) 57 | case caseWhen: CaseWhen => 58 | return getColumn(caseWhen.children) 59 | case in: In => 60 | return getColumn(in.children) 61 | case equalTo: EqualTo => 62 | return getColumnAuto(equalTo.left, equalTo.right) 63 | case unresolvedAlias: UnresolvedAlias => 64 | return getColumnAuto(unresolvedAlias.child) 65 | case unresolvedStar: UnresolvedStar => 66 | return unresolvedStar.toString() 67 | case multiAlias: MultiAlias => 68 | return resolveExp(multiAlias.child) 69 | 70 | case _ => 71 | throw new SqlParseException("无法识别的exp:" + exp.getClass.getName) 72 | } 73 | column 74 | }*/ 75 | 76 | 77 | private[this] def resolveLogic(plan: LogicalPlan, inputTables: JSet[TableInfo], outputTables: JSet[TableInfo], tmpTables: JSet[TableInfo]): Unit = { 78 | plan match { 79 | 80 | case plan: Project => 81 | val project: Project = plan.asInstanceOf[Project] 82 | /* val columnsSet = new JSet[String]() 83 | project.projectList.foreach(exp => { 84 | columnsSet.add(resolveExp(exp)) 85 | }) 86 | 87 | columnsStack.push(columnsSet)*/ 88 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 89 | case plan: Union => 90 
| val project: Union = plan.asInstanceOf[Union] 91 | for (child <- project.children) { 92 | resolveLogic(child, inputTables, outputTables, tmpTables) 93 | } 94 | case plan: Join => 95 | val project: Join = plan.asInstanceOf[Join] 96 | resolveLogic(project.left, inputTables, outputTables, tmpTables) 97 | resolveLogic(project.right, inputTables, outputTables, tmpTables) 98 | 99 | case plan: Aggregate => 100 | val project: Aggregate = plan.asInstanceOf[Aggregate] 101 | /* val columnsSet = new JSet[String]() 102 | project.aggregateExpressions.foreach(exp => { 103 | columnsSet.add(resolveExp(exp)) 104 | }) 105 | columnsStack.push(columnsSet)*/ 106 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 107 | 108 | case plan: Filter => 109 | val project: Filter = plan.asInstanceOf[Filter] 110 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 111 | 112 | case plan: Generate => 113 | val project: Generate = plan.asInstanceOf[Generate] 114 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 115 | 116 | case plan: RepartitionByExpression => 117 | val project: RepartitionByExpression = plan.asInstanceOf[RepartitionByExpression] 118 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 119 | 120 | case plan: SerializeFromObject => 121 | val project: SerializeFromObject = plan.asInstanceOf[SerializeFromObject] 122 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 123 | 124 | case plan: MapPartitions => 125 | val project: MapPartitions = plan.asInstanceOf[MapPartitions] 126 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 127 | 128 | case plan: DeserializeToObject => 129 | val project: DeserializeToObject = plan.asInstanceOf[DeserializeToObject] 130 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 131 | 132 | case plan: Repartition => 133 | val project: Repartition = plan.asInstanceOf[Repartition] 134 | resolveLogic(project.child, inputTables, outputTables, 
tmpTables) 135 | 136 | case plan: Deduplicate => 137 | val project: Deduplicate = plan.asInstanceOf[Deduplicate] 138 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 139 | 140 | case plan: Window => 141 | val project: Window = plan.asInstanceOf[Window] 142 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 143 | 144 | case plan: MapElements => 145 | val project: MapElements = plan.asInstanceOf[MapElements] 146 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 147 | 148 | case plan: TypedFilter => 149 | val project: TypedFilter = plan.asInstanceOf[TypedFilter] 150 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 151 | 152 | case plan: Distinct => 153 | val project: Distinct = plan.asInstanceOf[Distinct] 154 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 155 | 156 | case plan: SubqueryAlias => 157 | val project: SubqueryAlias = plan.asInstanceOf[SubqueryAlias] 158 | val childInputTables = new JSet[TableInfo]() 159 | val childOutputTables = new JSet[TableInfo]() 160 | 161 | resolveLogic(project.child, childInputTables, childOutputTables, tmpTables) 162 | if (childInputTables.size() > 1) { 163 | for (table <- childInputTables) inputTables.add(table) 164 | } else if (childInputTables.size() == 1) { 165 | val tableInfo: TableInfo = childInputTables.iterator().next() 166 | tableAliaMap.put(project.alias, tableInfo.getDbName + "." 
+ tableInfo.getName) 167 | inputTables.add(new TableInfo(tableInfo.getName, tableInfo.getDbName, tableInfo.getType, splitColumn(tableInfo.getColumns, tableAliaMap))) 168 | } 169 | tmpTables.add(buildTableInfo(project.alias, this.currentDb, OperatorType.READ)) 170 | 171 | case plan: UnresolvedCatalogRelation => 172 | val project: UnresolvedCatalogRelation = plan.asInstanceOf[UnresolvedCatalogRelation] 173 | val identifier: TableIdentifier = project.tableMeta.identifier 174 | inputTables.add(buildTableInfo(identifier.table, identifier.database.getOrElse(this.currentDb), OperatorType.READ)) 175 | 176 | case plan: UnresolvedRelation => 177 | val project: UnresolvedRelation = plan.asInstanceOf[UnresolvedRelation] 178 | inputTables.add(buildTableInfo(project.tableIdentifier.table, project.tableIdentifier.database.getOrElse(this.currentDb), OperatorType.READ)) 179 | 180 | case plan: InsertIntoTable => 181 | val project: InsertIntoTable = plan.asInstanceOf[InsertIntoTable] 182 | plan.table match { 183 | case relation: UnresolvedRelation => 184 | val table: TableIdentifier = relation.tableIdentifier 185 | outputTables.add(buildTableInfo(table.table, table.database.getOrElse(this.currentDb), OperatorType.WRITE)) 186 | case _ => 187 | throw new RuntimeException("无法解析的插入逻辑语法树:" + plan.table) 188 | } 189 | 190 | resolveLogic(project.query, inputTables, outputTables, tmpTables) 191 | 192 | case plan: CreateTable => 193 | val project: CreateTable = plan.asInstanceOf[CreateTable] 194 | if (project.query.isDefined) { 195 | resolveLogic(project.query.get, inputTables, outputTables, tmpTables) 196 | } 197 | val columnsSet = new JSet[String]() 198 | project.tableDesc.schema.fields.foreach(field => { 199 | columnsSet.add(field.name) 200 | }) 201 | columnsStack.push(columnsSet) 202 | val tableIdentifier: TableIdentifier = project.tableDesc.identifier 203 | outputTables.add(buildTableInfo(tableIdentifier.table, tableIdentifier.database.getOrElse(this.currentDb), OperatorType.CREATE)) 204 
| 205 | case plan: GlobalLimit => 206 | val project: GlobalLimit = plan.asInstanceOf[GlobalLimit] 207 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 208 | 209 | case plan: LocalLimit => 210 | val project: LocalLimit = plan.asInstanceOf[LocalLimit] 211 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 212 | 213 | case plan: With => 214 | val project: With = plan.asInstanceOf[With] 215 | project.cteRelations.foreach(cte => { 216 | tmpTables.add(buildTableInfo(cte._1, "temp", OperatorType.READ)) 217 | resolveLogic(cte._2, inputTables, outputTables, tmpTables) 218 | }) 219 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 220 | 221 | case plan: Sort => 222 | val project: Sort = plan.asInstanceOf[Sort] 223 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 224 | 225 | case ignore: SetCommand => 226 | print(ignore.toString()) 227 | 228 | case ignore: AddJarCommand => 229 | print(ignore.toString()) 230 | case ignore: CreateFunctionCommand => 231 | print(ignore.toString()) 232 | 233 | case ignore: SetDatabaseCommand => 234 | print(ignore.toString()) 235 | 236 | case ignore: OneRowRelation => 237 | print(ignore.toString()) 238 | 239 | case ignore: DropFunctionCommand => 240 | print(ignore.toString()) 241 | case plan: AlterTableAddPartitionCommand => 242 | val project: AlterTableAddPartitionCommand = plan.asInstanceOf[AlterTableAddPartitionCommand] 243 | 244 | outputTables.add(buildTableInfo(project.tableName.table, project.tableName.database.getOrElse(this.currentDb), OperatorType.ALTER)) 245 | 246 | case plan: AlterTableDropPartitionCommand => 247 | val project: AlterTableDropPartitionCommand = plan.asInstanceOf[AlterTableDropPartitionCommand] 248 | outputTables.add(buildTableInfo(project.tableName.table, project.tableName.database.getOrElse(this.currentDb), OperatorType.ALTER)) 249 | 250 | case plan: AlterTableAddColumnsCommand => 251 | val project: AlterTableAddColumnsCommand = 
plan.asInstanceOf[AlterTableAddColumnsCommand] 252 | outputTables.add(buildTableInfo(project.table.table, project.table.database.getOrElse(this.currentDb), OperatorType.ALTER)) 253 | 254 | 255 | case plan: CreateTableLikeCommand => 256 | val project: CreateTableLikeCommand = plan.asInstanceOf[CreateTableLikeCommand] 257 | inputTables.add(buildTableInfo(project.sourceTable.table, project.sourceTable.database.getOrElse(this.currentDb), OperatorType.READ)) 258 | outputTables.add(buildTableInfo(project.targetTable.table, project.targetTable.database.getOrElse(this.currentDb), OperatorType.CREATE)) 259 | 260 | 261 | case plan: DropTableCommand => 262 | val project: DropTableCommand = plan.asInstanceOf[DropTableCommand] 263 | outputTables.add(buildTableInfo(project.tableName.table, project.tableName.database.getOrElse(this.currentDb), OperatorType.DROP)) 264 | 265 | 266 | case plan: AlterTableRecoverPartitionsCommand => 267 | val project: AlterTableRecoverPartitionsCommand = plan.asInstanceOf[AlterTableRecoverPartitionsCommand] 268 | outputTables.add(buildTableInfo(project.tableName.table, project.tableName.database.getOrElse(this.currentDb), OperatorType.ALTER)) 269 | case plan: GroupingSets => 270 | val project: GroupingSets = plan.asInstanceOf[GroupingSets] 271 | resolveLogic(project.child, inputTables, outputTables, tmpTables) 272 | 273 | case plan: CreateDatabaseCommand => 274 | val project: CreateDatabaseCommand = plan.asInstanceOf[CreateDatabaseCommand] 275 | inputTables.add(new TableInfo(project.databaseName, OperatorType.CREATE)) 276 | 277 | case plan: DropDatabaseCommand => 278 | val project: DropDatabaseCommand = plan.asInstanceOf[DropDatabaseCommand] 279 | inputTables.add(new TableInfo(project.databaseName, OperatorType.DROP)) 280 | 281 | case plan: AlterDatabasePropertiesCommand => 282 | val project: AlterDatabasePropertiesCommand = plan.asInstanceOf[AlterDatabasePropertiesCommand] 283 | inputTables.add(new TableInfo(project.databaseName, 
OperatorType.ALTER)) 284 | 285 | case plan: ShowCreateTableCommand => 286 | val project: ShowCreateTableCommand = plan.asInstanceOf[ShowCreateTableCommand] 287 | outputTables.add(buildTableInfo(project.table.table, project.table.database.getOrElse(this.currentDb), OperatorType.READ)) 288 | 289 | case plan: RefreshTable => 290 | val project: RefreshTable = plan.asInstanceOf[RefreshTable] 291 | inputTables.add(buildTableInfo(project.tableIdent.table, project.tableIdent.database.getOrElse(this.currentDb), OperatorType.READ)) 292 | 293 | case `plan` => { 294 | throw new RuntimeException("******child plan******:\n" + plan.getClass.getName + "\n" + plan) 295 | } 296 | } 297 | } 298 | 299 | override protected def parseInternal(sqlText: String): (JSet[TableInfo], JSet[TableInfo], JSet[TableInfo]) = { 300 | val parser = new SparkSqlParser(new SQLConf) 301 | 302 | val logicalPlan: LogicalPlan = parser.parsePlan(sqlText) 303 | logicalPlan match { 304 | case command: SetDatabaseCommand => 305 | this.currentDb = command.databaseName 306 | return Tuple3(new JSet[TableInfo](0), new JSet[TableInfo](0), new JSet[TableInfo](0)) 307 | case _ => 308 | } 309 | this.resolveLogicPlan(logicalPlan) 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /parsex-core/src/test/java/com/sucx/core/parse/presto/SqlBaseParserTest.java: -------------------------------------------------------------------------------- 1 | package com.sucx.core.parse.presto; 2 | 3 | import org.junit.Test; 4 | 5 | public class SqlBaseParserTest { 6 | 7 | 8 | @Test 9 | public void test1() { 10 | 11 | 12 | 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /parsex-core/src/test/java/org/apache/spark/sql/catalyst/expressions/parse.java: -------------------------------------------------------------------------------- 1 | package org.apache.spark.sql.catalyst.expressions; 2 | 3 | import com.sucx.core.SparkSQLParse; 4 | 
import com.sucx.core.SqlParse; 5 | import com.sucx.common.exceptions.SqlParseException; 6 | import com.sucx.common.model.Result; 7 | import com.sucx.core.SqlParseUtil; 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.FileSystem; 10 | import org.apache.hadoop.fs.Path; 11 | import org.apache.hadoop.io.IOUtils; 12 | import org.junit.Assert; 13 | import org.junit.Test; 14 | 15 | import java.io.BufferedReader; 16 | import java.io.IOException; 17 | import java.io.InputStreamReader; 18 | import java.util.Arrays; 19 | 20 | /** 21 | * desc: 22 | * 23 | * @author scx 24 | * @create 2020/02/24 25 | */ 26 | public class parse { 27 | 28 | String sql = ""; 29 | 30 | @Test 31 | public void parse() throws SqlParseException { 32 | 33 | SqlParse sqlParse = new SparkSQLParse(); 34 | 35 | Result tuple3 = sqlParse.parse(sql); 36 | 37 | SqlParseUtil.print(tuple3); 38 | 39 | } 40 | 41 | 42 | @Test 43 | public void splitSql() throws IOException { 44 | // deal(sql); 45 | 46 | readFile("/Users/scx/Desktop/test.hive"); 47 | } 48 | 49 | public void readFile(String fileName) throws IOException { 50 | Configuration conf = new Configuration(); 51 | Path path = new Path(fileName); 52 | FileSystem fs; 53 | if (!path.toUri().isAbsolute()) { 54 | fs = FileSystem.getLocal(conf); 55 | path = fs.makeQualified(path); 56 | } else { 57 | fs = FileSystem.get(path.toUri(), conf); 58 | } 59 | BufferedReader bufferReader = null; 60 | int rc = 0; 61 | try { 62 | bufferReader = new BufferedReader(new InputStreamReader(fs.open(path))); 63 | processReader(bufferReader); 64 | } finally { 65 | IOUtils.closeStream(bufferReader); 66 | } 67 | } 68 | 69 | public void processReader(BufferedReader r) throws IOException { 70 | String line; 71 | StringBuilder qsb = new StringBuilder(); 72 | 73 | while ((line = r.readLine()) != null) { 74 | // Skipping through comments 75 | if (!line.startsWith("--")) { 76 | qsb.append(line + "\n"); 77 | } 78 | } 79 | deal(qsb.toString()); 80 | } 81 | 82 | 
public void deal(String line) { 83 | 84 | String command = ""; 85 | for (String oneCmd : line.split(";")) { 86 | if (org.apache.commons.lang.StringUtils.endsWith(oneCmd, "\\")) { 87 | command += org.apache.commons.lang.StringUtils.chop(oneCmd) + ";"; 88 | continue; 89 | } else { 90 | command += oneCmd; 91 | } 92 | if (org.apache.commons.lang.StringUtils.isBlank(command)) { 93 | continue; 94 | } 95 | System.out.println(command); 96 | System.out.println("======="); 97 | command = ""; 98 | } 99 | } 100 | 101 | 102 | @Test 103 | public void split() { 104 | String sql = "abc; abcd;\n absc;\tabcde ';'; abcde\';\";"; 105 | 106 | String[] split = sql.split("[\\s]*(?!'|\");(?!'|\")[\\s]*"); 107 | for (String s : split) { 108 | System.out.println(s); 109 | } 110 | Assert.assertEquals(5, split.length); 111 | 112 | sql = "--我是注释 \n -------注释来了 \n -- 我也是注释\n select * from table 1 \n 哈哈--我的注释在后面"; 113 | 114 | 115 | split = sql.split("\n"); 116 | 117 | Arrays.stream(split).forEach(s -> { 118 | 119 | // System.out.println(s); 120 | s = s.replaceAll("\\s*-+.*", ""); 121 | System.out.println(s); 122 | 123 | 124 | }); 125 | 126 | 127 | } 128 | 129 | @Test 130 | public void lineSplit() { 131 | 132 | String regex = "\\s*-+.*\n$"; 133 | String blank = ""; 134 | Assert.assertEquals("--\n".replaceAll(regex, blank), blank); 135 | Assert.assertEquals("abc--\n".replaceAll(regex, blank), "abc"); 136 | Assert.assertEquals("---注释\n".replaceAll(regex, blank), blank); 137 | Assert.assertEquals(" \t -----注释\n".replaceAll(regex, blank), blank); 138 | 139 | 140 | Assert.assertEquals("SELECT /*+ REPARTITION(1) */ md5".replaceAll("/\\*.*\\*/", blank), "SELECT md5"); 141 | Assert.assertEquals("SELECT /****/ md5".replaceAll("/\\*.*\\*/", blank), "SELECT md5"); 142 | Assert.assertEquals("SELECT /**/ md5".replaceAll("/\\*.*\\*/", blank), "SELECT md5"); 143 | System.out.println("; \n".matches(";[ ]*\n+")); 144 | 145 | } 146 | 147 | 148 | @Test 149 | public void prestoTest() { 150 | 151 | 152 | } 153 | 154 
| } 155 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | pom 6 | 7 | parsex-client 8 | parsex-core 9 | parsex-common 10 | 11 | 12 | org.springframework.boot 13 | spring-boot-starter-parent 14 | 2.2.4.RELEASE 15 | 16 | 17 | com.sucx.bigdata 18 | parsex 19 | 0.0.1-SNAPSHOT 20 | parsex 21 | Demo project for Spring Boot 22 | 23 | 24 | 1.8 25 | 4.8 26 | 4.11 27 | 1.7.16 28 | 1.2.17 29 | 2.4.0 30 | 2.11 31 | 32 | 33 | 34 | 35 | 36 | 37 | junit 38 | junit 39 | ${test.version} 40 | test 41 | 42 | 43 | 44 | com.google.guava 45 | guava 46 | 26.0-jre 47 | 48 | 49 | com.facebook.presto 50 | presto-parser 51 | 0.215 52 | 53 | 54 | org.jooq 55 | joor-java-8 56 | 0.9.12 57 | 58 | 59 | org.antlr 60 | antlr4 61 | ${antlr4.version} 62 | 63 | 64 | org.apache.hive 65 | hive-exec 66 | 2.3.4 67 | 68 | 69 | 70 | org.springframework.boot 71 | spring-boot-starter-web 72 | 73 | 74 | 75 | com.alibaba 76 | fastjson 77 | 1.2.70 78 | 79 | 80 | 81 | org.springframework.boot 82 | spring-boot-starter-test 83 | test 84 | 85 | 86 | org.junit.vintage 87 | junit-vintage-engine 88 | 89 | 90 | 91 | 92 | org.apache.httpcomponents 93 | httpclient 94 | 4.5.3 95 | 96 | 97 | 98 | org.slf4j 99 | slf4j-api 100 | ${slf4j.version} 101 | ${hadoop.deps.scope} 102 | 103 | 104 | org.slf4j 105 | slf4j-log4j12 106 | ${slf4j.version} 107 | ${hadoop.deps.scope} 108 | 109 | 110 | org.json4s 111 | json4s-jackson_${scala.binary.version} 112 | 3.5.3 113 | 114 | 115 | com.fasterxml.jackson.core 116 | * 117 | 118 | 119 | 120 | 121 | org.apache.spark 122 | spark-core_${scala.binary.version} 123 | ${spark.version} 124 | 125 | 126 | 127 | org.apache.spark 128 | spark-sql_${scala.binary.version} 129 | ${spark.version} 130 | 131 | 132 | 133 | org.apache.spark 134 | spark-catalyst_${scala.binary.version} 135 | ${spark.version} 136 | 137 | 138 | 139 | org.slf4j 140 | jul-to-slf4j 141 | 
${slf4j.version} 142 | 143 | 144 | org.slf4j 145 | jcl-over-slf4j 146 | ${slf4j.version} 147 | 148 | 149 | 150 | log4j 151 | log4j 152 | ${log4j.version} 153 | 154 | 155 | org.scala-lang.modules 156 | scala-parser-combinators_${scala.binary.version} 157 | 1.1.0 158 | 159 | 160 | org.scala-lang 161 | scala-library 162 | 2.11.8 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 主要是一个sql解析的小工具 2 | 可以搭配[分布式任务调度系统Hera](https://github.com/scxwhite/hera)或者其它调度使用。解析hive、spark sql的输入、输出表。达到自动依赖任务的目的 3 | 直接使用[SqlParseUtil](https://github.com/scxwhite/parseX/blob/master/parsex-core/src/main/java/com/sucx/core/SqlParseUtil.java) 类中的静态方法调用 4 | --------------------------------------------------------------------------------