├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── bin ├── activator └── activator.bat ├── build.sbt ├── clickhouse_files └── config.xml ├── docker_files └── docker_start.sh ├── libexec └── activator-launch-1.3.12.jar ├── project ├── build.properties └── plugins.sbt └── src ├── main └── scala │ └── io │ └── clickhouse │ └── ext │ ├── ClickhouseClient.scala │ ├── ClickhouseConnectionFactory.scala │ ├── ClickhouseResultSetExt.scala │ ├── ClusterResultSet.scala │ ├── Utils.scala │ └── spark │ └── DataFrameExt.scala └── test └── scala ├── DFExtSpec.scala └── UtilsSpec.scala /.gitignore: -------------------------------------------------------------------------------- 1 | # SBT 2 | boot/ 3 | lib_managed/ 4 | src_managed/ 5 | test-output/ 6 | target/ 7 | .history 8 | 9 | # IntelliJ 10 | .idea/ 11 | *.iml 12 | *.ipr 13 | *.iws 14 | out/ 15 | 16 | # Eclipse 17 | .cache 18 | .classpath 19 | .project 20 | .scala_dependencies 21 | .settings 22 | .target/ 23 | 24 | # Mac 25 | .DS_Store 26 | 27 | # Other 28 | *.pyc 29 | *.swp 30 | sync.sh 31 | 32 | logs/ 33 | metastore_db/ 34 | *.log -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM p7hb/docker-spark:2.1.0 2 | 3 | ARG CLICKHOUSE_LOGS=/var/log/clickhouse-server 4 | 5 | ENV APP_DIR /app 6 | 7 | RUN mkdir -p /etc/apt/sources.list.d && \ 8 | apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ 9 | echo "deb http://repo.yandex.ru/clickhouse/trusty stable main" | tee /etc/apt/sources.list.d/clickhouse.list && \ 10 | apt-get -y update && \ 11 | apt-get -y install clickhouse-server-common clickhouse-client && \ 12 | mkdir -p ${CLICKHOUSE_LOGS} && \ 13 | touch ${CLICKHOUSE_LOGS}/tmp 14 | 15 | ADD docker_files/docker_start.sh /docker_start.sh 16 | 17 | RUN mkdir -p ${APP_DIR} 18 | WORKDIR ${APP_DIR} 19 | 20 | # clickhouse config with cluster def 21 | COPY /clickhouse_files/config.xml /etc/clickhouse-server/ 22 | 23 | COPY /target/pack/lib/clickhouse* ${APP_DIR}/lib/ 24 | COPY /target/pack/lib/guava* ${APP_DIR}/lib/ 25 | COPY Makefile ${APP_DIR} 26 | 27 | ENTRYPOINT ["/docker_start.sh"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2014 Typesafe, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # docker images params 3 | REPO=dmitryb/clickhouse-spark-connector 4 | TAG=0.0.1 5 | 6 | build: 7 | sbt compile 8 | sbt pack 9 | 10 | pack: 11 | sbt pack-archive 12 | 13 | run: 14 | env JAVA_OPTS="-Xmx4g -Xms4g -server -XX:+UseParallelGC -XX:NewRatio=1" \ 15 | ./target/pack/bin/main --conf 16 | 17 | start-activator: 18 | ./bin/activator ui -Dhttp.address=0.0.0.0 -Dhttp.port=8088 19 | 20 | docker-build: 21 | docker build -t $(REPO):$(TAG) . 22 | 23 | docker-push: 24 | docker push $(REPO):$(TAG) 25 | 26 | docker-clean: 27 | docker rm $$(docker ps -a -q) 28 | docker rmi $$(docker images | grep "dmitryb/clickhouse-spark-connector" | awk '{print $$3}') 29 | 30 | # to create fat jar (not used) 31 | assembly: 32 | sbt assembly 33 | 34 | dev-local: 35 | #sbt clean compile 36 | #sbt pack 37 | mkdir -p target/l 38 | cp -f target/pack/lib/clickhouse* target/l/ 39 | cp -f target/pack/lib/guava* target/l/ 40 | 41 | clickhouse-server-start: 42 | docker run -it -d --name clickhouse-server -p 8123:8123 -v `pwd`/clickhouse_files/config.xml:/etc/clickhouse-server/config.xml yandex/clickhouse-server -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | clickhouse spark connector 2 | ========================== 3 | 4 | > Connector for writing a Spark DataFrame to a Yandex ClickHouse table 5 | 6 | Example 7 | ``` scala 8 | 9 | import io.clickhouse.ext.ClickhouseConnectionFactory 10 | import io.clickhouse.ext.spark.ClickhouseSparkExt._ 11 | import org.apache.spark.sql.SparkSession 12 | 13 | // spark config 14 | val sparkSession = SparkSession.builder 15 | .master("local") 16 | .appName("local spark") 17 | .getOrCreate() 18 | 19 | val sc = sparkSession.sparkContext 20 | val sqlContext = sparkSession.sqlContext 21 | 22 | // create test DF 23 | case class Row1(name: String, v: Int, v2: Int) 24 | val df = sqlContext.createDataFrame(1 to 1000 map(i => Row1(s"$i", i, i + 10)) ) 25 | 26 | // clickhouse params 27 | 28 | // any node 29 | val anyHost = "localhost" 30 | val db = "tmp1" 31 | val tableName = "t1" 32 | // cluster configuration must be defined in config.xml (clickhouse config) 33 | val clusterName = Some("perftest_1shards_1replicas"): Option[String] 34 | 35 | // define clickhouse datasource 36 | implicit val clickhouseDataSource = ClickhouseConnectionFactory.get(anyHost) 37 | 38 | // create db / table 39 | //df.dropClickhouseDb(db, clusterName) 40 | df.createClickhouseDb(db, clusterName) 41 | df.createClickhouseTable(db, tableName, "mock_date", Seq("name"), clusterNameO = clusterName) 42 | 43 | // save DF to clickhouse table 44 | val res = df.saveToClickhouse("tmp1", "t1", (row) => java.sql.Date.valueOf("2000-12-01"), "mock_date", clusterNameO = clusterName) 45 | assert(res.size == 1) 46 | assert(res.get("localhost") == Some(df.count())) 47 | 48 | ``` 49 | 50 | Docker image: 51 | [dmitryb/clickhouse-spark-connector on Docker Hub](https://hub.docker.com/r/dmitryb/clickhouse-spark-connector/) -------------------------------------------------------------------------------- /bin/activator: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ### ------------------------------- ### 4 | ### Helper methods for BASH scripts ### 5 | ### ------------------------------- ### 6 | 7 | realpath () { 8 | ( 9 |
TARGET_FILE="$1" 10 | FIX_CYGPATH="$2" 11 | 12 | cd "$(dirname "$TARGET_FILE")" 13 | TARGET_FILE=$(basename "$TARGET_FILE") 14 | 15 | COUNT=0 16 | while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] 17 | do 18 | TARGET_FILE=$(readlink "$TARGET_FILE") 19 | cd "$(dirname "$TARGET_FILE")" 20 | TARGET_FILE=$(basename "$TARGET_FILE") 21 | COUNT=$(($COUNT + 1)) 22 | done 23 | 24 | # make sure we grab the actual windows path, instead of cygwin's path. 25 | if [[ "x$FIX_CYGPATH" != "x" ]]; then 26 | echo "$(cygwinpath "$(pwd -P)/$TARGET_FILE")" 27 | else 28 | echo "$(pwd -P)/$TARGET_FILE" 29 | fi 30 | ) 31 | } 32 | 33 | 34 | # Uses uname to detect if we're in the odd cygwin environment. 35 | is_cygwin() { 36 | local os=$(uname -s) 37 | case "$os" in 38 | CYGWIN*) return 0 ;; 39 | *) return 1 ;; 40 | esac 41 | } 42 | 43 | # TODO - Use nicer bash-isms here. 44 | CYGWIN_FLAG=$(if is_cygwin; then echo true; else echo false; fi) 45 | 46 | 47 | # This can fix cygwin style /cygdrive paths so we get the 48 | # windows style paths. 49 | cygwinpath() { 50 | local file="$1" 51 | if [[ "$CYGWIN_FLAG" == "true" ]]; then 52 | echo $(cygpath -w $file) 53 | else 54 | echo $file 55 | fi 56 | } 57 | 58 | # Make something URI friendly 59 | make_url() { 60 | url="$1" 61 | local nospaces=${url// /%20} 62 | if is_cygwin; then 63 | echo "/${nospaces//\\//}" 64 | else 65 | echo "$nospaces" 66 | fi 67 | } 68 | 69 | declare -a residual_args 70 | declare -a java_args 71 | declare -a scalac_args 72 | declare -a sbt_commands 73 | declare java_cmd=java 74 | declare java_version 75 | declare -r real_script_path="$(realpath "$0")" 76 | declare -r sbt_home="$(realpath "$(dirname "$(dirname "$real_script_path")")")" 77 | declare -r sbt_bin_dir="$(dirname "$real_script_path")" 78 | declare -r app_version="1.3.12" 79 | 80 | declare -r script_name=activator 81 | declare -r java_opts=( "${ACTIVATOR_OPTS[@]}" "${SBT_OPTS[@]}" "${JAVA_OPTS[@]}" "${java_opts[@]}" ) 82 | userhome="$HOME" 83 | if is_cygwin; then 84 | # cygwin sets home to something f-d up, set to real windows homedir 85 | userhome="$USERPROFILE" 86 | fi 87 | declare -r activator_user_home_dir="${userhome}/.activator" 88 | declare -r java_opts_config_home="${activator_user_home_dir}/activatorconfig.txt" 89 | declare -r java_opts_config_version="${activator_user_home_dir}/${app_version}/activatorconfig.txt" 90 | 91 | echoerr () { 92 | echo 1>&2 "$@" 93 | } 94 | vlog () { 95 | [[ $verbose || $debug ]] && echoerr "$@" 96 | } 97 | dlog () { 98 | [[ $debug ]] && echoerr "$@" 99 | } 100 | 101 | jar_file () { 102 | echo "$(cygwinpath "${sbt_home}/libexec/activator-launch-${app_version}.jar")" 103 | } 104 | 105 | acquire_sbt_jar () { 106 | sbt_jar="$(jar_file)" 107 | 108 | if [[ ! -f "$sbt_jar" ]]; then 109 | echoerr "Could not find launcher jar: $sbt_jar" 110 | exit 2 111 | fi 112 | } 113 | 114 | execRunner () { 115 | # print the arguments one to a line, quoting any containing spaces 116 | [[ $verbose || $debug ]] && echo "# Executing command line:" && { 117 | for arg; do 118 | if printf "%s\n" "$arg" | grep -q ' '; then 119 | printf "\"%s\"\n" "$arg" 120 | else 121 | printf "%s\n" "$arg" 122 | fi 123 | done 124 | echo "" 125 | } 126 | 127 | # THis used to be exec, but we loose the ability to re-hook stty then 128 | # for cygwin... Maybe we should flag the feature here... 
129 | "$@" 130 | } 131 | 132 | addJava () { 133 | dlog "[addJava] arg = '$1'" 134 | java_args=( "${java_args[@]}" "$1" ) 135 | } 136 | addSbt () { 137 | dlog "[addSbt] arg = '$1'" 138 | sbt_commands=( "${sbt_commands[@]}" "$1" ) 139 | } 140 | addResidual () { 141 | dlog "[residual] arg = '$1'" 142 | residual_args=( "${residual_args[@]}" "$1" ) 143 | } 144 | addDebugger () { 145 | addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" 146 | } 147 | addConfigOpts () { 148 | dlog "[addConfigOpts] arg = '$*'" 149 | for item in $* 150 | do 151 | addJava "$item" 152 | done 153 | } 154 | 155 | get_mem_opts () { 156 | # if we detect any of these settings in ${JAVA_OPTS} we need to NOT output our settings. 157 | # The reason is the Xms/Xmx, if they don't line up, cause errors. 158 | if [[ "${JAVA_OPTS}" == *-Xmx* ]] || [[ "${JAVA_OPTS}" == *-Xms* ]] || [[ "${JAVA_OPTS}" == *-XX:MaxPermSize* ]] || [[ "${JAVA_OPTS}" == *-XX:MaxMetaspaceSize* ]] || [[ "${JAVA_OPTS}" == *-XX:ReservedCodeCacheSize* ]]; then 159 | echo "" 160 | else 161 | # a ham-fisted attempt to move some memory settings in concert 162 | # so they need not be messed around with individually. 163 | local mem=${1:-1024} 164 | local codecache=$(( $mem / 8 )) 165 | (( $codecache > 128 )) || codecache=128 166 | (( $codecache < 512 )) || codecache=512 167 | local class_metadata_size=$(( $codecache * 2 )) 168 | local class_metadata_opt=$([[ "$java_version" < "1.8" ]] && echo "MaxPermSize" || echo "MaxMetaspaceSize") 169 | 170 | echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m -XX:${class_metadata_opt}=${class_metadata_size}m" 171 | fi 172 | } 173 | 174 | require_arg () { 175 | local type="$1" 176 | local opt="$2" 177 | local arg="$3" 178 | if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then 179 | echo "$opt requires <$type> argument" 180 | exit 1 181 | fi 182 | } 183 | 184 | is_function_defined() { 185 | declare -f "$1" > /dev/null 186 | } 187 | 188 | # If we're *not* running in a terminal, and we don't have any arguments, then we need to add the 'ui' parameter 189 | detect_terminal_for_ui() { 190 | [[ ! -t 0 ]] && [[ "${#residual_args}" == "0" ]] && { 191 | addResidual "ui" 192 | } 193 | # SPECIAL TEST FOR MAC 194 | [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]] && [[ "${#residual_args}" == "0" ]] && { 195 | echo "Detected MAC OSX launched script...." 196 | echo "Swapping to UI" 197 | addResidual "ui" 198 | } 199 | } 200 | 201 | process_args () { 202 | while [[ $# -gt 0 ]]; do 203 | case "$1" in 204 | -h|-help) usage; exit 1 ;; 205 | -v|-verbose) verbose=1 && shift ;; 206 | -d|-debug) debug=1 && shift ;; 207 | 208 | -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; 209 | -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; 210 | -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; 211 | -batch) exec &1 | awk -F '"' '/version/ {print $2}') 230 | vlog "[process_args] java_version = '$java_version'" 231 | } 232 | 233 | # Detect that we have java installed. 234 | checkJava() { 235 | local required_version="$1" 236 | # Now check to see if it's a good enough version 237 | if [[ "$java_version" == "" ]]; then 238 | echo 239 | echo No java installations was detected. 240 | echo Please go to http://www.java.com/getjava/ and download 241 | echo 242 | exit 1 243 | elif [[ ! 
"$java_version" > "$required_version" ]]; then 244 | echo 245 | echo The java installation you have is not up to date 246 | echo $script_name requires at least version $required_version+, you have 247 | echo version $java_version 248 | echo 249 | echo Please go to http://www.java.com/getjava/ and download 250 | echo a valid Java Runtime and install before running $script_name. 251 | echo 252 | exit 1 253 | fi 254 | } 255 | 256 | 257 | run() { 258 | # no jar? download it. 259 | [[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || { 260 | # still no jar? uh-oh. 261 | echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar" 262 | exit 1 263 | } 264 | 265 | # process the combined args, then reset "$@" to the residuals 266 | process_args "$@" 267 | detect_terminal_for_ui 268 | set -- "${residual_args[@]}" 269 | argumentCount=$# 270 | 271 | # TODO - java check should be configurable... 272 | checkJava "1.6" 273 | 274 | #If we're in cygwin, we should use the windows config, and terminal hacks 275 | if [[ "$CYGWIN_FLAG" == "true" ]]; then 276 | stty -icanon min 1 -echo > /dev/null 2>&1 277 | addJava "-Djline.terminal=jline.UnixTerminal" 278 | addJava "-Dsbt.cygwin=true" 279 | fi 280 | 281 | # run sbt 282 | execRunner "$java_cmd" \ 283 | "-Dactivator.home=$(make_url "$sbt_home")" \ 284 | ${SBT_OPTS:-$default_sbt_opts} \ 285 | $(get_mem_opts $sbt_mem) \ 286 | ${JAVA_OPTS} \ 287 | ${java_args[@]} \ 288 | -jar "$sbt_jar" \ 289 | "${sbt_commands[@]}" \ 290 | "${residual_args[@]}" 291 | 292 | exit_code=$? 293 | 294 | # Clean up the terminal from cygwin hacks. 295 | if [[ "$CYGWIN_FLAG" == "true" ]]; then 296 | stty icanon echo > /dev/null 2>&1 297 | fi 298 | exit $exit_code 299 | } 300 | 301 | 302 | declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" 303 | declare -r sbt_opts_file=".sbtopts" 304 | declare -r etc_sbt_opts_file="${sbt_home}/conf/sbtopts" 305 | declare -r win_sbt_opts_file="${sbt_home}/conf/sbtconfig.txt" 306 | 307 | usage() { 308 | cat < path to global settings/plugins directory (default: ~/.sbt) 323 | -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) 324 | -ivy path to local Ivy repository (default: ~/.ivy2) 325 | -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) 326 | -no-share use all local caches; no sharing 327 | -no-global uses global caches, but does not use global ~/.sbt directory. 328 | -jvm-debug Turn on JVM debugging, open at the given port. 
329 | -batch Disable interactive mode 330 | 331 | # sbt version (default: from project/build.properties if present, else latest release) 332 | -sbt-version use the specified version of sbt 333 | -sbt-jar use the specified jar as the sbt launcher 334 | -sbt-rc use an RC version of sbt 335 | -sbt-snapshot use a snapshot version of sbt 336 | 337 | # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) 338 | -java-home alternate JAVA_HOME 339 | 340 | # jvm options and output control 341 | JAVA_OPTS environment variable, if unset uses "$java_opts" 342 | SBT_OPTS environment variable, if unset uses "$default_sbt_opts" 343 | ACTIVATOR_OPTS Environment variable, if unset uses "" 344 | .sbtopts if this file exists in the current directory, it is 345 | prepended to the runner args 346 | /etc/sbt/sbtopts if this file exists, it is prepended to the runner args 347 | -Dkey=val pass -Dkey=val directly to the java runtime 348 | -J-X pass option -X directly to the java runtime 349 | (-J is stripped) 350 | -S-X add -X to sbt's scalacOptions (-S is stripped) 351 | 352 | In the case of duplicated or conflicting options, the order above 353 | shows precedence: JAVA_OPTS lowest, command line options highest. 354 | EOM 355 | } 356 | 357 | 358 | 359 | process_my_args () { 360 | while [[ $# -gt 0 ]]; do 361 | case "$1" in 362 | -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; 363 | -no-share) addJava "$noshare_opts" && shift ;; 364 | -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; 365 | -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; 366 | -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; 367 | -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; 368 | -batch) exec ^&1') do ( 109 | if %%~j==java set JAVAINSTALLED=1 110 | if %%~j==openjdk set JAVAINSTALLED=1 111 | ) 112 | 113 | rem Detect the same thing about javac 114 | if "%_JAVACCMD%"=="" ( 115 | if not "%JAVA_HOME%"=="" ( 116 | if exist "%JAVA_HOME%\bin\javac.exe" set "_JAVACCMD=%JAVA_HOME%\bin\javac.exe" 117 | ) 118 | ) 119 | if "%_JAVACCMD%"=="" set _JAVACCMD=javac 120 | for /F %%j in ('"%_JAVACCMD%" -version 2^>^&1') do ( 121 | if %%~j==javac set JAVACINSTALLED=1 122 | ) 123 | 124 | rem BAT has no logical or, so we do it OLD SCHOOL! Oppan Redmond Style 125 | set JAVAOK=true 126 | if not defined JAVAINSTALLED set JAVAOK=false 127 | if not defined JAVACINSTALLED set JAVAOK=false 128 | 129 | if "%JAVAOK%"=="false" ( 130 | echo. 131 | echo A Java JDK is not installed or can't be found. 132 | if not "%JAVA_HOME%"=="" ( 133 | echo JAVA_HOME = "%JAVA_HOME%" 134 | ) 135 | echo. 136 | echo Please go to 137 | echo http://www.oracle.com/technetwork/java/javase/downloads/index.html 138 | echo and download a valid Java JDK and install before running Activator. 139 | echo. 140 | echo If you think this message is in error, please check 141 | echo your environment variables to see if "java.exe" and "javac.exe" are 142 | echo available via JAVA_HOME or PATH. 143 | echo. 
144 | if defined DOUBLECLICKED pause 145 | exit /B 1 146 | ) 147 | 148 | rem Check what Java version is being used to determine what memory options to use 149 | for /f "tokens=3" %%g in ('java -version 2^>^&1 ^| findstr /i "version"') do ( 150 | set JAVA_VERSION=%%g 151 | ) 152 | 153 | rem Strips away the " characters 154 | set JAVA_VERSION=%JAVA_VERSION:"=% 155 | 156 | rem TODO Check if there are existing mem settings in JAVA_OPTS/CFG_OPTS and use those instead of the below 157 | for /f "delims=. tokens=1-3" %%v in ("%JAVA_VERSION%") do ( 158 | set MAJOR=%%v 159 | set MINOR=%%w 160 | set BUILD=%%x 161 | 162 | set META_SIZE=-XX:MetaspaceSize=64M -XX:MaxMetaspaceSize=256M 163 | if "!MINOR!" LSS "8" ( 164 | set META_SIZE=-XX:PermSize=64M -XX:MaxPermSize=256M 165 | ) 166 | 167 | set MEM_OPTS=!META_SIZE! 168 | ) 169 | 170 | rem We use the value of the JAVA_OPTS environment variable if defined, rather than the config. 171 | set _JAVA_OPTS=%JAVA_OPTS% 172 | if "%_JAVA_OPTS%"=="" set _JAVA_OPTS=%CFG_OPTS% 173 | 174 | set DEBUG_OPTS= 175 | 176 | rem Loop through the arguments, building remaining args in args variable 177 | set args= 178 | :argsloop 179 | if not "%~1"=="" ( 180 | rem Checks if the argument contains "-D" and if true, adds argument 1 with 2 and puts an equal sign between them. 181 | rem This is done since batch considers "=" to be a delimiter so we need to circumvent this behavior with a small hack. 182 | set arg1=%~1 183 | if "!arg1:~0,2!"=="-D" ( 184 | set "args=%args% "%~1"="%~2"" 185 | shift 186 | shift 187 | goto argsloop 188 | ) 189 | 190 | if "%~1"=="-jvm-debug" ( 191 | if not "%~2"=="" ( 192 | rem This piece of magic somehow checks that an argument is a number 193 | for /F "delims=0123456789" %%i in ("%~2") do ( 194 | set var="%%i" 195 | ) 196 | if defined var ( 197 | rem Not a number, assume no argument given and default to 9999 198 | set JPDA_PORT=9999 199 | ) else ( 200 | rem Port was given, shift arguments 201 | set JPDA_PORT=%~2 202 | shift 203 | ) 204 | ) else ( 205 | set JPDA_PORT=9999 206 | ) 207 | shift 208 | 209 | set DEBUG_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=!JPDA_PORT! 210 | goto argsloop 211 | ) 212 | rem else 213 | set "args=%args% "%~1"" 214 | shift 215 | goto argsloop 216 | ) 217 | 218 | :run 219 | 220 | if "!args!"=="" ( 221 | if defined DOUBLECLICKED ( 222 | set CMDS="ui" 223 | ) else set CMDS=!args! 224 | ) else set CMDS=!args! 225 | 226 | rem We add a / in front, so we get file:///C: instead of file://C: 227 | rem Java considers the later a UNC path. 228 | rem We also attempt a solid effort at making it URI friendly. 229 | rem We don't even bother with UNC paths. 230 | set JAVA_FRIENDLY_HOME_1=/!ACTIVATOR_HOME:\=/! 231 | set JAVA_FRIENDLY_HOME=/!JAVA_FRIENDLY_HOME_1: =%%20! 
232 | 233 | rem Checks if the command contains spaces to know if it should be wrapped in quotes or not 234 | set NON_SPACED_CMD=%_JAVACMD: =% 235 | if "%_JAVACMD%"=="%NON_SPACED_CMD%" %_JAVACMD% %DEBUG_OPTS% %MEM_OPTS% %ACTIVATOR_OPTS% %SBT_OPTS% %_JAVA_OPTS% "-Dactivator.home=%JAVA_FRIENDLY_HOME%" -jar "%ACTIVATOR_HOME%\libexec\%ACTIVATOR_LAUNCH_JAR%" %CMDS% 236 | if NOT "%_JAVACMD%"=="%NON_SPACED_CMD%" "%_JAVACMD%" %DEBUG_OPTS% %MEM_OPTS% %ACTIVATOR_OPTS% %SBT_OPTS% %_JAVA_OPTS% "-Dactivator.home=%JAVA_FRIENDLY_HOME%" -jar "%ACTIVATOR_HOME%\libexec\%ACTIVATOR_LAUNCH_JAR%" %CMDS% 237 | 238 | if ERRORLEVEL 1 goto error 239 | goto end 240 | 241 | :error 242 | set ERROR_CODE=1 243 | 244 | :end 245 | 246 | @endlocal 247 | 248 | exit /B %ERROR_CODE% 249 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | 2 | name := """clickhouse-spark-connector""" 3 | 4 | version := "1.2" 5 | 6 | scalaVersion := "2.11.7" 7 | 8 | publishTo := Some("jFrog" at "http://10.2.95.5:8080/artifactory/libs-release") 9 | //credentials += Credentials("jFrog", "10.2.95.5", "admin", "password") 10 | 11 | libraryDependencies ++= Seq( 12 | "org.apache.spark" % "spark-core_2.11" % "2.0.0", 13 | "org.apache.spark" % "spark-sql_2.11" % "2.0.0", 14 | "ru.yandex.clickhouse" % "clickhouse-jdbc" % "0.1.14", 15 | "org.scalatest" %% "scalatest" % "2.2.4" % "test", 16 | "com.fasterxml.jackson.module" % "jackson-module-scala_2.11" % "2.7.4" 17 | ) 18 | 19 | fork in run := true 20 | 21 | test in assembly := {} 22 | 23 | assemblyMergeStrategy in assembly := { 24 | case n if n.startsWith("META-INF/MANIFEST.MF") => MergeStrategy.discard 25 | case "reference.conf" => MergeStrategy.concat 26 | case x => MergeStrategy.first 27 | } 28 | 29 | packAutoSettings -------------------------------------------------------------------------------- /clickhouse_files/config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | trace 5 | /var/log/clickhouse-server/clickhouse-server.log 6 | /var/log/clickhouse-server/clickhouse-server.err.log 7 | 1000M 8 | 10 9 | 10 | 11 | 12 | 8123 13 | 9000 14 | 15 | 16 | 9009 17 | 18 | 22 | 25 | 26 | 27 | :: 28 | 29 | 4096 30 | 3 31 | 32 | 33 | 100 34 | 35 | 37 | 38 | 39 | 44 | 8589934592 45 | 46 | 50 | 5368709120 51 | 52 | 53 | 54 | /opt/clickhouse/ 55 | 56 | 57 | /opt/clickhouse/tmp/ 58 | 59 | 60 | users.xml 61 | 62 | 63 | default 64 | 65 | 66 | default 67 | 68 | 71 | 73 | 74 | 75 | 76 | 77 | localhost 78 | 9000 79 | 80 | 81 | 82 | 83 | 84 | 88 | 89 | 94 | 95 | 96 | 101 | 102 | 103 | 104 | 105 | 3600 106 | 107 | 108 | 109 | false 110 | 111 | 119 | 120 | 121 | 122 | 123 | 127 | system 128 | query_log
129 | 130 | 131 | 7500 132 |
133 | 134 | 135 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 149 | *_dictionary.xml 150 | 151 | 152 | 155 | 156 | 168 | 169 | 170 | 171 | /clickhouse/task_queue 172 | 173 | 174 | 175 | 180 | 181 | 182 | 183 | 184 | click_cost 185 | any 186 | 187 | 0 188 | 3600 189 | 190 | 191 | 86400 192 | 60 193 | 194 | 195 | 196 | max 197 | 198 | 0 199 | 60 200 | 201 | 202 | 3600 203 | 300 204 | 205 | 206 | 86400 207 | 3600 208 | 209 | 210 | 211 |
212 | -------------------------------------------------------------------------------- /docker_files/docker_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | service clickhouse-server start 4 | #exec "$@" 5 | tail -f /var/log/clickhouse-server/* -------------------------------------------------------------------------------- /libexec/activator-launch-1.3.12.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DmitryBe/spark-clickhouse/d8e546505dc937ad567a32d8ab74e226ce5179e9/libexec/activator-launch-1.3.12.jar -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | #Activator-generated Properties 2 | #Thu Jan 19 11:34:38 SGT 2017 3 | template.uuid=e17acfbb-1ff5-41f5-b8cf-2c40be6a8340 4 | sbt.version=0.13.8 5 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.xerial.sbt" % "sbt-pack" % "0.8.2") 2 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") -------------------------------------------------------------------------------- /src/main/scala/io/clickhouse/ext/ClickhouseClient.scala: -------------------------------------------------------------------------------- 1 | package io.clickhouse.ext 2 | 3 | import ru.yandex.clickhouse.ClickHouseDataSource 4 | import io.clickhouse.ext.Utils._ 5 | 6 | case class ClickhouseClient(clusterNameO: Option[String] = None) 7 | (implicit ds: ClickHouseDataSource){ 8 | 9 | import io.clickhouse.ext.ClickhouseResultSetExt._ 10 | 11 | def createDb(dbName: String){ 12 | query(s"create database if not exists $dbName") 13 | } 14 | 15 | def dropDb(dbName: String){ 16 | query(s"DROP DATABASE IF EXISTS $dbName") 17 | } 18 | 19 | def query(sql: String) = { 20 | using(ds.getConnection){ conn => 21 | val statement = conn.createStatement() 22 | val rs = statement.executeQuery(sql) 23 | rs 24 | } 25 | } 26 | 27 | def queryCluster(sql: String) = { 28 | val resultSet = runOnAllNodes(sql) 29 | ClusterResultSet(resultSet) 30 | } 31 | 32 | def createDbCluster(dbName: String) = { 33 | runOnAllNodes(s"create database if not exists $dbName") 34 | .count(x => x._2 == null) 35 | } 36 | 37 | def dropDbCluster(dbName: String) = { 38 | runOnAllNodes(s"DROP DATABASE IF EXISTS $dbName") 39 | .count(x => x._2 == null) 40 | } 41 | 42 | def getClusterNodes() = { 43 | val clusterName = isClusterNameProvided() 44 | using(ds.getConnection) { conn => 45 | val statement = conn.createStatement() 46 | val rs = statement.executeQuery(s"select host_name, host_address from system.clusters where cluster == '$clusterName'") 47 | val r = rs.map(x => x.getString("host_name")) 48 | require(r.nonEmpty, s"cluster $clusterName not found") 49 | r 50 | } 51 | } 52 | 53 | private def runOnAllNodes(sql: String) = { 54 | getClusterNodes().map{ nodeIp => 55 | val nodeDs = ClickhouseConnectionFactory.get(nodeIp) 56 | val client = ClickhouseClient()(nodeDs) 57 | (nodeIp, client.query(sql)) 58 | } 59 | } 60 | 61 | private def isClusterNameProvided() = { 62 | clusterNameO match { 63 | case None => throw new Exception("cluster name is required") 64 | case Some(clusterName) => clusterName 65 | } 66 | } 67 | } --------------------------------------------------------------------------------
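`ClickhouseClient` resolves the cluster topology from `system.clusters` and fans each statement out to every node. A minimal usage sketch, assuming a ClickHouse server reachable on localhost:8123 and the `perftest_1shards_1replicas` cluster name used elsewhere in this repo:

``` scala
import io.clickhouse.ext.{ClickhouseClient, ClickhouseConnectionFactory}

object ClickhouseClientSketch extends App {
  // The factory caches one datasource per (host, port), so repeated lookups reuse the same instance.
  implicit val ds = ClickhouseConnectionFactory.get("localhost") // assumed host, default HTTP port 8123

  // The cluster name must match a cluster defined in the server's config.xml.
  val client = ClickhouseClient(Some("perftest_1shards_1replicas"))

  // Runs "create database if not exists" on every node listed in system.clusters,
  // then prints the host names the client resolved for that cluster.
  client.createDbCluster("tmp1")
  client.getClusterNodes().foreach(println)
}
```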
/src/main/scala/io/clickhouse/ext/ClickhouseConnectionFactory.scala: -------------------------------------------------------------------------------- 1 | package io.clickhouse.ext 2 | 3 | import java.util.Properties 4 | import ru.yandex.clickhouse.ClickHouseDataSource 5 | import ru.yandex.clickhouse.settings.ClickHouseProperties 6 | 7 | object ClickhouseConnectionFactory extends Serializable{ 8 | 9 | private val dataSources = scala.collection.mutable.Map[(String, Int), ClickHouseDataSource]() 10 | 11 | def get(host: String, port: Int = 8123): ClickHouseDataSource ={ 12 | dataSources.get((host, port)) match { 13 | case Some(ds) => 14 | ds 15 | case None => 16 | val ds = createDatasource(host, port = port) 17 | dataSources += ((host, port) -> ds) 18 | ds 19 | } 20 | } 21 | 22 | private def createDatasource(host: String, dbO: Option[String] = None, port: Int = 8123) = { 23 | val props = new Properties() 24 | dbO map {db => props.setProperty("database", db)} 25 | 26 | val clickHouseProps = new ClickHouseProperties(props) 27 | new ClickHouseDataSource(s"jdbc:clickhouse://$host:$port", clickHouseProps) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/io/clickhouse/ext/ClickhouseResultSetExt.scala: -------------------------------------------------------------------------------- 1 | package io.clickhouse.ext 2 | 3 | object ClickhouseResultSetExt{ 4 | implicit class ResultSetExt(rs: java.sql.ResultSet){ 5 | 6 | def map[T](delegate: (java.sql.ResultSet) => T): Seq[T] = { 7 | var results = List[T]() 8 | while(rs.next()){ 9 | results = delegate(rs) :: results 10 | } 11 | results 12 | } 13 | 14 | def toTab = { 15 | // rs meta: (colId, name, type) 16 | val header = getMeta.map(v => s"${v._2}").mkString("\t") 17 | 18 | val body = getData.map{ row => 19 | row.map(v => s"$v").mkString("\t") 20 | }.mkString("\n") 21 | 22 | val table = List(header, body).mkString("\n") 23 | println(s"%table $table") 24 | } 25 | 26 | def getMeta = { 27 | 1 to rs.getMetaData.getColumnCount map { i => 28 | (i, rs.getMetaData.getColumnName(i), rs.getMetaData.getColumnTypeName(i)) 29 | } 30 | } 31 | 32 | def getData = { 33 | val meta = getMeta 34 | val results = scala.collection.mutable.MutableList[Seq[AnyRef]]() 35 | while(rs.next()){ 36 | val row = meta.map(i => rs.getObject(i._1)) 37 | results += row 38 | } 39 | results.toList 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/io/clickhouse/ext/ClusterResultSet.scala: -------------------------------------------------------------------------------- 1 | package io.clickhouse.ext 2 | 3 | case class ClusterResultSet(clusterRs: Seq[(String, java.sql.ResultSet)]){ 4 | import io.clickhouse.ext.ClickhouseResultSetExt._ 5 | 6 | def get = clusterRs 7 | 8 | def toTab = { 9 | val firstRow = clusterRs.head 10 | val firstRowRs = firstRow._2 11 | 12 | val metaTab = if(firstRowRs != null){ 13 | val meta = firstRowRs.getMeta 14 | ("host" :: meta.map(x => s"${x._2}").toList).mkString("\t") 15 | }else{ 16 | Seq("host", "result").mkString("\t") 17 | } 18 | 19 | val bodyTab = clusterRs.map{ cur => 20 | val hostIp = cur._1 21 | if(cur._2 != null){ 22 | val ds = cur._2.getData // Seq[Seq[AnyRef]] 23 | ds.map{ row => 24 | (hostIp :: row.map(v => s"$v").toList).mkString("\t") 25 | }.mkString("\n") 26 | }else{ 27 | Seq(hostIp, "null").mkString("\t") 28 | } 29 | }.mkString("\n") 30 | 31 | val table = List(metaTab, bodyTab).mkString("\n") 32 | println(s"%table 
$table") 33 | } 34 | } -------------------------------------------------------------------------------- /src/main/scala/io/clickhouse/ext/Utils.scala: -------------------------------------------------------------------------------- 1 | package io.clickhouse.ext 2 | 3 | object Utils { 4 | def using[A, B <: {def close(): Unit}] (closeable: B) (f: B => A): A = 5 | try { 6 | f(closeable) 7 | } 8 | finally { 9 | closeable.close() 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/scala/io/clickhouse/ext/spark/DataFrameExt.scala: -------------------------------------------------------------------------------- 1 | package io.clickhouse.ext.spark 2 | 3 | import io.clickhouse.ext.{ClickhouseClient, ClickhouseConnectionFactory} 4 | import ru.yandex.clickhouse.ClickHouseDataSource 5 | import io.clickhouse.ext.Utils._ 6 | import org.apache.spark.sql.types._ 7 | 8 | object ClickhouseSparkExt{ 9 | implicit def extraOperations(df: org.apache.spark.sql.DataFrame) = DataFrameExt(df) 10 | } 11 | 12 | case class DataFrameExt(df: org.apache.spark.sql.DataFrame) extends Serializable { 13 | 14 | def dropClickhouseDb(dbName: String, clusterNameO: Option[String] = None) 15 | (implicit ds: ClickHouseDataSource){ 16 | val client = ClickhouseClient(clusterNameO)(ds) 17 | clusterNameO match { 18 | case None => client.dropDb(dbName) 19 | case Some(x) => client.dropDbCluster(dbName) 20 | } 21 | } 22 | 23 | def createClickhouseDb(dbName: String, clusterNameO: Option[String] = None) 24 | (implicit ds: ClickHouseDataSource){ 25 | val client = ClickhouseClient(clusterNameO)(ds) 26 | clusterNameO match { 27 | case None => client.createDb(dbName) 28 | case Some(x) => client.createDbCluster(dbName) 29 | } 30 | } 31 | 32 | def createClickhouseTable(dbName: String, tableName: String, partitionColumnName: String, indexColumns: Seq[String], clusterNameO: Option[String] = None) 33 | (implicit ds: ClickHouseDataSource){ 34 | val client = ClickhouseClient(clusterNameO)(ds) 35 | val sqlStmt = createClickhouseTableDefinitionSQL(dbName, tableName, partitionColumnName, indexColumns) 36 | clusterNameO match { 37 | case None => client.query(sqlStmt) 38 | case Some(clusterName) => 39 | // create local table on every node 40 | client.queryCluster(sqlStmt) 41 | // create distrib table (view) on every node 42 | val sqlStmt2 = s"CREATE TABLE IF NOT EXISTS ${dbName}.${tableName}_all AS ${dbName}.${tableName} ENGINE = Distributed($clusterName, $dbName, $tableName, rand());" 43 | client.queryCluster(sqlStmt2) 44 | } 45 | } 46 | 47 | def saveToClickhouse(dbName: String, tableName: String, partitionFunc: (org.apache.spark.sql.Row) => java.sql.Date, partitionColumnName: String = "mock_date", clusterNameO: Option[String] = None, batchSize: Int = 100000) 48 | (implicit ds: ClickHouseDataSource)={ 49 | 50 | val defaultHost = ds.getHost 51 | val defaultPort = ds.getPort 52 | 53 | val (clusterTableName, clickHouseHosts) = clusterNameO match { 54 | case Some(clusterName) => 55 | // get nodes from cluster 56 | val client = ClickhouseClient(clusterNameO)(ds) 57 | (s"${tableName}_all", client.getClusterNodes()) 58 | case None => 59 | (tableName, Seq(defaultHost)) 60 | } 61 | 62 | val schema = df.schema 63 | 64 | // following code is going to be run on executors 65 | val insertResults = df.rdd.mapPartitions((partition: Iterator[org.apache.spark.sql.Row])=>{ 66 | 67 | val rnd = scala.util.Random.nextInt(clickHouseHosts.length) 68 | val targetHost = clickHouseHosts(rnd) 69 | val targetHostDs = 
ClickhouseConnectionFactory.get(targetHost, defaultPort) 70 | 71 | // explicit closing 72 | using(targetHostDs.getConnection) { conn => 73 | 74 | val insertStatementSql = generateInsertStatment(schema, dbName, clusterTableName, partitionColumnName) 75 | val statement = conn.prepareStatement(insertStatementSql) 76 | 77 | var totalInsert = 0 78 | var counter = 0 79 | 80 | while(partition.hasNext){ 81 | 82 | counter += 1 83 | val row = partition.next() 84 | 85 | // create mock date 86 | val partitionVal = partitionFunc(row) 87 | statement.setDate(1, partitionVal) 88 | 89 | // map fields 90 | schema.foreach{ f => 91 | val fieldName = f.name 92 | val fieldIdx = row.fieldIndex(fieldName) 93 | val fieldVal = row.get(fieldIdx) 94 | if(fieldVal != null) 95 | statement.setObject(fieldIdx + 2, fieldVal) 96 | else{ 97 | val defVal = defaultNullValue(f.dataType, fieldVal) 98 | statement.setObject(fieldIdx + 2, defVal) 99 | } 100 | } 101 | statement.addBatch() 102 | 103 | if(counter >= batchSize){ 104 | val r = statement.executeBatch() 105 | totalInsert += r.sum 106 | counter = 0 107 | } 108 | 109 | } // end: while 110 | 111 | if(counter > 0) { 112 | val r = statement.executeBatch() 113 | totalInsert += r.sum 114 | counter = 0 115 | } 116 | 117 | // return: Seq((host, insertCount)) 118 | List((targetHost, totalInsert)).toIterator 119 | } 120 | 121 | }).collect() 122 | 123 | // aggr insert results by hosts 124 | insertResults.groupBy(_._1) 125 | .map(x => (x._1, x._2.map(_._2).sum)) 126 | } 127 | 128 | private def generateInsertStatment(schema: org.apache.spark.sql.types.StructType, dbName: String, tableName: String, partitionColumnName: String) = { 129 | val columns = partitionColumnName :: schema.map(f => f.name).toList 130 | val vals = 1 to (columns.length) map (i => "?") 131 | s"INSERT INTO $dbName.$tableName (${columns.mkString(",")}) VALUES (${vals.mkString(",")})" 132 | } 133 | 134 | private def defaultNullValue(sparkType: org.apache.spark.sql.types.DataType, v: Any) = sparkType match { 135 | case DoubleType => 0 136 | case LongType => 0 137 | case FloatType => 0 138 | case IntegerType => 0 139 | case StringType => null 140 | case BooleanType => false 141 | case _ => null 142 | } 143 | 144 | private def createClickhouseTableDefinitionSQL(dbName: String, tableName: String, partitionColumnName: String, indexColumns: Seq[String])= { 145 | 146 | val header = s""" 147 | CREATE TABLE IF NOT EXISTS $dbName.$tableName( 148 | """ 149 | 150 | val columns = s"$partitionColumnName Date" :: df.schema.map{ f => 151 | Seq(f.name, sparkType2ClickhouseType(f.dataType)).mkString(" ") 152 | }.toList 153 | val columnsStr = columns.mkString(",\n") 154 | 155 | val footer = s""" 156 | )ENGINE = MergeTree($partitionColumnName, (${indexColumns.mkString(",")}), 8192); 157 | """ 158 | 159 | Seq(header, columnsStr, footer).mkString("\n") 160 | } 161 | 162 | private def sparkType2ClickhouseType(sparkType: org.apache.spark.sql.types.DataType)= sparkType match { 163 | case LongType => "Int64" 164 | case DoubleType => "Float64" 165 | case FloatType => "Float32" 166 | case IntegerType => "Int32" 167 | case StringType => "String" 168 | case BooleanType => "UInt8" 169 | case _ => "unknown" 170 | } 171 | 172 | } 173 | -------------------------------------------------------------------------------- /src/test/scala/DFExtSpec.scala: -------------------------------------------------------------------------------- 1 | 2 | import org.scalatest._ 3 | import io.clickhouse.ext.ClickhouseConnectionFactory 4 | import 
io.clickhouse.ext.spark.ClickhouseSparkExt._ 5 | import org.apache.spark.sql.SparkSession 6 | 7 | case class Row1(name: String, v: Int, v2: Int) 8 | 9 | class TestSpec extends FlatSpec with Matchers { 10 | 11 | "case0" should "" in { 12 | 13 | val max = 25e6 14 | val monthSize = max / 11 15 | val daySize = monthSize / 28 16 | 17 | def yearMap(chrom: String) = { 18 | 1900 + (math.abs(chrom.hashCode) % 200) 19 | } 20 | 21 | def monthDayMap(pos: Int) = { 22 | val m = (pos / monthSize).toInt 23 | val d = ((pos % monthSize) / daySize).toInt 24 | (m + 1, d + 1) 25 | } 26 | 27 | val r = (5024637 to 48119824).toList map { pos => 28 | monthDayMap(pos) 29 | } 30 | 31 | val month_range = r.map(_._1).distinct 32 | val day_range = r.map(_._2).distinct 33 | 34 | assert(true) 35 | } 36 | 37 | "case 11" should "" in { 38 | 39 | val a = 1 40 | 41 | def calc(pos: Int) = { 42 | val x = pos / 25e6 * 348 43 | val m = x % 12 44 | val d = x % 29 45 | (m.toInt, d.toInt) 46 | } 47 | 48 | val r = (0 to 1000000).toList map { pos => 49 | calc(pos) 50 | } 51 | 52 | val month_range = r.map(_._1).distinct 53 | val day_range = r.map(_._2).distinct 54 | 55 | 56 | assert(true) 57 | } 58 | 59 | "case1" should "ok" in { 60 | 61 | val sparkSession = SparkSession.builder 62 | .master("local") 63 | .appName("local spark") 64 | .getOrCreate() 65 | 66 | val sc = sparkSession.sparkContext 67 | val sqlContext = sparkSession.sqlContext 68 | 69 | // test dframe 70 | val df = sqlContext.createDataFrame(1 to 10 map(i => Row1(s"$i", i, i + 10)) ) 71 | 72 | // clickhouse params 73 | val anyHost = "localhost" 74 | val db = "tmp1" 75 | val tableName = "t1" 76 | // val clusterName = None: Option[String] 77 | // start clickhouse docker using config.xml from clickhouse_files 78 | val clusterName = Some("perftest_1shards_1replicas"): Option[String] 79 | 80 | // define clickhouse connection 81 | implicit val clickhouseDataSource = ClickhouseConnectionFactory.get(anyHost) 82 | 83 | // create db / table 84 | df.dropClickhouseDb(db, clusterName) 85 | df.createClickhouseDb(db, clusterName) 86 | df.createClickhouseTable(db, tableName, "mock_date", Seq("name"), clusterNameO = clusterName) 87 | 88 | // save data 89 | val res = df.saveToClickhouse("tmp1", "t1", (row) => java.sql.Date.valueOf("2000-12-01"), "mock_date", clusterNameO = clusterName) 90 | assert(res.size == 1) 91 | assert(res.get("localhost") == Some(df.count())) 92 | 93 | true should === (true) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/test/scala/UtilsSpec.scala: -------------------------------------------------------------------------------- 1 | import java.util.Properties 2 | 3 | import org.scalatest.{FlatSpec, Matchers} 4 | import io.clickhouse.ext.Utils._ 5 | 6 | class UtilsSpec extends FlatSpec with Matchers{ 7 | 8 | "case 1" should "ok" in { 9 | 10 | var f = false 11 | case class Mock(){ 12 | def print(): Unit = { 13 | println("mock print") 14 | } 15 | def close(): Unit ={ 16 | f = true 17 | } 18 | } 19 | 20 | using(Mock()){ mock => 21 | mock.print() 22 | } 23 | assert(f.equals(true)) 24 | } 25 | 26 | } 27 | --------------------------------------------------------------------------------
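`UtilsSpec` only checks that `close()` fires; in the connector itself `using` is the loan pattern guarding every JDBC connection (see `ClickhouseClient.query` and the executor-side code in `DataFrameExt`). A minimal sketch of the same pattern against a live connection, assuming the localhost datasource used by the other tests:

``` scala
import io.clickhouse.ext.Utils.using
import io.clickhouse.ext.ClickhouseConnectionFactory

object UsingSketch extends App {
  val ds = ClickhouseConnectionFactory.get("localhost") // assumed host, default port 8123

  // The connection is closed in using's finally block even if the query throws.
  val version = using(ds.getConnection) { conn =>
    val rs = conn.createStatement().executeQuery("select version()")
    rs.next()
    rs.getString(1)
  }
  println(s"connected to ClickHouse $version")
}
```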