├── .gitattributes
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── dependabot.yml
├── pull_request_template.md
└── workflows
│ ├── build-and-test.yml
│ ├── check-license.yml
│ ├── cloud.yml
│ ├── publish-release.yml
│ ├── publish-snapshot.yml
│ ├── sonar.yml
│ ├── style.yml
│ └── tpcds.yml
├── .gitignore
├── .scalafmt.conf
├── LICENSE
├── NOTICE
├── README.md
├── build.gradle
├── clickhouse-core-it
└── src
│ └── test
│ ├── resources
│ └── log4j.properties
│ └── scala
│ └── com
│ └── clickhouse
│ └── spark
│ ├── HashSuite.scala
│ └── UtilsSuite.scala
├── clickhouse-core
└── src
│ ├── main
│ ├── antlr
│ │ └── com.clickhouse
│ │ │ └── ClickHouseSQL.g4
│ ├── java
│ │ └── com
│ │ │ └── clickhouse
│ │ │ └── spark
│ │ │ ├── exception
│ │ │ └── ClickHouseErrCode.java
│ │ │ └── hash
│ │ │ └── cityhash
│ │ │ ├── CityHash_v1_0_2.java
│ │ │ └── UInt128.java
│ └── scala
│ │ └── com
│ │ └── clickhouse
│ │ └── spark
│ │ ├── JsonProtocol.scala
│ │ ├── Logging.scala
│ │ ├── Utils.scala
│ │ ├── client
│ │ ├── ClusterClient.scala
│ │ ├── NodeClient.scala
│ │ └── NodesClient.scala
│ │ ├── exception
│ │ └── CHException.scala
│ │ ├── expr
│ │ └── Expressions.scala
│ │ ├── format
│ │ ├── InputFormat.scala
│ │ ├── JSONOutputFormat.scala
│ │ └── OutputFormat.scala
│ │ ├── hash
│ │ ├── CityHash64.scala
│ │ ├── HashFunc.scala
│ │ ├── HashUtils.scala
│ │ ├── Murmurhash2_32.scala
│ │ ├── Murmurhash2_64.scala
│ │ ├── Murmurhash3_32.scala
│ │ └── Murmurhash3_64.scala
│ │ ├── io
│ │ ├── ForwardingOutputStream.scala
│ │ ├── ForwardingWriter.scala
│ │ └── ObservableOutputStream.scala
│ │ ├── parse
│ │ ├── AstVisitor.scala
│ │ ├── ParseUtils.scala
│ │ └── SQLParser.scala
│ │ └── spec
│ │ ├── DatabaseSpec.scala
│ │ ├── NodeSpec.scala
│ │ ├── PartitionSpec.scala
│ │ ├── ShardUtils.scala
│ │ ├── TableEngineSpec.scala
│ │ ├── TableEngineUtils.scala
│ │ └── TableSpec.scala
│ ├── test
│ ├── resources
│ │ └── log4j.properties
│ └── scala
│ │ └── com
│ │ └── clickhouse
│ │ └── spark
│ │ ├── UtilsSuite.scala
│ │ ├── parse
│ │ └── SQLParserSuite.scala
│ │ └── spec
│ │ ├── NodeSpecHelper.scala
│ │ ├── NodeSpecSuite.scala
│ │ └── ShardUtilsSuite.scala
│ └── testFixtures
│ ├── conf
│ ├── clickhouse-cluster
│ │ ├── .env
│ │ ├── clickhouse-s2r2-compose.yml
│ │ ├── config.xml
│ │ ├── remote_servers.xml
│ │ ├── s1r1
│ │ │ ├── interserver_http_host.xml
│ │ │ └── macros.xml
│ │ ├── s1r2
│ │ │ ├── interserver_http_host.xml
│ │ │ └── macros.xml
│ │ ├── s2r1
│ │ │ ├── interserver_http_host.xml
│ │ │ └── macros.xml
│ │ ├── s2r2
│ │ │ ├── interserver_http_host.xml
│ │ │ └── macros.xml
│ │ ├── users.xml
│ │ └── zookeeper.xml
│ └── clickhouse-single
│ │ └── users.xml
│ ├── java
│ └── org
│ │ └── scalatest
│ │ └── tags
│ │ └── Cloud.java
│ └── scala
│ └── com
│ └── clickhouse
│ └── spark
│ └── base
│ ├── ClickHouseCloudMixIn.scala
│ ├── ClickHouseClusterMixIn.scala
│ ├── ClickHouseProvider.scala
│ └── ClickHouseSingleMixIn.scala
├── deploy.gradle
├── dev
├── backport
└── reformat
├── docker
├── .env
├── .env-dev
├── README.md
├── build-image.sh
├── compose-dev.yml
├── compose.yml
├── conf
│ ├── cloudbeaver-conf
│ │ ├── README.md
│ │ ├── cloudbeaver.conf
│ │ ├── initial-data-sources.conf
│ │ ├── logback.xml
│ │ └── product.conf
│ ├── core-site.xml
│ ├── hive-site.xml
│ ├── kyuubi-defaults.conf
│ └── spark-defaults.conf
├── image
│ ├── scc-base.Dockerfile
│ ├── scc-hadoop.Dockerfile
│ ├── scc-kyuubi.Dockerfile
│ ├── scc-metastore.Dockerfile
│ └── scc-spark.Dockerfile
└── script
│ ├── hive-schema-2.3.0.postgres.sql
│ └── hive-txn-schema-2.3.0.postgres.sql
├── docs
├── best_practices
│ ├── 01_deployment.md
│ └── index.md
├── configurations
│ ├── 01_catalog_configurations.md
│ ├── 02_sql_configurations.md
│ └── index.md
├── developers
│ ├── 01_build_and_test.md
│ ├── 02_docs_and_website.md
│ ├── 03_private_release.md
│ ├── 04_public_release.md
│ └── index.md
├── imgs
│ ├── scc_overview.drawio
│ ├── scc_overview.drawio.png
│ ├── scc_read_bucket_join.drawio
│ ├── scc_read_bucket_join.drawio.png
│ ├── scc_read_pushdown_disable.drawio
│ ├── scc_read_pushdown_disable.drawio.png
│ ├── scc_read_pushdown_enable.drawio
│ ├── scc_read_pushdown_enable.drawio.png
│ ├── scc_read_sort_merge_join.drawio
│ ├── scc_read_sort_merge_join.drawio.png
│ ├── scc_write_rebalance_sort.drawio
│ ├── scc_write_rebalance_sort.drawio.png
│ ├── spark_centralized_metastore.drawio
│ ├── spark_centralized_metastore.drawio.png
│ ├── spark_multi_catalog.drawio
│ └── spark_multi_catalog.drawio.png
├── index.md
├── internals
│ ├── 01_catalog.md
│ ├── 02_read.md
│ ├── 03_write.md
│ └── index.md
└── quick_start
│ ├── 01_get_the_library.md
│ ├── 02_play_with_spark_sql.md
│ └── 03_play_with_spark_shell.md
├── examples
└── scala
│ ├── README.md
│ └── spark-3.5
│ ├── .bsp
│ └── sbt.json
│ ├── build.sbt
│ ├── project
│ └── build.properties
│ └── src
│ └── main
│ └── scala
│ └── Saprk-3.5.scala
├── gradle.properties
├── gradle
└── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── settings.gradle
├── spark-3.3
├── build.gradle
├── clickhouse-spark-it
│ └── src
│ │ └── test
│ │ ├── resources
│ │ └── log4j2.xml
│ │ └── scala
│ │ └── org
│ │ └── apache
│ │ └── spark
│ │ └── sql
│ │ └── clickhouse
│ │ ├── SparkTest.scala
│ │ ├── TPCDSTestUtils.scala
│ │ ├── TestUtils.scala
│ │ ├── cluster
│ │ ├── BaseClusterWriteSuite.scala
│ │ ├── ClickHouseClusterReadSuite.scala
│ │ ├── ClickHouseClusterUDFSuite.scala
│ │ ├── ClusterDeleteSuite.scala
│ │ ├── ClusterPartitionManagementSuite.scala
│ │ ├── ClusterShardByRandSuite.scala
│ │ ├── ClusterTableManagementSuite.scala
│ │ ├── SparkClickHouseClusterTest.scala
│ │ └── TPCDSClusterSuite.scala
│ │ └── single
│ │ ├── ClickHouseDataTypeSuite.scala
│ │ ├── ClickHouseGenericSuite.scala
│ │ ├── ClickHouseTableDDLSuite.scala
│ │ ├── SparkClickHouseSingleTest.scala
│ │ ├── TPCDSSuite.scala
│ │ └── WriteDistributionAndOrderingSuite.scala
├── clickhouse-spark-runtime
│ └── .gitkeep
└── clickhouse-spark
│ └── src
│ ├── main
│ └── scala
│ │ ├── com
│ │ └── clickhouse
│ │ │ └── spark
│ │ │ ├── ClickHouseCatalog.scala
│ │ │ ├── ClickHouseCommandRunner.scala
│ │ │ ├── ClickHouseHelper.scala
│ │ │ ├── ClickHouseTable.scala
│ │ │ ├── CommitMessage.scala
│ │ │ ├── Constants.scala
│ │ │ ├── Metrics.scala
│ │ │ ├── SQLHelper.scala
│ │ │ ├── func
│ │ │ ├── ClickHouseXxHash64.scala
│ │ │ └── FunctionRegistry.scala
│ │ │ ├── read
│ │ │ ├── ClickHouseMetadataColumn.scala
│ │ │ ├── ClickHouseRead.scala
│ │ │ ├── ClickHouseReader.scala
│ │ │ ├── InputPartitions.scala
│ │ │ ├── ScanJobDescription.scala
│ │ │ └── format
│ │ │ │ ├── ClickHouseBinaryReader.scala
│ │ │ │ └── ClickHouseJsonReader.scala
│ │ │ └── write
│ │ │ ├── ClickHouseWrite.scala
│ │ │ ├── ClickHouseWriter.scala
│ │ │ ├── WriteJobDescription.scala
│ │ │ └── format
│ │ │ ├── ClickHouseArrowStreamWriter.scala
│ │ │ └── ClickHouseJsonEachRowWriter.scala
│ │ └── org
│ │ └── apache
│ │ └── spark
│ │ └── sql
│ │ └── clickhouse
│ │ ├── ClickHouseSQLConf.scala
│ │ ├── ExprUtils.scala
│ │ ├── JsonWriter.scala
│ │ ├── SchemaUtils.scala
│ │ ├── SparkOptions.scala
│ │ └── SparkUtils.scala
│ └── test
│ ├── resources
│ └── log4j2.xml
│ └── scala
│ └── org
│ └── apache
│ └── spark
│ └── sql
│ └── clickhouse
│ ├── ClickHouseHelperSuite.scala
│ └── SchemaUtilsSuite.scala
├── spark-3.4
├── build.gradle
├── clickhouse-spark-it
│ └── src
│ │ └── test
│ │ ├── resources
│ │ └── log4j2.xml
│ │ └── scala
│ │ └── org
│ │ └── apache
│ │ └── spark
│ │ └── sql
│ │ └── clickhouse
│ │ ├── SparkTest.scala
│ │ ├── TPCDSTestUtils.scala
│ │ ├── TestUtils.scala
│ │ ├── cluster
│ │ ├── BaseClusterWriteSuite.scala
│ │ ├── ClickHouseClusterHashUDFSuite.scala
│ │ ├── ClickHouseClusterReadSuite.scala
│ │ ├── ClusterDeleteSuite.scala
│ │ ├── ClusterPartitionManagementSuite.scala
│ │ ├── ClusterShardByRandSuite.scala
│ │ ├── ClusterTableManagementSuite.scala
│ │ ├── SparkClickHouseClusterTest.scala
│ │ └── TPCDSClusterSuite.scala
│ │ └── single
│ │ ├── ClickHouseDataTypeSuite.scala
│ │ ├── ClickHouseGenericSuite.scala
│ │ ├── ClickHouseTableDDLSuite.scala
│ │ ├── SparkClickHouseSingleTest.scala
│ │ ├── TPCDSSuite.scala
│ │ └── WriteDistributionAndOrderingSuite.scala
├── clickhouse-spark-runtime
│ └── .gitkeep
└── clickhouse-spark
│ └── src
│ ├── main
│ └── scala
│ │ ├── com
│ │ └── clickhouse
│ │ │ └── spark
│ │ │ ├── ClickHouseCatalog.scala
│ │ │ ├── ClickHouseCommandRunner.scala
│ │ │ ├── ClickHouseHelper.scala
│ │ │ ├── ClickHouseTable.scala
│ │ │ ├── CommitMessage.scala
│ │ │ ├── Constants.scala
│ │ │ ├── Metrics.scala
│ │ │ ├── SQLHelper.scala
│ │ │ ├── func
│ │ │ ├── CityHash64.scala
│ │ │ ├── FunctionRegistry.scala
│ │ │ ├── MultiStringArgsHash.scala
│ │ │ ├── MurmurHash2.scala
│ │ │ ├── MurmurHash3.scala
│ │ │ └── XxHash64.scala
│ │ │ ├── read
│ │ │ ├── ClickHouseMetadataColumn.scala
│ │ │ ├── ClickHouseRead.scala
│ │ │ ├── ClickHouseReader.scala
│ │ │ ├── InputPartitions.scala
│ │ │ ├── ScanJobDescription.scala
│ │ │ └── format
│ │ │ │ ├── ClickHouseBinaryReader.scala
│ │ │ │ └── ClickHouseJsonReader.scala
│ │ │ └── write
│ │ │ ├── ClickHouseWrite.scala
│ │ │ ├── ClickHouseWriter.scala
│ │ │ ├── WriteJobDescription.scala
│ │ │ └── format
│ │ │ ├── ClickHouseArrowStreamWriter.scala
│ │ │ └── ClickHouseJsonEachRowWriter.scala
│ │ └── org
│ │ └── apache
│ │ └── spark
│ │ └── sql
│ │ └── clickhouse
│ │ ├── ClickHouseSQLConf.scala
│ │ ├── ExprUtils.scala
│ │ ├── JsonWriter.scala
│ │ ├── SchemaUtils.scala
│ │ ├── SparkOptions.scala
│ │ └── SparkUtils.scala
│ └── test
│ ├── resources
│ └── log4j2.xml
│ └── scala
│ └── org
│ └── apache
│ └── spark
│ └── sql
│ └── clickhouse
│ ├── ClickHouseHelperSuite.scala
│ ├── ConfigurationSuite.scala
│ ├── FunctionRegistrySuite.scala
│ └── SchemaUtilsSuite.scala
├── spark-3.5
├── build.gradle
├── clickhouse-spark-it
│ └── src
│ │ └── test
│ │ ├── resources
│ │ └── log4j2.xml
│ │ └── scala
│ │ └── org
│ │ └── apache
│ │ └── spark
│ │ └── sql
│ │ └── clickhouse
│ │ ├── SparkTest.scala
│ │ ├── TPCDSTestUtils.scala
│ │ ├── TestUtils.scala
│ │ ├── cluster
│ │ ├── BaseClusterWriteSuite.scala
│ │ ├── ClickHouseClusterHashUDFSuite.scala
│ │ ├── ClickHouseClusterReadSuite.scala
│ │ ├── ClusterDeleteSuite.scala
│ │ ├── ClusterPartitionManagementSuite.scala
│ │ ├── ClusterShardByRandSuite.scala
│ │ ├── ClusterTableManagementSuite.scala
│ │ ├── SparkClickHouseClusterTest.scala
│ │ └── TPCDSClusterSuite.scala
│ │ └── single
│ │ ├── ClickHouseDataTypeSuite.scala
│ │ ├── ClickHouseGenericSuite.scala
│ │ ├── ClickHouseTableDDLSuite.scala
│ │ ├── SparkClickHouseSingleTest.scala
│ │ ├── TPCDSSuite.scala
│ │ └── WriteDistributionAndOrderingSuite.scala
├── clickhouse-spark-runtime
│ └── .gitkeep
└── clickhouse-spark
│ └── src
│ ├── main
│ └── scala
│ │ ├── com
│ │ └── clickhouse
│ │ │ └── spark
│ │ │ ├── ClickHouseCatalog.scala
│ │ │ ├── ClickHouseCommandRunner.scala
│ │ │ ├── ClickHouseHelper.scala
│ │ │ ├── ClickHouseTable.scala
│ │ │ ├── CommitMessage.scala
│ │ │ ├── Constants.scala
│ │ │ ├── Metrics.scala
│ │ │ ├── SQLHelper.scala
│ │ │ ├── func
│ │ │ ├── CityHash64.scala
│ │ │ ├── FunctionRegistry.scala
│ │ │ ├── MultiStringArgsHash.scala
│ │ │ ├── MurmurHash2.scala
│ │ │ ├── MurmurHash3.scala
│ │ │ └── XxHash64.scala
│ │ │ ├── read
│ │ │ ├── ClickHouseMetadataColumn.scala
│ │ │ ├── ClickHouseRead.scala
│ │ │ ├── ClickHouseReader.scala
│ │ │ ├── InputPartitions.scala
│ │ │ ├── ScanJobDescription.scala
│ │ │ └── format
│ │ │ │ ├── ClickHouseBinaryReader.scala
│ │ │ │ └── ClickHouseJsonReader.scala
│ │ │ └── write
│ │ │ ├── ClickHouseWrite.scala
│ │ │ ├── ClickHouseWriter.scala
│ │ │ ├── WriteJobDescription.scala
│ │ │ └── format
│ │ │ ├── ClickHouseArrowStreamWriter.scala
│ │ │ └── ClickHouseJsonEachRowWriter.scala
│ │ └── org
│ │ └── apache
│ │ └── spark
│ │ └── sql
│ │ └── clickhouse
│ │ ├── ClickHouseSQLConf.scala
│ │ ├── ExprUtils.scala
│ │ ├── JsonWriter.scala
│ │ ├── SchemaUtils.scala
│ │ ├── SparkOptions.scala
│ │ └── SparkUtils.scala
│ └── test
│ ├── resources
│ └── log4j2.xml
│ └── scala
│ └── org
│ └── apache
│ └── spark
│ └── sql
│ └── clickhouse
│ ├── ClickHouseHelperSuite.scala
│ ├── ConfigurationSuite.scala
│ ├── FunctionRegistrySuite.scala
│ └── SchemaUtilsSuite.scala
└── version.txt
/.gitattributes:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | *.bat text eol=crlf
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 |
2 | ---
3 | name: Bug report
4 | about: Create a report to help us improve
5 | title: ''
6 | labels: bug
7 | assignees: ''
8 |
9 | ---
10 |
11 |
12 | ### Describe the bug
13 |
14 | ### Steps to reproduce
15 | 1.
16 | 2.
17 | 3.
18 |
19 | ### Expected behaviour
20 |
21 | ### Error log
22 |
23 | ### Configuration
24 | #### Environment
25 | * Apache Spark version:
26 | * Scala version
27 | * Connector configuration:
28 | * OS:
29 |
30 | #### ClickHouse server
31 | * ClickHouse Server version:
32 | * ClickHouse Server non-default settings, if any:
33 | * `CREATE TABLE` statements for tables involved:
34 | * Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
35 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: What would you like to add to the project?
4 | title: ''
5 | labels: 'enhancement'
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "gradle"
9 | directory: "/"
10 | schedule:
11 | interval: "monthly"
12 | labels:
13 | - "dependencies"
14 | open-pull-requests-limit: 5
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Summary
2 |
3 |
4 | ## Checklist
5 | Delete items not relevant to your PR:
6 | - [ ] Unit and integration tests covering the common scenarios were added
7 | - [ ] A human-readable description of the changes was provided to include in CHANGELOG
8 | - [ ] For significant changes, documentation in https://github.com/ClickHouse/clickhouse-docs was updated with further explanations or tutorials
--------------------------------------------------------------------------------
/.github/workflows/check-license.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | name: "Check License"
16 | on:
17 | push:
18 | branches:
19 | - "branch-*"
20 | - "main"
21 | pull_request:
22 | branches:
23 | - "branch-*"
24 | - "main"
25 |
26 | jobs:
27 | check-license:
28 | runs-on: ubuntu-22.04
29 | strategy:
30 | fail-fast: false
31 | matrix:
32 | spark: [ 3.3, 3.4, 3.5 ]
33 | steps:
34 | - uses: actions/checkout@v4
35 | - uses: actions/setup-java@v4
36 | with:
37 | distribution: zulu
38 | java-version: 8
39 | - run: >-
40 | ./gradlew rat --no-daemon
41 | -Dspark_binary_version=${{ matrix.spark }}
42 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/
43 |
--------------------------------------------------------------------------------
/.github/workflows/cloud.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | name: "ClickHouse Cloud"
16 |
17 | on:
18 | push:
19 | branches:
20 | - "branch-*"
21 | - "main"
22 | pull_request:
23 | branches:
24 | - "branch-*"
25 | - "main"
26 |
27 | jobs:
28 | run-tests-with-clickhouse-cloud:
29 | runs-on: ubuntu-22.04
30 | strategy:
31 | max-parallel: 1
32 | fail-fast: false
33 | matrix:
34 | spark: [ 3.3, 3.4, 3.5 ]
35 | scala: [ 2.12, 2.13 ]
36 | env:
37 | CLICKHOUSE_CLOUD_HOST: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST_SMT }}
38 | CLICKHOUSE_CLOUD_PASSWORD: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD_SMT }}
39 | steps:
40 | - uses: actions/checkout@v4
41 | - uses: actions/setup-java@v4
42 | with:
43 | distribution: zulu
44 | java-version: 8
45 | cache: gradle
46 | - run: >-
47 | ./gradlew clean cloudTest --no-daemon --refresh-dependencies
48 | -Dspark_binary_version=${{ matrix.spark }}
49 | -Dscala_binary_version=${{ matrix.scala }}
50 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/
51 | - name: Upload test logs
52 | if: failure()
53 | uses: actions/upload-artifact@v3
54 | with:
55 | name: log-clickhouse-cloud-spark-${{ matrix.spark }}-scala-${{ matrix.scala }}
56 | path: |
57 | **/build/unit-tests.log
58 | log/**
59 |
--------------------------------------------------------------------------------
/.github/workflows/publish-snapshot.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | name: "Publish Snapshot"
16 |
17 | on:
18 | schedule:
19 | - cron: '0 0 * * *'
20 |
21 | jobs:
22 | publish-snapshot:
23 | if: ${{ startsWith(github.repository, 'clickhouse/') }}
24 | runs-on: ubuntu-22.04
25 | strategy:
26 | fail-fast: false
27 | matrix:
28 | branch:
29 | - "main"
30 | scala: [ 2.12, 2.13 ]
31 | spark: [ 3.3, 3.4, 3.5 ]
32 | steps:
33 | - uses: actions/checkout@v4
34 | with:
35 | ref: ${{ matrix.branch }}
36 | - uses: actions/setup-java@v4
37 | with:
38 | distribution: zulu
39 | java-version: 8
40 | cache: gradle
41 | - name: Publish Snapshot - ${{ matrix.branch }}
42 | run: >-
43 | ./gradlew clean publish --no-daemon --refresh-dependencies
44 | -Dscala_binary_version=${{ matrix.scala }}
45 | -Dspark_binary_version=${{ matrix.spark }}
46 | -PmavenUser='${{ secrets.NEXUS_USER }}'
47 | -PmavenPassword='${{ secrets.NEXUS_PW }}'
48 |
--------------------------------------------------------------------------------
/.github/workflows/sonar.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | name: "SonarQube"
16 |
17 | on:
18 | schedule:
19 | - cron: '0 0 * * *'
20 |
21 | jobs:
22 | sonar-report:
23 | if: ${{ startsWith(github.repository, 'clickhouse/') }}
24 | runs-on: ubuntu-22.04
25 | steps:
26 | - uses: actions/checkout@v4
27 | - uses: actions/setup-java@v4
28 | with:
29 | distribution: zulu
30 | java-version: 8
31 | cache: gradle
32 | - run: >-
33 | ./gradlew sonarqube
34 | -Dsonar.projectKey=spark-clickhouse-connector
35 | -Dsonar.host.url=${{ secrets.SONAR_URL }}
36 | -Dsonar.login=${{ secrets.SONAR_TOKEN }}
37 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/
38 |
--------------------------------------------------------------------------------
/.github/workflows/style.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | name: "Check Style"
16 |
17 | on:
18 | push:
19 | branches:
20 | - "branch-*"
21 | - "main"
22 | pull_request:
23 | branches:
24 | - "branch-*"
25 | - "main"
26 |
27 | jobs:
28 | check-style:
29 | runs-on: ubuntu-22.04
30 | strategy:
31 | fail-fast: false
32 | matrix:
33 | spark: [ 3.3, 3.4, 3.5 ]
34 | steps:
35 | - uses: actions/checkout@v4
36 | - uses: actions/setup-java@v4
37 | with:
38 | distribution: zulu
39 | java-version: 8
40 | cache: gradle
41 | - run: >-
42 | ./gradlew spotlessCheck --no-daemon --refresh-dependencies
43 | -Dspark_binary_version=${{ matrix.spark }}
44 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/
45 |
--------------------------------------------------------------------------------
/.github/workflows/tpcds.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | name: "TPC-DS"
16 |
17 | on:
18 | push:
19 | branches:
20 | - "branch-*"
21 | - "main"
22 | pull_request:
23 | branches:
24 | - "branch-*"
25 | - "main"
26 |
27 | jobs:
28 | run-tpcds-sf1:
29 | runs-on: ubuntu-22.04
30 | strategy:
31 | fail-fast: false
32 | matrix:
33 | spark: [ 3.3, 3.4, 3.5 ]
34 | scala: [ 2.12, 2.13 ]
35 | steps:
36 | - uses: actions/checkout@v4
37 | - uses: actions/setup-java@v4
38 | with:
39 | distribution: zulu
40 | java-version: 8
41 | cache: gradle
42 | - run: >-
43 | ./gradlew clean slowTest --no-daemon --refresh-dependencies
44 | -Dspark_binary_version=${{ matrix.spark }}
45 | -Dscala_binary_version=${{ matrix.scala }}
46 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/
47 | - name: Upload test logs
48 | if: failure()
49 | uses: actions/upload-artifact@v3
50 | with:
51 | name: log-tpcds-spark-${{ matrix.spark }}-scala-${{ matrix.scala }}
52 | path: |
53 | **/build/unit-tests.log
54 | log/**
55 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | .DS_Store
3 | .cache
4 | tmp/
5 |
6 | # intellij files
7 | .idea
8 | .idea_modules/
9 | *.ipr
10 | *.iws
11 | *.iml
12 |
13 | # gradle build
14 | .gradle
15 | .out/
16 | build
17 | dependencies.lock
18 | **/dependencies.lock
19 |
20 | # rat library install location
21 | lib/
22 |
23 | # web site build
24 | site/
25 |
26 | __pycache__/
27 | *.py[cod]
28 | .eggs/
29 | .tox/
30 | env/
31 | venv/
32 | *.egg-info/
33 | test-reports
34 | build/
35 | dist/
36 | sdist/
37 | .coverage
38 | coverage.xml
39 | .pytest_cache/
40 | .python-version
41 |
42 | # vscode/eclipse files
43 | .classpath
44 | .project
45 | .settings
46 | bin/
47 |
48 | # Hive/metastore files
49 | metastore_db/
50 |
51 | # Spark/metastore files
52 | spark-warehouse/
53 | derby.log
54 |
55 | # Python stuff
56 | python/.mypy_cache/
57 |
58 | *.patch
59 | **/*.drawio.bkp
60 |
61 | log/
62 |
63 | # sbt
64 | dist/*
65 | target/
66 | lib_managed/
67 | src_managed/
68 | project/boot/
69 | project/plugins/project/
70 | .history
71 | .cache
72 | .lib/
73 |
--------------------------------------------------------------------------------
/.scalafmt.conf:
--------------------------------------------------------------------------------
1 | // Licensed under the Apache License, Version 2.0 (the "License");
2 | // you may not use this file except in compliance with the License.
3 | // You may obtain a copy of the License at
4 | //
5 | // https://www.apache.org/licenses/LICENSE-2.0
6 | //
7 | // Unless required by applicable law or agreed to in writing, software
8 | // distributed under the License is distributed on an "AS IS" BASIS,
9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | // See the License for the specific language governing permissions and
11 | // limitations under the License.
12 |
13 | version=3.6.1
14 | runner.dialect=scala212
15 | project.git=true
16 |
17 | align.preset=none
18 | align.stripMargin=true
19 | docstrings.style=keep
20 | maxColumn=120
21 | newlines.source=keep
22 | continuationIndent.defnSite=2
23 | danglingParentheses.callSite=true
24 | assumeStandardLibraryStripMargin=true
25 | rewrite.rules=[SortImports, RedundantBraces, RedundantParens, SortModifiers]
26 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 |
2 | Spark ClickHouse Connector
3 | Was donated by The HousePower Organization (https://github.com/housepower)
--------------------------------------------------------------------------------
/clickhouse-core-it/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License. See accompanying LICENSE file.
13 | #
14 |
15 | log4j.rootLogger=INFO, file
16 |
17 | log4j.appender.console=org.apache.log4j.ConsoleAppender
18 | log4j.appender.console.target=System.out
19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss.SSS} %p %c: %m%n
21 |
22 | log4j.appender.file=org.apache.log4j.FileAppender
23 | log4j.appender.file.append=true
24 | log4j.appender.file.file=build/unit-tests.log
25 | log4j.appender.file.layout=org.apache.log4j.PatternLayout
26 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
27 |
28 | log4j.logger.org.apache.hadoop.util.Shell=ERROR
29 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
30 | log4j.logger.com.clickhouse.spark=DEBUG
31 |
--------------------------------------------------------------------------------
/clickhouse-core-it/src/test/scala/com/clickhouse/spark/UtilsSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseProvider, ClickHouseSingleMixIn}
18 | import org.scalatest.funsuite.AnyFunSuite
19 | import org.scalatest.tags.Cloud
20 |
21 | import java.time.{LocalDateTime, ZoneId}
22 |
     // Runs the shared UtilsSuite against a ClickHouse Cloud instance; tagged @Cloud
     // so CI can include/exclude it (see the cloud.yml workflow's cloudTest task).
23 | @Cloud
24 | class ClickHouseCloudUtilsSuite extends UtilsSuite with ClickHouseCloudMixIn
25 | 
     // Same suite against a single-node ClickHouse container.
26 | class ClickHouseSingleUtilsSuite extends UtilsSuite with ClickHouseSingleMixIn
27 | 
     // Server-backed checks for com.clickhouse.spark.Utils; the concrete ClickHouse
     // deployment is supplied by the ClickHouseProvider mix-in above.
28 | abstract class UtilsSuite extends AnyFunSuite with ClickHouseProvider with Logging {
29 | 
     // Verifies Utils.dateTimeFmt parses a DateTime64(3) value rendered by the server:
     // sub-millisecond digits of the input literal are truncated to 3-digit precision,
     // hence the expected nanos are 977_000_000, not 977_654_000.
30 | test("parse date with nano seconds") {
31 | withNodeClient() { client =>
     // Use the JVM's zone so the round-trip is independent of the server default TZ.
32 | val tz = ZoneId.systemDefault()
33 | val sql = s"SELECT toDateTime64('2023-03-29 15:25:25.977654', 3, '$tz')"
34 | val output = client.syncQueryAndCheckOutputJSONCompactEachRowWithNamesAndTypes(sql)
35 | assert(output.rows === 1L)
36 | val row = output.records.head
37 | assert(row.length === 1L)
38 | val actual = LocalDateTime.parse(row.head.asText, Utils.dateTimeFmt)
39 | val expected = LocalDateTime.of(2023, 3, 29, 15, 25, 25, 977000000)
40 | assert(actual === expected)
41 | }
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/java/com/clickhouse/spark/hash/cityhash/UInt128.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash.cityhash;
16 |
17 | /**
18 | * copied from https://github.com/dpoluyanov/achord/blob/master/src/main/java/com/github/mangelion/achord/UInt128.java
19 | */
20 | final public class UInt128 {
21 | final public long first;
22 | final public long second;
23 |
24 | public UInt128(long first, long second) {
25 | this.first = first;
26 | this.second = second;
27 | }
28 |
29 | static UInt128 of(long first, long second) {
30 | return new UInt128(first, second);
31 | }
32 | }
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/JsonProtocol.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
18 | import com.fasterxml.jackson.module.scala.ClassTagExtensions
19 |
/** Mixin rendering the implementing object as compact JSON via [[JsonProtocol]]. */
trait ToJson {

  def toJson: String = JsonProtocol.toJson(this)

  // toString delegates to the JSON form, so logs and error messages show structured content.
  override def toString: String = toJson
}
26 |
/** Deserialization side: parse a JSON string into a value of type T. */
trait FromJson[T] {
  def fromJson(json: String): T
}

/** Round-trip JSON support: both serialization and deserialization. */
trait JsonProtocol[T] extends FromJson[T] with ToJson
32 |
object JsonProtocol {

  // Shared Jackson mapper; @transient + lazy so it is never serialized and is rebuilt on first use.
  @transient lazy val om: ObjectMapper with ClassTagExtensions = {
    val _om = new ObjectMapper() with ClassTagExtensions
    _om.findAndRegisterModules()
    // Tolerate JSON fields the target classes do not declare, for forward compatibility.
    _om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    _om
  }

  /** Compact JSON rendering of any value Jackson can introspect. */
  def toJson(value: Any): String = om.writeValueAsString(value)

  /** Indented JSON rendering for human-readable output. */
  def toPrettyJson(value: Any): String = om.writer(SerializationFeature.INDENT_OUTPUT).writeValueAsString(value)
}
46 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/Logging.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import org.slf4j.{Logger, LoggerFactory}
18 |
/** Mixin exposing a lazily created SLF4J logger named after the concrete class. */
trait Logging {

  // @transient + lazy: not serialized with the enclosing object, created on first access.
  @transient lazy val log: Logger = LoggerFactory.getLogger(logName)

  // Strip the trailing '$' so Scala objects log under their plain class name.
  protected def logName: String = this.getClass.getName.stripSuffix("$")
}
25 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/client/NodesClient.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.client
16 |
17 | import com.clickhouse.spark.Logging
18 | import com.clickhouse.spark.spec.{NodeSpec, Nodes}
19 |
20 | import java.util.concurrent.ConcurrentHashMap
21 | import scala.collection.JavaConverters._
22 | import scala.util.Random.shuffle
23 |
object NodesClient {
  /** Factory for a client pool over the given node set. Explicit return type on the public API. */
  def apply(nodes: Nodes): NodesClient = new NodesClient(nodes)
}
27 |
/**
 * Pool of per-node [[NodeClient]]s over a fixed, non-empty node set.
 * Clients are created lazily (one per node, on first selection) and all
 * created clients are closed together via [[close]].
 */
class NodesClient(nodes: Nodes) extends AutoCloseable with Logging {
  assert(nodes.nodes.nonEmpty)

  // One cached client per node spec; ConcurrentHashMap gives thread-safe lazy creation.
  @transient lazy val cache = new ConcurrentHashMap[NodeSpec, NodeClient]

  // Picks a node at random and returns its client, creating it on first use.
  def node: NodeClient = {

    val nodeSpec = shuffle(nodes.nodes.toSeq).head
    cache.computeIfAbsent(
      nodeSpec,
      { nodeSpec =>
        log.info(s"Create client of $nodeSpec")
        new NodeClient(nodeSpec)
      }
    )
  }

  // Closes every client created so far; nodes never selected have no client to close.
  override def close(): Unit = cache.asScala.values.foreach(_.close())
}
47 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/exception/CHException.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.exception
16 |
17 | import com.clickhouse.spark.spec.NodeSpec
18 |
/**
 * Base ClickHouse exception: server/client error code, human-readable reason,
 * the node the failure relates to (when known), and an optional underlying cause.
 * The message renders as "node [code] reason".
 */
abstract class CHException(
  val code: Int,
  val reason: String,
  val node: Option[NodeSpec],
  val cause: Option[Throwable]
) extends RuntimeException(s"${node.getOrElse("")} [$code] $reason", cause.orNull)

/** Error reported by the ClickHouse server, carrying the server's error code. */
case class CHServerException(
  override val code: Int,
  override val reason: String,
  override val node: Option[NodeSpec],
  override val cause: Option[Throwable]
) extends CHException(code, reason, node, cause)

/** Client-side failure; always uses the fixed CLIENT_ERROR code. */
case class CHClientException(
  override val reason: String,
  override val node: Option[NodeSpec] = None,
  override val cause: Option[Throwable] = None
) extends CHException(ClickHouseErrCode.CLIENT_ERROR.code(), reason, node, cause)

/** Failure the caller may safely retry (e.g. transient server conditions). */
case class RetryableCHException(
  override val code: Int,
  override val reason: String,
  override val node: Option[NodeSpec],
  override val cause: Option[Throwable] = None
) extends CHException(code, reason, node, cause)
45 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/expr/Expressions.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.expr
16 |
/**
 * Minimal SQL expression tree. `sql` renders the ClickHouse dialect;
 * `sparkSql` defaults to the same text; `toString`/`desc` are debug-only
 * renderings of the form "ClassName[sql]" and must never leak into SQL.
 */
trait Expr extends Serializable {
  def sql: String // ClickHouse dialect
  def sparkSql: String = sql
  def desc: String = s"${this.getClass.getSimpleName.stripSuffix("$")}[$sql]"
  override def toString: String = desc
}

/** A quoted string literal, e.g. 'abc'. */
case class StringLiteral(value: String) extends Expr {
  override def sql: String = s"'$value'"
}

/** A bare column/field reference. */
case class FieldRef(name: String) extends Expr {
  override def sql: String = name
}

/** Opaque SQL fragment passed through verbatim. */
case class SQLExpr(sqlText: String) extends Expr {
  override def sql: String = sqlText
}

/** A function call: name(arg1,arg2,...). */
case class FuncExpr(name: String, args: List[Expr]) extends Expr {
  override def sql: String = s"$name(${args.map(_.sql).mkString(",")})"
}

// If the direction is not specified, ASC is assumed ...
// By default or with the NULLS LAST modifier: first the values, then NaN, then NULL ...
// https://clickhouse.com/docs/en/sql-reference/statements/select/order-by
case class OrderExpr(expr: Expr, asc: Boolean = true, nullFirst: Boolean = false) extends Expr {
  // BUG FIX: interpolate expr.sql, not expr itself — Expr.toString yields the
  // debug form "FieldRef[x]" which is not valid SQL.
  override def sql: String = s"${expr.sql} ${if (asc) "ASC" else "DESC"} NULLS ${if (nullFirst) "FIRST" else "LAST"}"
}

/** A tuple of expressions: (e1,e2,...). */
case class TupleExpr(exprList: List[Expr]) extends Expr {
  // BUG FIX: render each element's SQL, not its debug toString.
  override def sql: String = exprList.map(_.sql).mkString("(", ",", ")")
}
50 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/format/InputFormat.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.format
16 |
17 | trait InputFormat
18 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/format/OutputFormat.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.format
16 |
17 | import scala.collection.immutable.ListMap
18 |
/** Marker trait for query-result output formats. */
trait OutputFormat

/** Output exposing result column names mapped to their ClickHouse type names, preserving column order. */
trait NamesAndTypes { self: OutputFormat =>

  def namesAndTypes: ListMap[String, String]
}

/** Fully materialized output: all records held in memory as a Seq. */
trait SimpleOutput[T] extends OutputFormat {

  def records: Seq[T]

  def rows: Long = records.length

  def isEmpty: Boolean = records.isEmpty
}

/** Lazily consumed, record-at-a-time output. */
trait StreamOutput[T] extends Iterator[T] with OutputFormat
36 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/hash/CityHash64.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash
16 |
17 | import com.clickhouse.spark.hash.cityhash.{CityHash_v1_0_2, UInt128}
18 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L694
/** ClickHouse-compatible cityHash64: CityHash v1.0.2 over raw bytes, combined via Hash128to64. */
object CityHash64 extends HashFunc[Long] {
  override def applyHash(input: Array[Byte]): Long =
    CityHash_v1_0_2.CityHash64(input, 0, input.length)

  override def combineHashes(h1: Long, h2: Long): Long =
    CityHash_v1_0_2.Hash128to64(new UInt128(h1, h2))
}
27 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/hash/HashFunc.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash
16 |
17 | import java.nio.charset.StandardCharsets
18 | import scala.reflect.ClassTag
19 |
/**
 * Template for ClickHouse-compatible hash functions producing values of type T.
 * Subclasses supply the raw-byte hash and the two-hash combiner; this class
 * handles input dispatch and multi-argument folding.
 */
abstract class HashFunc[T: ClassTag] {

  /** Hash a raw byte sequence. */
  def applyHash(input: Array[Byte]): T

  /** Fold two already-computed hashes into one, ClickHouse-style. */
  def combineHashes(h1: T, h2: T): T

  final def executeAny(input: Any): T =
    input match {
      // Raw byte arrays are hashed as-is. This is NOT ClickHouse's Array[UInt8]/Array[Int8]
      // (those are handled differently server-side); the raw form is kept for performance,
      // see https://github.com/clickhouse/spark-clickhouse-connector/pull/261#discussion_r1271828750
      case bytes: Array[Byte] => applyHash(bytes)
      // Strings are hashed over their UTF-8 encoding.
      case string: String => applyHash(string.getBytes(StandardCharsets.UTF_8))
      case _ => throw new IllegalArgumentException(s"Unsupported input type: ${input.getClass}")
    }

  /** Hash each argument, then left-fold the results with the combiner. */
  final def apply(input: Array[Any]): T =
    input.iterator.map(executeAny).reduce(combineHashes)
}
36 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/hash/HashUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash
16 |
/** Integer mixing primitives ported from ClickHouse's hash-table utilities. */
object HashUtils {

  // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L140
  def intHash64Impl(x: Long): Long =
    intHash64(x ^ 0x4cf2d2baae6da887L)

  // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Common/HashTable/Hash.h#L26
  // 64-bit finalizer: alternating xor-shift(33) and multiply steps.
  def intHash64(l: Long): Long = {
    val s1 = l ^ (l >>> 33)
    val m1 = s1 * 0xff51afd7ed558ccdL
    val s2 = m1 ^ (m1 >>> 33)
    val m2 = s2 * 0xc4ceb9fe1a85ec53L
    m2 ^ (m2 >>> 33)
  }

  // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L133
  def int32Impl(x: Long): Int =
    intHash32(x, 0x75d9543de018bf45L)

  // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Common/HashTable/Hash.h#L502
  def intHash32(l: Long, salt: Long): Int = {
    var v = l ^ salt
    v = (~v) + (v << 18)
    v ^= (v >>> 31) | (v << 33)
    v *= 21
    v ^= (v >>> 11) | (v << 53)
    v += v << 6
    v ^= (v >>> 22) | (v << 42)
    v.toInt
  }

  /** Reinterpret a signed Int as its unsigned 32-bit value held in a Long. */
  def toUInt32(v: Int): Long = if (v < 0) v + (1L << 32) else v
}
53 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash2_32.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash
16 |
17 | import org.apache.commons.codec.digest.MurmurHash2
18 |
19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
object Murmurhash2_32 extends HashFunc[Int] {

  /** MurmurHash2 32-bit over the whole byte array, seed 0 (ClickHouse's choice). */
  override def applyHash(input: Array[Byte]): Int = {
    val length = input.length
    MurmurHash2.hash32(input, length, 0)
  }

  /** ClickHouse combiner: re-mix the first hash (as UInt32), then XOR with the second. */
  override def combineHashes(h1: Int, h2: Int): Int = {
    val remixed = HashUtils.int32Impl(HashUtils.toUInt32(h1))
    remixed ^ h2
  }
}
27 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash2_64.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash
16 |
17 | import org.apache.commons.codec.digest.MurmurHash2
18 |
19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L460
object Murmurhash2_64 extends HashFunc[Long] {

  /** MurmurHash2 64-bit over the whole byte array, seed 0 (ClickHouse's choice). */
  override def applyHash(input: Array[Byte]): Long = {
    val length = input.length
    MurmurHash2.hash64(input, length, 0)
  }

  /** ClickHouse combiner: re-mix the first hash, then XOR with the second. */
  override def combineHashes(h1: Long, h2: Long): Long = {
    val remixed = HashUtils.intHash64Impl(h1)
    remixed ^ h2
  }
}
27 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash3_32.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash
16 |
17 | import org.apache.commons.codec.digest.MurmurHash3
18 |
19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
object Murmurhash3_32 extends HashFunc[Int] {

  /** MurmurHash3 x86 32-bit over the whole byte array, seed 0 (ClickHouse's choice). */
  override def applyHash(input: Array[Byte]): Int = {
    val length = input.length
    MurmurHash3.hash32x86(input, 0, length, 0)
  }

  /** ClickHouse combiner: re-mix the first hash (as UInt32), then XOR with the second. */
  override def combineHashes(h1: Int, h2: Int): Int = {
    val remixed = HashUtils.int32Impl(HashUtils.toUInt32(h1))
    remixed ^ h2
  }
}
27 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash3_64.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.hash
16 |
17 | import org.apache.commons.codec.digest.MurmurHash3
18 |
19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L543
object Murmurhash3_64 extends HashFunc[Long] {

  /** MurmurHash3 x64 128-bit, folded to 64 bits by XOR-ing both halves (as ClickHouse does). */
  override def applyHash(input: Array[Byte]): Long = {
    val halves = MurmurHash3.hash128x64(input, 0, input.length, 0)
    halves(0) ^ halves(1)
  }

  /** ClickHouse combiner: re-mix the first hash, then XOR with the second. */
  override def combineHashes(h1: Long, h2: Long): Long = {
    val remixed = HashUtils.intHash64Impl(h1)
    remixed ^ h2
  }
}
29 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/io/ForwardingOutputStream.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.io
16 |
17 | import java.io.OutputStream
18 |
/**
 * OutputStream that forwards every call to a swappable delegate.
 * The delegate is @volatile so updates are visible across threads.
 * NOTE(review): every method NPEs if called before a delegate is set —
 * callers appear expected to call updateDelegate first; confirm at call sites.
 */
class ForwardingOutputStream(@volatile var delegate: OutputStream = null) extends OutputStream {

  def updateDelegate(delegate: OutputStream): Unit = this.delegate = delegate

  override def write(b: Int): Unit = delegate.write(b)

  override def write(b: Array[Byte]): Unit = delegate.write(b)

  override def write(b: Array[Byte], off: Int, len: Int): Unit = delegate.write(b, off, len)

  override def flush(): Unit = delegate.flush()

  override def close(): Unit = delegate.close()
}
33 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/io/ForwardingWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.io
16 |
17 | import java.io.Writer
18 |
/**
 * Writer that forwards every call to a swappable delegate.
 * The delegate is @volatile so updates are visible across threads.
 * NOTE(review): methods NPE if called before a delegate is set — confirm
 * callers always invoke updateDelegate first.
 */
class ForwardingWriter(@volatile var delegate: Writer = null) extends Writer {

  def updateDelegate(delegate: Writer): Unit = this.delegate = delegate

  override def write(cbuf: Array[Char], off: Int, len: Int): Unit = delegate.write(cbuf, off, len)

  override def flush(): Unit = delegate.flush()

  override def close(): Unit = delegate.close()
}
29 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/DatabaseSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.spec
16 |
17 | import com.clickhouse.spark.ToJson
18 |
19 | import java.util
20 | import scala.collection.JavaConverters._
21 |
/**
 * Description of a ClickHouse database.
 * Field names mirror columns of the server's database catalog
 * (presumably `system.databases` — verify against the query site).
 */
case class DatabaseSpec(
  name: String,
  engine: String,
  data_path: String,
  metadata_path: String,
  uuid: String
) extends ToJson {

  /** Plain-map view of this spec, keyed by the original column names. */
  def toMap: Map[String, String] = Map(
    "name" -> name,
    "engine" -> engine,
    "data_path" -> data_path,
    "metadata_path" -> metadata_path,
    "uuid" -> uuid
  )

  /** Java-friendly view of [[toMap]]. */
  def toJavaMap: util.Map[String, String] = toMap.asJava
}
40 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/PartitionSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.spec
16 |
17 | import com.clickhouse.spark.ToJson
18 |
/** A single table partition: its value, id, row count and on-disk size in bytes. */
case class PartitionSpec(
  partition_value: String,
  partition_id: String,
  row_count: Long,
  size_in_bytes: Long
) extends ToJson

// Sentinel used when a table has no partitioning: empty identifiers, zero rows/bytes.
object NoPartitionSpec extends PartitionSpec("", "", 0, 0)
27 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/ShardUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.spec
16 |
17 | import java.lang.{Long => JLong}
18 |
object ShardUtils {

  /**
   * Maps a hash value to the shard that owns it, honoring shard weights.
   *
   * Shards are laid out on a weight line: shard i owns the half-open interval
   * [sum(w(0..i-1)), sum(w(0..i))). The hash is reduced modulo the total
   * weight using unsigned semantics (matching ClickHouse) and located in
   * that line. Assumes all weights are positive, so a shard is always found.
   */
  def calcShard(cluster: ClusterSpec, hashVal: Long): ShardSpec = {
    val shards = cluster.shards.sorted
    val weights = shards.map(_.weight)
    // Prefix sums in O(n) instead of re-summing a slice per shard (was O(n^2));
    // bounds(i) .. bounds(i+1) is shard i's half-open interval.
    val bounds = weights.scanLeft(0L)(_ + _)
    val rem = JLong.remainderUnsigned(hashVal, weights.sum)
    val idx = shards.indices.find(i => bounds(i) <= rem && rem < bounds(i + 1)).get
    shards(idx)
  }
}
31 |
--------------------------------------------------------------------------------
/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/TableEngineUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.spec
16 |
17 | import com.clickhouse.spark.Logging
18 | import com.clickhouse.spark.exception.CHClientException
19 | import com.clickhouse.spark.parse.{ParseException, ParseUtils}
20 |
object TableEngineUtils extends Logging {

  /**
   * Parses a table's `engine_full` DDL clause into a typed engine spec,
   * falling back to [[UnknownTableEngineSpec]] when the clause cannot be parsed.
   */
  def resolveTableEngine(tableSpec: TableSpec): TableEngineSpec = synchronized {
    try ParseUtils.parser.parseEngineClause(tableSpec.engine_full)
    catch {
      case cause: ParseException =>
        // FIX: keep the parse failure in the log instead of silently dropping
        // the bound exception (it previously went unused).
        log.warn(
          s"Unknown table engine for table ${tableSpec.database}.${tableSpec.name}: ${tableSpec.engine_full}",
          cause
        )
        UnknownTableEngineSpec(tableSpec.engine_full)
    }
  }

  /** Looks up the cluster a Distributed engine spec references; fails fast when absent. */
  def resolveTableCluster(distributedEngineSpec: DistributedEngineSpec, clusterSpecs: Seq[ClusterSpec]): ClusterSpec =
    clusterSpecs.find(_.name == distributedEngineSpec.cluster)
      .getOrElse(throw CHClientException(s"Unknown cluster: ${distributedEngineSpec.cluster}"))
}
36 |
--------------------------------------------------------------------------------
/clickhouse-core/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License. See accompanying LICENSE file.
13 | #
14 |
15 | log4j.rootLogger=INFO, file
16 |
17 | log4j.appender.console=org.apache.log4j.ConsoleAppender
18 | log4j.appender.console.target=System.out
19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss.SSS} %p %c: %m%n
21 |
22 | log4j.appender.file=org.apache.log4j.FileAppender
23 | log4j.appender.file.append=true
24 | log4j.appender.file.file=build/unit-tests.log
25 | log4j.appender.file.layout=org.apache.log4j.PatternLayout
26 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
27 |
28 | log4j.logger.org.apache.hadoop.util.Shell=ERROR
29 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
30 | log4j.logger.com.clickhouse.spark=DEBUG
31 |
--------------------------------------------------------------------------------
/clickhouse-core/src/test/scala/com/clickhouse/spark/spec/NodeSpecHelper.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.spec
16 |
// Shared fixture: a two-shard, two-replica-per-shard cluster used by the
// spec/shard test suites. Shard 2 and its replicas are deliberately declared
// out of order to exercise sorting in the spec classes.
trait NodeSpecHelper {

  // One bare node per shard/replica slot, named "s<shard>r<replica>".
  val node_s1r1: NodeSpec = NodeSpec("s1r1")
  val node_s1r2: NodeSpec = NodeSpec("s1r2")
  val node_s2r1: NodeSpec = NodeSpec("s2r1")
  val node_s2r2: NodeSpec = NodeSpec("s2r2")

  // First argument is the replica number within its shard.
  val replica_s1r1: ReplicaSpec = ReplicaSpec(1, node_s1r1)
  val replica_s1r2: ReplicaSpec = ReplicaSpec(2, node_s1r2)
  val replica_s2r1: ReplicaSpec = ReplicaSpec(1, node_s2r1)
  val replica_s2r2: ReplicaSpec = ReplicaSpec(2, node_s2r2)

  // Shard 1: weight 1, replicas already in order.
  val shard_s1: ShardSpec = ShardSpec(
    num = 1,
    weight = 1,
    replicas = Array(replica_s1r1, replica_s1r2) // sorted
  )

  // Shard 2: weight 2, replicas intentionally reversed to test sorting.
  val shard_s2: ShardSpec = ShardSpec(
    num = 2,
    weight = 2,
    replicas = Array(replica_s2r2, replica_s2r1) // unsorted
  )

  // Cluster with shards intentionally reversed to test sorting.
  val cluster: ClusterSpec =
    ClusterSpec(
      name = "cluster-s2r2",
      shards = Array(shard_s2, shard_s1) // unsorted
    )
}
47 |
--------------------------------------------------------------------------------
/clickhouse-core/src/test/scala/com/clickhouse/spark/spec/NodeSpecSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.spec
16 |
17 | import org.scalatest.funsuite.AnyFunSuite
18 |
class NodeSpecSuite extends AnyFunSuite with NodeSpecHelper {

  test("nodes should be sorted") {
    // Replicas within each shard come back in host order, even for shard_s2
    // whose replicas were declared unsorted in the fixture.
    assert(shard_s1.nodes.map(_.host) === Array("s1r1", "s1r2"))
    assert(shard_s2.nodes.map(_.host) === Array("s2r1", "s2r2"))

    // The cluster view flattens shards into one host-sorted sequence; the
    // fixture declares the shards out of order. (A previously duplicated,
    // byte-identical assertion here added no coverage and was removed.)
    assert(cluster.nodes.map(_.host) === Array("s1r1", "s1r2", "s2r1", "s2r2"))
  }
}
29 |
--------------------------------------------------------------------------------
/clickhouse-core/src/test/scala/com/clickhouse/spark/spec/ShardUtilsSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.spec
16 |
17 | import org.scalatest.funsuite.AnyFunSuite
18 |
class ShardUtilsSuite extends AnyFunSuite with NodeSpecHelper {

  test("test calculate shard") {
    // Fixture weights are 1 (shard 1) and 2 (shard 2), so hashes route
    // modulo 3: residue 0 -> shard 1, residues 1 and 2 -> shard 2.
    val expectedShardByHash = Seq(
      0L -> 1, 1L -> 2, 2L -> 2,
      3L -> 1, 4L -> 2, 5L -> 2,
      6L -> 1, 7L -> 2, 8L -> 2
    )
    expectedShardByHash.foreach { case (hashVal, shardNum) =>
      assert(ShardUtils.calcShard(cluster, hashVal).num === shardNum)
    }
  }
}
33 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/.env:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8
16 | ZOOKEEPER_VERSION=3.6.3
17 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r1/interserver_http_host.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 | clickhouse-s1r1
17 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r1/macros.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 | 1
18 | 1
19 |
20 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r2/interserver_http_host.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 | clickhouse-s1r2
17 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r2/macros.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 | 1
18 | 2
19 |
20 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r1/interserver_http_host.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 | clickhouse-s2r1
17 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r1/macros.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 | 2
18 | 1
19 |
20 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r2/interserver_http_host.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 | clickhouse-s2r2
17 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r2/macros.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 | 2
18 | 2
19 |
20 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/conf/clickhouse-cluster/zookeeper.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 | zookeeper
19 | 2181
20 |
21 |
22 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/java/org/scalatest/tags/Cloud.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.scalatest.tags;
16 |
17 | import java.lang.annotation.*;
18 | import org.scalatest.TagAnnotation;
19 |
/**
 * ScalaTest tag annotation marking tests or suites that require a
 * ClickHouse Cloud environment. Retained at runtime so the test runner
 * can include or exclude tagged tests; {@code @Inherited} so subclasses
 * of a tagged suite carry the tag as well.
 */
@TagAnnotation
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.METHOD, ElementType.TYPE})
@Inherited
public @interface Cloud {}
25 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/scala/com/clickhouse/spark/base/ClickHouseCloudMixIn.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.base
16 |
17 | import com.clickhouse.data.ClickHouseVersion
18 | import com.clickhouse.spark.Utils
19 |
// ClickHouseProvider implementation backed by a ClickHouse Cloud instance,
// configured entirely through CLICKHOUSE_CLOUD_* settings read via Utils.load.
trait ClickHouseCloudMixIn extends ClickHouseProvider {

  // No default supplied — presumably required to be set; verify Utils.load
  // behavior when the key is absent.
  override def clickhouseHost: String = Utils.load("CLICKHOUSE_CLOUD_HOST")

  // 8443 is the default HTTPS port, consistent with isSslEnabled = true below.
  override def clickhouseHttpPort: Int = Utils.load("CLICKHOUSE_CLOUD_HTTP_PORT", "8443").toInt

  override def clickhouseTcpPort: Int = Utils.load("CLICKHOUSE_CLOUD_TCP_PORT", "9000").toInt

  override def clickhouseUser: String = Utils.load("CLICKHOUSE_CLOUD_USER", "default")

  // No default — presumably required; see note on clickhouseHost.
  override def clickhousePassword: String = Utils.load("CLICKHOUSE_CLOUD_PASSWORD")

  override def clickhouseDatabase: String = "default"

  // Cloud always runs the latest server, so no fixed version is pinned.
  override def clickhouseVersion: ClickHouseVersion = ClickHouseVersion.of("latest")

  override def isSslEnabled: Boolean = true
  override def isCloud: Boolean = true
}
39 |
--------------------------------------------------------------------------------
/clickhouse-core/src/testFixtures/scala/com/clickhouse/spark/base/ClickHouseProvider.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.base
16 |
17 | import com.clickhouse.client.ClickHouseProtocol
18 | import com.clickhouse.client.ClickHouseProtocol.HTTP
19 | import com.clickhouse.data.ClickHouseVersion
20 | import com.clickhouse.spark.Utils
21 | import com.clickhouse.spark.client.NodeClient
22 | import com.clickhouse.spark.spec.NodeSpec
23 | import scala.collection.JavaConverters._
24 |
/**
 * Abstraction over the ClickHouse instance a test suite talks to
 * (single-node container, cluster, or Cloud). Implementations supply the
 * connection coordinates; `withNodeClient` handles client lifecycle.
 */
trait ClickHouseProvider {
  def clickhouseHost: String
  def clickhouseHttpPort: Int
  def clickhouseTcpPort: Int
  def clickhouseUser: String
  def clickhousePassword: String
  def clickhouseDatabase: String
  def clickhouseVersion: ClickHouseVersion
  def isSslEnabled: Boolean
  def isCloud: Boolean = false

  /** Runs `block` with a freshly-built NodeClient, closing it afterwards. */
  def withNodeClient(protocol: ClickHouseProtocol = HTTP)(block: NodeClient => Unit): Unit = {
    val nodeSpec = NodeSpec(
      clickhouseHost,
      Some(clickhouseHttpPort),
      Some(clickhouseTcpPort),
      protocol,
      username = clickhouseUser,
      database = clickhouseDatabase,
      password = clickhousePassword,
      options = Map("ssl" -> isSslEnabled.toString).asJava
    )
    Utils.tryWithResource(NodeClient(nodeSpec))(block)
  }
}
52 |
--------------------------------------------------------------------------------
/dev/backport:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 |
# Prints usage help. Angle-bracket placeholders restored; all expansions
# below are quoted so commit ids / paths with unusual characters are safe.
function usage {
  set +x
  echo "./dev/backport - Tool for back port patch"
  echo ""
  echo "Usage:"
  echo "+----------------------------------------------------+"
  echo "| ./dev/backport <commit-id> <spark-from> <spark-to> |"
  echo "+----------------------------------------------------+"
  echo "<commit-id>:  - git commit hash id"
  echo "<spark-from>: - options: 3.4, 3.5"
  echo "<spark-to>:   - options: 3.3, 3.4"
  echo ""
}

function exit_with_usage {
  usage
  exit 1
}

# Exactly three positional arguments are required.
if [[ $# -eq 3 ]]; then
  COMMIT_ID="$1"
  SPARK_FROM="$2"
  SPARK_TO="$3"
else
  exit_with_usage
fi

echo "Backport $COMMIT_ID from spark-$SPARK_FROM to spark-$SPARK_TO - $(git log "$COMMIT_ID" --pretty="%s" -1)"

PROJECT_DIR="$(cd "$(dirname "$0")/.."; pwd)"
mkdir -p "${PROJECT_DIR}/tmp"
# Export the commit as a patch limited to the source Spark module, then
# rewrite module paths for the target Spark version and apply it.
git format-patch "$COMMIT_ID" -1 --stdout -- "spark-$SPARK_FROM/" > "${PROJECT_DIR}/tmp/$COMMIT_ID-$SPARK_FROM.patch"
cp "${PROJECT_DIR}/tmp/$COMMIT_ID-$SPARK_FROM.patch" "${PROJECT_DIR}/tmp/$COMMIT_ID-$SPARK_TO.patch"
# BSD sed on macOS requires an explicit backup suffix with -i.
SED_I="sed -i"
if [[ $(uname) == 'Darwin' ]]; then
  SED_I="sed -i .bak"
fi
$SED_I "s/spark-$SPARK_FROM/spark-$SPARK_TO/g" "${PROJECT_DIR}/tmp/$COMMIT_ID-$SPARK_TO.patch"
git apply "${PROJECT_DIR}/tmp/$COMMIT_ID-$SPARK_TO.patch"
55 |
--------------------------------------------------------------------------------
/dev/reformat:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 |
set -x

# Run spotlessApply once per supported Spark binary version so every
# version-specific source set is reformatted.
PROJECT_DIR="$(cd "$(dirname "$0")/.."; pwd)"
for SPARK_BINARY_VERSION in 3.3 3.4 3.5; do
  "${PROJECT_DIR}/gradlew" spotlessApply -Dspark_binary_version=${SPARK_BINARY_VERSION}
done
22 |
--------------------------------------------------------------------------------
/docker/.env:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | # latest stable version
16 | PROJECT_VERSION=0.8.0
17 |
18 | CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8
19 | AWS_JAVA_SDK_VERSION=1.12.367
20 | CLICKHOUSE_JDBC_VERSION=0.6.3
21 | HADOOP_VERSION=3.3.6
22 | HIVE_VERSION=2.3.9
23 | ICEBERG_VERSION=1.6.0
24 | KYUUBI_VERSION=1.9.2
25 | KYUUBI_HADOOP_VERSION=3.3.6
26 | POSTGRES_VERSION=12
27 | POSTGRES_JDBC_VERSION=42.3.4
28 | SCALA_BINARY_VERSION=2.12
29 | SPARK_VERSION=3.5.2
30 | SPARK_BINARY_VERSION=3.5
31 | SPARK_HADOOP_VERSION=3.3.4
32 | ZOOKEEPER_VERSION=3.6.3
33 |
--------------------------------------------------------------------------------
/docker/.env-dev:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | # current developing version
16 | PROJECT_VERSION=0.9.0-SNAPSHOT
17 |
18 | CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8
19 | AWS_JAVA_SDK_VERSION=1.12.367
20 | CLICKHOUSE_JDBC_VERSION=0.6.3
21 | HADOOP_VERSION=3.3.6
22 | HIVE_VERSION=2.3.9
23 | ICEBERG_VERSION=1.6.0
24 | KYUUBI_VERSION=1.9.2
25 | KYUUBI_HADOOP_VERSION=3.3.6
26 | POSTGRES_VERSION=12
27 | POSTGRES_JDBC_VERSION=42.3.4
28 | SCALA_BINARY_VERSION=2.12
29 | SPARK_VERSION=3.5.2
30 | SPARK_BINARY_VERSION=3.5
31 | SPARK_HADOOP_VERSION=3.3.4
32 | ZOOKEEPER_VERSION=3.6.3
33 |
--------------------------------------------------------------------------------
/docker/conf/cloudbeaver-conf/README.md:
--------------------------------------------------------------------------------
1 | ## Generate Admin Password MD5
2 | ```
3 | CB_ADMIN_PASSWORD_MD5=`echo -n "$CB_ADMIN_PASSWORD" | md5sum | tr 'a-z' 'A-Z'`
CB_ADMIN_PASSWORD_MD5=${CB_ADMIN_PASSWORD_MD5:0:32}
5 | ```
6 |
7 | ## Authenticate as Admin
8 | ```
9 | curl 'http://0.0.0.0:8978/api/gql' \
10 | -X POST \
11 | -H 'content-type: application/json' \
12 | --cookie-jar /tmp/cookie.txt \
13 | --data '{
14 | "query": "
15 | query authLogin($provider: ID!, $credentials: Object!, $linkUser: Boolean) {
16 | authToken: authLogin(
17 | provider: $provider
18 | credentials: $credentials
19 | linkUser: $linkUser
20 | ) {
21 | authProvider
22 | }
23 | }
24 | ",
25 | "variables": {
26 | "provider": "local",
27 | "credentials": {
28 | "user": "kyuubi",
29 | "password": "4E212BBF8F138808DB96B969716D1580"
30 | },
31 | "linkUser": true
32 | }
33 | }'
34 | ```
35 |
36 | ## Expose Connection to Anonymous
37 | ```
38 | curl 'http://0.0.0.0:8978/api/gql' \
39 | -X POST \
40 | -H 'content-type: application/json' \
41 | --cookie /tmp/cookie.txt \
42 | --data '{
43 | "query": "
44 | query setConnectionAccess($connectionId: ID!, $subjects: [ID!]!) {
45 | setConnectionSubjectAccess(connectionId: $connectionId, subjects: $subjects)
46 | }
47 | ",
48 | "variables": {
49 | "connectionId": "kyuubi_hive-180f13452e0-749c09a3cdb63869",
50 | "subjects": ["user"]
51 | }
52 | }'
53 | ```
54 |
--------------------------------------------------------------------------------
/docker/conf/cloudbeaver-conf/initial-data-sources.conf:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | {
16 | "connections": {
17 | "kyuubi": {
18 | "provider": "generic",
19 | "driver": "kyuubi_hive",
20 | "name": "Kyuubi on Spark",
21 | "save-password": false,
22 | "show-system-objects": true,
23 | "read-only": false,
24 | "template": false,
25 | "configuration": {
26 | "host": "kyuubi",
27 | "port": "10009",
28 | "url": "jdbc:kyuubi://kyuubi:10009/",
29 | "auth-model": "native"
30 | }
31 | }
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/docker/conf/cloudbeaver-conf/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 |
19 | %d{dd-MM-yyyy HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/docker/conf/cloudbeaver-conf/product.conf:
--------------------------------------------------------------------------------
1 | // Licensed under the Apache License, Version 2.0 (the "License");
2 | // you may not use this file except in compliance with the License.
3 | // You may obtain a copy of the License at
4 | //
5 | // https://www.apache.org/licenses/LICENSE-2.0
6 | //
7 | // Unless required by applicable law or agreed to in writing, software
8 | // distributed under the License is distributed on an "AS IS" BASIS,
9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | // See the License for the specific language governing permissions and
11 | // limitations under the License.
12 |
13 | // Product configuration. Customized web application behavior
14 | // It is in JSONC format
15 | {
16 | // Global properties
17 | core: {
18 | // User defaults
19 | user: {
20 | defaultTheme: "light",
21 | defaultLanguage: "en"
22 | },
23 | app: {
24 | // Log viewer config
25 | logViewer: {
26 | refreshTimeout: 3000,
27 | logBatchSize: 1000,
28 | maxLogRecords: 2000,
29 | maxFailedRequests: 3
30 | }
31 | }
32 | },
33 | // Notifications config
34 | core_events: {
35 | notificationsPool: 20
36 | },
37 | plugin_data_spreadsheet_new: {
38 | hidden: false
39 | },
40 | plugin_data_export: {
41 | disabled: false
42 | },
43 | sql.proposals.insert.table.alias: true
44 | }
45 |
--------------------------------------------------------------------------------
/docker/conf/core-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 | fs.s3a.access.key
19 | minio
20 |
21 |
22 | fs.s3a.secret.key
23 | minio_minio
24 |
25 |
26 | fs.s3a.connection.ssl.enabled
27 | false
28 |
29 |
30 | fs.s3a.path.style.access
31 | true
32 |
33 |
34 | fs.s3a.endpoint
35 | http://minio:9000
36 |
37 |
38 | fs.s3a.committer.name
39 | magic
40 |
41 |
42 | fs.defaultFS
43 | s3a://spark-bucket/
44 |
45 |
46 |
--------------------------------------------------------------------------------
/docker/conf/hive-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 | javax.jdo.option.ConnectionURL
19 | jdbc:postgresql://postgres:5432/metastore
20 |
21 |
22 | javax.jdo.option.ConnectionDriverName
23 | org.postgresql.Driver
24 |
25 |
26 | javax.jdo.option.ConnectionUserName
27 | postgres
28 |
29 |
30 | javax.jdo.option.ConnectionPassword
31 | postgres
32 |
33 |
34 | hive.metastore.warehouse.dir
35 | s3a://spark-bucket/warehouse
36 | location of default database for the warehouse
37 |
38 |
39 | hive.metastore.uris
40 | thrift://metastore:9083
41 |
42 |
43 |
--------------------------------------------------------------------------------
/docker/conf/kyuubi-defaults.conf:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | ## Kyuubi Configurations
19 |
20 | kyuubi.authentication=NONE
21 | kyuubi.frontend.bind.host=0.0.0.0
22 | kyuubi.frontend.bind.port=10009
23 | kyuubi.ha.zookeeper.quorum=zookeeper:2181
24 | kyuubi.operation.progress.enabled=true
25 |
26 | kyuubi.engine.session.initialize.sql \
27 | show namespaces in tpcds; \
28 | show namespaces in postgres; \
29 | show namespaces in clickhouse_s1r1; \
30 | show namespaces in clickhouse_s1r2; \
31 | show namespaces in clickhouse_s2r1; \
32 | show namespaces in clickhouse_s2r2
33 |
--------------------------------------------------------------------------------
/docker/image/scc-base.Dockerfile:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | FROM eclipse-temurin:8-focal
16 | LABEL org.opencontainers.image.authors="Cheng Pan"
17 |
18 | RUN set -x && \
19 | ln -snf /usr/bin/bash /usr/bin/sh && \
20 | apt-get update -q && \
21 | apt-get install -yq retry busybox && \
22 | rm -rf /var/lib/apt/lists/* && \
23 | mkdir /opt/busybox && \
24 | busybox --install /opt/busybox
25 |
26 | ENV PATH=${PATH}:/opt/busybox
27 |
--------------------------------------------------------------------------------
/docker/image/scc-hadoop.Dockerfile:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | ARG PROJECT_VERSION
16 |
17 | FROM pan3793/scc-base:${PROJECT_VERSION}
18 |
19 | ARG AWS_JAVA_SDK_VERSION
20 | ARG HADOOP_VERSION
21 |
22 | ARG APACHE_MIRROR
23 | ARG MAVEN_MIRROR
24 |
25 | ENV HADOOP_HOME=/opt/hadoop
26 | ENV HADOOP_CONF_DIR=/etc/hadoop/conf
27 |
28 | RUN set -x && \
29 | if [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi && \
30 | wget -q ${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_TAR_NAME}.tar.gz && \
31 | tar -xzf ${HADOOP_TAR_NAME}.tar.gz -C /opt && \
32 | ln -s /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME} && \
33 | rm ${HADOOP_TAR_NAME}.tar.gz && \
34 | HADOOP_CLOUD_STORAGE_JAR_NAME=hadoop-cloud-storage && \
35 | wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_CLOUD_STORAGE_JAR_NAME}/${HADOOP_VERSION}/${HADOOP_CLOUD_STORAGE_JAR_NAME}-${HADOOP_VERSION}.jar -P ${HADOOP_HOME}/share/hadoop/hdfs/lib && \
36 | HADOOP_AWS_JAR_NAME=hadoop-aws && \
37 | wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_AWS_JAR_NAME}/${HADOOP_VERSION}/${HADOOP_AWS_JAR_NAME}-${HADOOP_VERSION}.jar -P ${HADOOP_HOME}/share/hadoop/hdfs/lib && \
38 | AWS_JAVA_SDK_BUNDLE_JAR_NAME=aws-java-sdk-bundle && \
39 | wget -q ${MAVEN_MIRROR}/com/amazonaws/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}/${AWS_JAVA_SDK_VERSION}/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}-${AWS_JAVA_SDK_VERSION}.jar -P ${HADOOP_HOME}/share/hadoop/hdfs/lib
40 |
--------------------------------------------------------------------------------
/docker/image/scc-kyuubi.Dockerfile:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | ARG PROJECT_VERSION
16 |
17 | FROM pan3793/scc-spark:${PROJECT_VERSION}
18 |
19 | ARG AWS_JAVA_SDK_VERSION
20 | ARG KYUUBI_HADOOP_VERSION
21 | ARG KYUUBI_VERSION
22 |
23 | ARG APACHE_MIRROR
24 | ARG MAVEN_MIRROR
25 |
26 | ENV KYUUBI_HOME=/opt/kyuubi
27 | ENV KYUUBI_CONF_DIR=/etc/kyuubi/conf
28 |
29 | RUN set -x && \
30 | wget -q ${APACHE_MIRROR}/kyuubi/kyuubi-${KYUUBI_VERSION}/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz && \
31 | tar -xzf apache-kyuubi-${KYUUBI_VERSION}-bin.tgz -C /opt && \
32 | ln -s /opt/apache-kyuubi-${KYUUBI_VERSION}-bin ${KYUUBI_HOME} && \
33 | rm apache-kyuubi-${KYUUBI_VERSION}-bin.tgz && \
34 | HADOOP_CLOUD_STORAGE_JAR_NAME=hadoop-cloud-storage && \
35 | wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_CLOUD_STORAGE_JAR_NAME}/${KYUUBI_HADOOP_VERSION}/${HADOOP_CLOUD_STORAGE_JAR_NAME}-${KYUUBI_HADOOP_VERSION}.jar -P ${KYUUBI_HOME}/jars && \
36 | HADOOP_AWS_JAR_NAME=hadoop-aws && \
37 | wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_AWS_JAR_NAME}/${KYUUBI_HADOOP_VERSION}/${HADOOP_AWS_JAR_NAME}-${KYUUBI_HADOOP_VERSION}.jar -P ${KYUUBI_HOME}/jars && \
38 | AWS_JAVA_SDK_BUNDLE_JAR_NAME=aws-java-sdk-bundle && \
39 | wget -q ${MAVEN_MIRROR}/com/amazonaws/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}/${AWS_JAVA_SDK_VERSION}/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}-${AWS_JAVA_SDK_VERSION}.jar -P ${KYUUBI_HOME}/jars && \
40 | useradd anonymous
41 |
42 | ENTRYPOINT ["/opt/kyuubi/bin/kyuubi", "run"]
43 |
--------------------------------------------------------------------------------
/docker/image/scc-metastore.Dockerfile:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | ARG PROJECT_VERSION
16 |
17 | FROM pan3793/scc-hadoop:${PROJECT_VERSION}
18 |
19 | ARG HIVE_VERSION
20 |
21 | ARG APACHE_MIRROR
22 |
23 | ENV HIVE_HOME=/opt/hive
24 | ENV HIVE_CONF_DIR=/etc/hive/conf
25 |
26 | RUN set -x && \
27 | wget -q https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz && \
28 | tar -xzf apache-hive-${HIVE_VERSION}-bin.tar.gz -C /opt && \
29 | ln -s /opt/apache-hive-${HIVE_VERSION}-bin ${HIVE_HOME} && \
30 | rm apache-hive-${HIVE_VERSION}-bin.tar.gz
31 |
32 | ENTRYPOINT ["/opt/hive/bin/hive", "--service", "metastore"]
33 |
--------------------------------------------------------------------------------
/docs/best_practices/01_deployment.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ---
15 |
16 | Deployment
17 | ===
18 |
19 | ## Jar
20 |
21 | Put `clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}-{{ stable_version }}.jar` and
22 | `clickhouse-jdbc-{{ clickhouse_jdbc_version }}-all.jar` into `$SPARK_HOME/jars/`, then you don't need to bundle the jar
23 | into your Spark application, and `--jars` is not required when using `spark-shell` or `spark-sql` (again, for SQL-only
24 | use cases, [Apache Kyuubi](https://github.com/apache/kyuubi) is recommended for Production).
25 |
26 | ## Configuration
27 |
28 | Persist catalog configurations into `$SPARK_HOME/conf/spark-defaults.conf`, then `--conf`s are not required when using
29 | `spark-shell` or `spark-sql`.
30 |
31 | ```
32 | spark.sql.catalog.ck_01=com.clickhouse.spark.ClickHouseCatalog
33 | spark.sql.catalog.ck_01.host=10.0.0.1
34 | spark.sql.catalog.ck_01.protocol=http
35 | spark.sql.catalog.ck_01.http_port=8123
36 | spark.sql.catalog.ck_01.user=app
37 | spark.sql.catalog.ck_01.password=pwd
38 | spark.sql.catalog.ck_01.database=default
39 |
40 | spark.sql.catalog.ck_02=com.clickhouse.spark.ClickHouseCatalog
41 | spark.sql.catalog.ck_02.host=10.0.0.2
42 | spark.sql.catalog.ck_02.protocol=http
43 | spark.sql.catalog.ck_02.http_port=8123
44 | spark.sql.catalog.ck_02.user=app
45 | spark.sql.catalog.ck_02.password=pwd
46 | spark.sql.catalog.ck_02.database=default
47 | ```
48 |
--------------------------------------------------------------------------------
/docs/best_practices/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ---
15 |
16 | TODO
17 | ===
18 |
--------------------------------------------------------------------------------
/docs/configurations/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | hide:
3 | - navigation
4 | license: |
5 | Licensed under the Apache License, Version 2.0 (the "License");
6 | you may not use this file except in compliance with the License.
7 | You may obtain a copy of the License at
8 |
9 | https://www.apache.org/licenses/LICENSE-2.0
10 |
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 | ---
17 |
18 | Configurations
19 | ===
20 |
21 | ## Catalog Configurations
22 |
23 | {!
24 | include-markdown "./01_catalog_configurations.md"
25 | start=""
26 | end=""
27 | !}
28 |
29 | ## SQL Configurations
30 |
31 | SQL Configurations can be overwritten by `SET <key>=<value>` at runtime.
32 |
33 | {!
34 | include-markdown "./02_sql_configurations.md"
35 | start=""
36 | end=""
37 | !}
38 |
--------------------------------------------------------------------------------
/docs/developers/01_build_and_test.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ---
15 |
16 | Build and Test
17 | ===
18 |
19 | ## Build
20 |
21 | Check out source code from GitHub
22 |
23 | ```
24 | git clone https://github.com/ClickHouse/spark-clickhouse-connector.git
25 | ```
26 |
27 | Build w/o test
28 |
29 | ```shell
30 | ./gradlew clean build -x test
31 | ```
32 |
33 | Go to `spark-{{ spark_binary_version }}/clickhouse-spark-runtime/build/libs/` to find the output jar
34 | `clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}-{{ version }}.jar`.
35 |
36 | ## Test
37 |
38 | The project leverages [Testcontainers](https://www.testcontainers.org/) and [Docker Compose](https://docs.docker.com/compose/)
39 | to do integration tests; you should install [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
40 | before running tests, and check more details in the [Testcontainers documentation](https://www.testcontainers.org/) if you'd
41 | like to run tests with a remote Docker daemon.
42 |
43 | Run all test
44 |
45 | `./gradlew clean test`
46 |
47 | Run single test
48 |
49 | `./gradlew test --tests=ConvertDistToLocalWriteSuite`
50 |
51 | Test against custom ClickHouse image
52 |
53 | `CLICKHOUSE_IMAGE=custom-org/clickhouse-server:custom-tag ./gradlew test`
54 |
--------------------------------------------------------------------------------
/docs/developers/02_docs_and_website.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ---
15 |
16 | Docs and Website
17 | ===
18 |
19 | ## Setup Python
20 |
21 | Follow the [Python official document](https://wiki.python.org/moin/BeginnersGuide) to install.
22 |
23 | ## Setup `pyenv` on macOS (optional)
24 |
25 | Optionally, we recommend managing Python environments with [pyenv](https://github.com/pyenv/pyenv).
26 |
27 | Install from Homebrew
28 |
29 | ```bash
30 | brew install pyenv pyenv-virtualenv
31 | ```
32 |
33 | Setup in `~/.zshrc`
34 |
35 | ```bash
36 | eval "$(pyenv init -)"
37 | eval "$(pyenv virtualenv-init -)"
38 | ```
39 |
40 | Install `virtualenv`
41 |
42 | ```bash
43 | pyenv install 3.9.13
44 | pyenv virtualenv 3.9.13 scc
45 | ```
46 |
47 | Localize `virtualenv`
48 |
49 | ```bash
50 | pyenv local scc
51 | ```
52 |
53 | ## Install dependencies
54 |
55 | ```bash
56 | pip install -r requirements.txt
57 | ```
58 |
59 | ## Preview website
60 |
61 | ```
62 | mkdocs serve
63 | ```
64 |
65 | Open [http://127.0.0.1:8000/](http://127.0.0.1:8000/) in browser.
66 |
--------------------------------------------------------------------------------
/docs/developers/03_private_release.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ---
15 |
16 | Private Release
17 | ===
18 |
19 | !!! tip
20 |
21 | Private Release means deploying to a private Nexus Repository. Please make sure you are granted access to your
22 | company's private Nexus Repository.
23 |
24 | ### Repository and Authentication
25 |
26 | Configure Gradle in `~/.gradle/gradle.properties`.
27 |
28 | ```
29 | mavenUser=xxx
30 | mavenPassword=xxx
31 | mavenReleasesRepo=xxx
32 | mavenSnapshotsRepo=xxx
33 | ```
34 |
35 | ### Upgrade Version
36 |
37 | Modify version in `version.txt` and `docker/.env-dev`
38 |
39 | ### Build and Deploy
40 |
41 | Publish to Maven Repository using `./gradlew publish`
42 |
--------------------------------------------------------------------------------
/docs/developers/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ---
15 |
16 | TODO
17 | ===
18 |
--------------------------------------------------------------------------------
/docs/imgs/scc_overview.drawio:
--------------------------------------------------------------------------------
1 | 7Vtbd5s4EP41fqwPIG5+9K3JyWm72SZ7urtvBFRDghEVcmzvr98RCGMQju34Jp8mDzEaDbL45hvNSIM7aDhd3FAvDb+SAMcdQwsWHTTqGIauazZ8cMmykLiOEExoFAilSvAQ/YeFUBPSWRTgrKbICIlZlNaFPkkS7LOazKOUzOtqP0lc/9bUm2BJ8OB7sSz9EQUsFE9haZX8FkeTkK0eWPRMvVJZCLLQC8h8TYTGHTSkhLDiaroY4piDV+JS3Pd5Q+9qYhQnbJcbRv2bOPk+H6dJP3Lm5OvL091fn/gNfJhXL56JJxazZcsSAkpmSYD5KFoHDeZhxPBD6vm8dw5GB1nIpjG0dLj8GcXxkMSEQjshCSgN5ImKub9iyvBiTSQmfoPJFDO6BJWyt0RVsMhyRXte2WQlC9ft4QihJ3gwWY1dQQUXAq19kLOuAzm7jpzRQxJyptOCHEB3IuT0fYDTtwN3DJSMOkq2JvNrpbOOUu9U9ELqgWTYqoFkqgeSqakG0l7r1HlAskzVQNJ1CRQcQCIgmoSykExI4sXjSjqoL++VzhdCUgHWM2ZsKbIab8ZIHUpAkC7/5vd3rbL5jxgub4wWtdZStDaaICMz6uM3nlMkX8yjE8y2r9EcgzcNSnHssei1niy1WSe/tU+pt1xTSEmUsGxt5HsuWFtxrEbYtxopTkNfd9/Uh4tiBhVPVo9yAHVkf/qtqYOukjrIuAR10Ad1WuL5lVHHtC9BHfMS1Dk9BayrpIDd3J+egwK2lNiNF9gHm1E5mTn7hqpXB8REO2Z49skyPOv3WGudqwrTqHcFYdrZ6GiX30ohpJyj2dfkaBk4Cuvzg+LO9mO3CyUw2zOO5ilnMVFxV2XPfb1zr8xGuttqUNPSG7Ms1jJplsdyW3ej28pp99mPiRzl3LZ3TW77/vio7xogHSUCZNOJyvPFjQGymXhpZwiQJVHfOmNUeneyMyncY5PisBrURU4tzwCzfZW+19w1nsX3dDk7HVGAhCoX45B+6RhntFXV7Jjx6ivJLVnBZf+akbLjU5b7Qh8UDDNd5PCU/XA14Z/DOPJfbsksw6A1jGcZ4yYoBofJFuMXqpJlAFBWh59i+ErvKVfgbsq9sJhE3u3F0SSBax+sA1+DBtwqke/FfdExjYIg93bBcfgaa9CxRu115oxR8oIbwvo6cQQ+NIusTgsfjDY+GCfjQ1sBcT8+6O4GPvxLAFKcchLsS4e9HJWPJJZqeJxTvUZgNpY2R7Nk2+kttkMns518/HU0Xwa06Qso9NMUvBoiDEk+XLnmyo1CG2rZvhjaWV25JQxGAEb0NGPw5Ib2mJvg0jHRMu2u69Sws7WeHBY1u1u+YHQeb5I3yyrCZwIurqsgfL2rgM+w1YQPyftHFeHTDQ6fVv3pCkIpv133hUA0qUAs4tgTLUNYFno00PPNVB7tFCgebcHZdLpy9rERaetUSBuHIi0fkqiwICiCrrxV2wVdQyUet0UrRdCVNz57ont57ralUoqg2/bK5ZG2lbePj/cwqcn3e5gbBBHbm3Joi/98y5KXsozPdw9/fIMPzPzux9al7pSoxhnLaNm6tL4Qf4SQ/ec0fB3cPeuj5IfxHN0t/Wzg7PI+/FkqMUFrGbRxBvu+g1+33SQHlkGl89nm68FHKoNKhcyDyqDSLN9dBoVm9fOhQr36ERYa/w8=
--------------------------------------------------------------------------------
/docs/imgs/scc_overview.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_overview.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/scc_read_bucket_join.drawio:
--------------------------------------------------------------------------------
1 | 7Vpdc5s4FP01zGwfnAGJD/sxdtK02XS3E+9Od/uyo4AKWsuIChHb/fUrgYBgkQ9v7ZikfTK6uhLinHMv0sUWnC3XFxxlyQcWYWoBO1pb8MwCwHFsX/4oy6ayjANtiDmJtFNrmJNvWBttbS1IhPOOo2CMCpJ1jSFLUxyKjg1xzlZdty+Mdu+aoRgbhnmIqGn9RCKR6Kfw7Nb+DpM4Ec0D654lqp21IU9QxFZ3TPDcgjPOmKiulusZpgq8Gpdq3Nt7epuFcZyKpwxYf7ahWK3ejt6PRsX1p4tF9jscwUk1zS2ihX5ivVqxqSHgrEgjrGaxLThdJUTgeYZC1buSpEtbIpZUthx5+YVQOmOUcdlOWSqdpuZC9dpvMRd4fcekF36B2RILvpEuda+rQdQqAnV71XLijbUtucMHqIlCWgdxM3cLlbzQaO2AnPMigfOeCpwTHAo4EBhA4UjGnG4yLhIWsxTR89Y67ULZ+lwxlmkA/8VCbHQCQYVgXXjxmoi/1PATT7f+1pOp67P13cZGN+7FP2cFD/FDz6gzFeIxFg+JSCdDBcCDdHJMkSC33aS0d2qg85OahppgUNSAXdKN83i62Udu8bdyi2/mFgf05JbJwVLL+PXrF+5blnroR0bkUhpyG5I0ue72+6AKID1qi7dmGd+Rioand2gPTu+T169398fQuzs8vbvbe8ej6x3aBiivTu/ej6F3b3h6b85Bg9G73wOST4U6LbKSzBYt/2vB6o5RXkr5VDoAN1uX6NT98ipWvzNKwsU7VuRYes1okQvM68nlWqv5K1eDGImn6KLPsbwluikdlPxVEFWLKLsRJXEqr0NJjrwNnCpSSIjoqe5YkigqgzVTeith9KaWd9Z/Ls4FZwu8ZeyG+R7ksL299QNTDqBPDuBQcgi+Ww7O+B45fGYSUZwpDeyqhp3CVM2kEy1wD1b1cLeSm+97JnVOD3XwUNSNDerOiJQxuSmEhA7Yf5TBc/wM6J+Mg60kODGToO2fAO8Z0TOLlENEz3UleuPhoVcLfeDwQXug8JmV3iHC5/gDhc+sXF0x+epvgateMze8fsPkCeKRU25xM7lLQWZRcgDY+nZwYr5U7kXXOxS6Zp1kR3SPv+/uCfyhoGueyp+CLhiSdnveSkNB1zwD7oju8bXbs2N6fnTJ7W/R9Z/R5sPi4+XXX9OLxaW/GJnKe4ZSidVXu/h/dZD609ijH2qg38/Q83yXccwz+jxEqandX+RRm/8j5wIztY4okmfm/I3pl6A8qSATRBCWkjTuDgU9gxiPMO942qfz2Zujh4cz8Tqx4To9JZVJz0HM38N2pDcszJTxwsIieBlhYdYqnhIWpY5l67XHhRscMS4ekEuHsPdpWtaDLhnpI+7KArCFtRxiX7e244MMYRfk5vvd0UB2zZP43gq6Ekq+UPrOyq2RCpGf9dxHDnGgp8IP7P2UdGWz/W9f9dWk/YckPP8P
--------------------------------------------------------------------------------
/docs/imgs/scc_read_bucket_join.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_bucket_join.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/scc_read_pushdown_disable.drawio:
--------------------------------------------------------------------------------
1 | 7Zrbcps6FIafxjPthTOAANuXsZ2k56ZJOt3tzR4FFFCMERXCh/30W4AAg7AdT3xQk14ZLQlZfOvXQlqiA0bTxRWFkf+ZuCjoGJq76IBxxzB0XbP5T2pZ5pZ+Txg8il3RqDLc4v+QMGrCmmAXxbWGjJCA4ahudEgYIofVbJBSMq83eyBB/V8j6CHJcOvAQLb+wC7zxVNYWmV/h7Dns/KBRc0UFo2FIfahS+YrJnDRASNKCMuvposRClJ4BZf8vss1teXAKArZU25Y/NIAm88vu++73eTmx9Uk+gq6YJB3M4NBIp5YjJYtCwSUJKGL0l60DhjOfczQbQSdtHbOnc5tPpsGvKTzywccBCMSEMrLIQl5o6E8UDH2GaIMLVZMYuBXiEwRo0vepKg1BcRCRaI4r1xiFTZ/xR1mISMoZOCVXVek+IWAtQM4/Y/kZplPBKf3DgXO6EmgkMunnCgSynzikRAGF5V1WEdZtflESCQAPiLGliJ+wISROl60wOyf9PYzS5R+is7S6/FitbAUhbX8Y5JQB216RhGoIPUQ2yQiEQtTABvdSVEAGZ7VY9LeXQP0V+Oa/REXt14TzIdSTjyrV594ZnM+5doQdzX8Vg7jGbNsl/Ckbw9P+4hFdiMW2XIs0o2WWDQ4WCjqv3y9g+PovXTSifQO1NM70JTT++Dl6918HXo31dO72VxrnlzvQJOgvDi9W69D75Z6ei/3Tcro3W6BZAcs3V2SzJkVLft3QoqKbpxJ+Zw3MMxokdEp6vmVl/6OAuxM3pEkRrzVKEhihmjROR9r3n/eVHIM58nq9Cnifwnvswap/NNJlA8iq4YB9kJ+7XDn8L8Bw9Qp2IHBuaiYYtfNJmuU6i3DaA071rh9Hx0zSiaoYaxP8z3Iobm8tXuyHIw2ORiHkkPv2XLQ+2vk8ItwoihKNbCrGnaapmlPItAa5sGyJGYjuNm2JbtOb3EdOJTr+pLrxpjLGN8njKMztLts8pw+Atpn/V4jCA7kIKjZZ4Z1RHpyTlNFeqbJ6fXVo1cIXXF8QFMUn5wZVhGfbiuKT85cfSL81V+By18z97R4w8Q+pK6eLXEjvkqBchJTAba21juTXypr6VqHoivnSXake/p1d8vEV4WuvCt/Cl1DJe22vJVUoSvvAXeke3rttqyYjk8Xz764N9/d5efJ9YffH8OryQd70pWVd4RUyVqiW9MexcnZ1iM2oNYRmy7v0W8dGMrafcO32vRf3pcxSsfhunzPHL89uYCNRpKvPGBf0S5oO1C297BgWI4v8c8HnT7OvvlR5LrOxY35lMy/0koGLUpufVBdKSEDWcjXlDymn+Cs1fLp5QuK+CnkC4yWnN3gQPJtT+TLu629Je04SjrhDc6j7PXHMAn/5uy2LNTL77dWcz/agdJ2rfO8LW2naEDbFKe2xrO9n1w8i7q8a7+GXMDZ8vLc8yjyIGtZYr4Zff3+5Y63UTbElVv1Y4S4TY5eQftHA9VbvqHbF1BerD4Izc/Oqs9qwcX/
--------------------------------------------------------------------------------
/docs/imgs/scc_read_pushdown_disable.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_pushdown_disable.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/scc_read_pushdown_enable.drawio:
--------------------------------------------------------------------------------
1 | 7ZrbcpswEIafxjPthT2AODiXPuTQmbTpNOmkzU1HAQVUy4gKEdt9+goQYBB244kd06RXRqtFFt/+WrSye2AyX54zGAUfqYdIz9C8ZQ9Me4ah65otPlLLKrcMHWnwGfakU2W4xr+RNGrSmmAPxTVHTinhOKobXRqGyOU1G2SMLupuD5TUvzWCPlIM1y4kqvUWezyQT2Fplf0CYT/g5QPLnjksnKUhDqBHF2smcNoDE0Ypz6/mywkiKbyCS37f2YbecmIMhfwpNyzvNMAXi7P+h34/+XJ7PouuQB+c5MM8QpLIJ5az5asCAaNJ6KF0FK0HxosAc3QdQTftXYigC1vA50S0dHH5gAmZUEKZaIc0FE5jdaJy7o+IcbRcM8mJnyM6R5ythEvRa0qIUkXAlu1FFRNrKG3BejyANEKpA78cu0IlLiStHcjp/yQ4y3wqOOdQ4AxHAYU8seZkkzIeUJ+GkJxW1nEdZeVzSWkkAf5EnK9kAoEJp3W8aIn5t/T2gSVb3+Vg6fV0ud5YycZG/jFNmIu2PaPMVJD5iG8TkUyGKYCt4WSIQI4f60lp76EB+psJzf6Iy1s/UyymUi48y6kvPLO5nnJtyLsacSun8YxVtkt60v+envaRi+xGLmpJ4rrRkotODpaKhq9f7+Bl9F4G6Uh6B93TO9A6p/eT1693823o3eye3s3mXvPoei82/69Z79bb0LvVPb2XdVNn9G63QLIJT6tLmgWzomX/SmjR0Y8zKY+Eg2FGy4xO0S+u/PRzQrA7u6BJjITXhCQxR6wYXMw1Hz93VQIjePI6fYbEV8L7zCGVf7qI8klk3ZBgPxTXrgiO+BowToOCXUhGsmOOPS9brFGqtwyjNe5Z0/Y6OuaMzlDDWF/me5BDc3trO6ocjDY5GIeSg/NsOejDDXK4o4IoilIN7KqGnZZpOpJMtIZ5sFMSs5HcbNtSQ6e3hO5gp0tDJXRTLGSM7xMu0BnaTbZ4jp8B7cHQaSTBEzUJavbAsF6Qnnqo2UV6pinoDbtHrxB6x/EBraP41JPhLuLT7Y7iU0+uLql49Vfg8tfMPSveMHEAmadnW9xI7FKgeojZAba25gzUl8pGutah6KrnJDvSPf6+u2Xhd4WuWpU/ha7RJe22vJW6QletAXeke3zttuyYukJXLR5Hvs+QD3kL2XeTq6+fboS7KAjZD+y9PzpZo3H6ZOpq+QXafum0D/UmM9V91N7KcYGSzYTDKMqEzTEN/1fj26rxMvTrJZ22n2pcNKu/ceQHXtWfYcDpHw==
--------------------------------------------------------------------------------
/docs/imgs/scc_read_pushdown_enable.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_pushdown_enable.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/scc_read_sort_merge_join.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_sort_merge_join.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/scc_write_rebalance_sort.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_write_rebalance_sort.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/spark_centralized_metastore.drawio:
--------------------------------------------------------------------------------
1 | 7Vjfc6IwEP5rfNRBKGof1dp6U522Y2+uvZdOhAg5A8uExR/96y+BIFg463TOq525BzX77Qay334bgg1rGGxuBIn8KbiUN0zD3TSsq4ZptttGR/4oZJshva4GPMFcHVQAM/ZKNWhoNGEujfcCEYAji/ZBB8KQOriHESFgvR+2AL5/14h4tALMHMKr6A/moq+zsI0CH1Pm+bhLWHsCkgdrIPaJC+sSZI0a1lAAYDYKNkPKFXk5L9m86z94dwsTNMRjJoxnd+70id2Nmsnt/CEhbuK2m/oqK8ITnbBeLG5zBqgrCdEmCPTBg5DwUYEOBCShS9VtDGkVMROASIJtCf6iiFtdXZIgSMjHgGtvNRW9rhgS4eh1fCO3jzNndi8er0fN7c/4dTIZNvVykQiP4oE4O4tTuZRuoIm6oRBQFFsZICgnyFb7xSdaQ94urqBZDjTT9awfWvUh1qVaIjVkQSrQwYoKZFKWEzKn/B5ihgxC6Z8DIgQygCvHgDhLLy3HEDgI6XfpgiQcS1foc+apmaiKMyBxlLXNgm1UCQfpDfs5auSIHPuIqun6KnfzOok4ELe1ZksWUJeRFghPwsqOlC3HDgQBhLEcoZ8Ec/m7UB9LfvUj4vj0ZSZrvnzhUi6teKWmm72eEW2ate5WFHqHxKISpJuD5dVeq6N7Um9Kdt6j66LF2/ZFy+hmsF/q8J5xIlHYFVGMlRDNTlq9uZAjT42mFEmMIGhFM0UfqqZa+wypZDDtn7XcnY/sueNptC/f0GjV0JhTW+awcyoO2xefsX3RDcMnNb1la+u55Lna6CunxjY3QplvaZIyn8u+Ylpq5fM+vlV2vuRW2al0xXQ7e5icnfYvukdq3zqZ9u3z0b7xIe23T6b97pfUfrei/SFnznIMSXx+m3+n7hl6qgaoPctWH6Cf0RAfF2nvSJGaZyXSXoX17zEVFebz82wS8L6D6mj6/pG2/sRa4hoS5Cykw92738Fd4nhtm/b7Bxvrn55rqo9BQT0Wo2JaKmbOqxuCzBX36RI0Zq9pbMaUEm+c6Vi5iabZkcRRUcN/wFw3bZIIWIhpjvagYV9JZME4z185QghVUIwClvQNuN9ef6FUnbelMuxKqXo1pTJPVqrqrv2/VPWlujxZqaRZ/LGS+kp/T1mj3w==
--------------------------------------------------------------------------------
/docs/imgs/spark_centralized_metastore.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/spark_centralized_metastore.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/spark_multi_catalog.drawio:
--------------------------------------------------------------------------------
1 | 1VdRc5swDP41eWyOQKHpY0LTdrf0rrt0t24vPQcU8GIQZ0RC+utnExPIaNPsbl2zhwTpk2yjTzKye46flDeSZfEdhiB6thWWPeeqZ9uDgeWph0Y2W2R4YYBI8tA4NcCMP4MBLYMWPIR8z5EQBfFsHwwwTSGgPYxJiet9twWK/VUzFkEHmAVMdNFvPKTYROFaDX4LPIppF7CxJKx2NkAesxDXLciZ9BxfItJWSkofhCav5mU77voV6+7FJKR0zIBP7PPDLJjdy4frydnmR/48nfpng+F2mhUThYnYvC1tagogVIwYFSXFGGHKxKRBxxKLNAS9jqW0xmeKmClwoMCfQLQx6WUFoYJiSoSxdmMx4eVYyAAOBGBel5iMgA74uVs/HUtrAcPUDWACJDfKQYJgxFf72WemiKKdX8OzEgzVf0L75UewDCWnx5b8XU/Vd412VZqZK2VTK6mK97GttEZptRlWafW498+od1IZ3e3yf5vRk83OxWll5+2vnPo8Z1rkSdURxiuQxFUfmLI5iHvMOXFMlX2ORJgoB6ENYxYsoyqNPgqUyh7CghWCWjOMBI/0SNJJHbM82/apBS916sfVgqMatWpEyTGR7nIjHbt9XWQCWdhf8yVPIOSsjzJSsNYzrSs5wCTBNFcSxUUyV8+F/jnqb5SxIIanmcr58kmoMuvnKz3cHg6trDx70dzP0uhQsegAoTyYXmN1PLM9zCnArdv7uumpA/e8b5myiVstdWi9U1G4naK41YVoe1X25lJJkZbugFhOKKFTM83+1ZtxHXMCxWC1f9bqOHRkjzueRvfyNxrtF2issTaH3ntx6HU4vNvMvkxPjqnz849m6qLDlC94sLzFIj+9wvKsj6ZrF8P/ejAdHtko7ZNqlN3rwNcc5KvNskjEKCDd997uly+3wxbXWJDgKfi7m9zBk8rxxW27b381nb9T20ptLnSVrXUtdia/AA==
--------------------------------------------------------------------------------
/docs/imgs/spark_multi_catalog.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/spark_multi_catalog.drawio.png
--------------------------------------------------------------------------------
/docs/internals/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | https://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ---
15 |
16 | Overview Design
17 | ===
18 |
19 | At a high level, Spark ClickHouse Connector is a connector built on top of the Spark DataSource V2 API and
20 | ClickHouse HTTP protocol.
21 |
22 |
23 | 
24 |
25 |
--------------------------------------------------------------------------------
/examples/scala/README.md:
--------------------------------------------------------------------------------
1 | # Spark ClickHouse Connector Examples
2 |
3 | Provides examples for the Spark ClickHouse Connector in various languages.
4 | Ready to use as simple standalone application.
5 |
6 |
--------------------------------------------------------------------------------
/examples/scala/spark-3.5/.bsp/sbt.json:
--------------------------------------------------------------------------------
1 | {"name":"sbt","version":"1.9.2","bspVersion":"2.1.0-M1","languages":["scala"],"argv":["/Users/mark.zitnik/Library/Java/JavaVirtualMachines/openjdk-21.0.1/Contents/Home/bin/java","-Xms100m","-Xmx100m","-classpath","/Users/mark.zitnik/Library/Application Support/JetBrains/IntelliJIdea2023.2/plugins/Scala/launcher/sbt-launch.jar","-Dsbt.script=/opt/homebrew/bin/sbt","xsbt.boot.Boot","-bsp"]}
--------------------------------------------------------------------------------
/examples/scala/spark-3.5/build.sbt:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | name := "ClickHouse Apache Spark 3.5 Example Project"
16 |
17 | version := "1.0"
18 |
19 | scalaVersion := "2.12.18"
20 |
21 | libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.5.0"
22 | libraryDependencies += "org.apache.spark" %% "spark-core" % "3.5.0"
23 |
24 | libraryDependencies += "com.clickhouse" % "clickhouse-jdbc" % "0.6.3" classifier "all"
25 | libraryDependencies += "com.clickhouse.spark" %% "clickhouse-spark-runtime-3.5" % "0.8.0"
26 |
27 |
--------------------------------------------------------------------------------
/examples/scala/spark-3.5/project/build.properties:
--------------------------------------------------------------------------------
1 | # Licensed under the Apache License, Version 2.0 (the "License");
2 | # you may not use this file except in compliance with the License.
3 | # You may obtain a copy of the License at
4 | #
5 | # https://www.apache.org/licenses/LICENSE-2.0
6 | #
7 | # Unless required by applicable law or agreed to in writing, software
8 | # distributed under the License is distributed on an "AS IS" BASIS,
9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | sbt.version=1.9.2
13 |
--------------------------------------------------------------------------------
/examples/scala/spark-3.5/src/main/scala/Saprk-3.5.scala:
--------------------------------------------------------------------------------
1 |
2 | import org.apache.spark.sql.SparkSession
3 |
4 | object SparkTestApp {
5 | def main(args: Array[String]): Unit = {
6 | val username = "default"
7 | val password = "replace with password"
8 | val host = "replace with host"
9 |
10 | val spark = SparkSession.builder.appName("ClickHouse Apache Spark 3.5 Example")
11 | .master("local[2]")
12 | .config("spark.sql.catalog.clickhouse","com.clickhouse.spark.ClickHouseCatalog")
13 | .config("spark.sql.catalog.clickhouse.host", host)
14 | .config("spark.sql.catalog.clickhouse.protocol","http")
15 | .config("spark.sql.catalog.clickhouse.http_port","8443")
16 | .config("spark.sql.catalog.clickhouse.user", username)
17 | .config("spark.sql.catalog.clickhouse.password", password)
18 | .config("spark.sql.catalog.clickhouse.database","default")
19 | .config("spark.sql.catalog.clickhouse.option.ssl","true")
20 | .getOrCreate()
21 |
22 | spark.sql("use clickhouse")
23 | spark.sql("show tables").show()
24 | spark.stop()
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/gradle.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | #
14 |
15 | mavenCentralMirror=https://repo1.maven.org/maven2/
16 | mavenSnapshotsRepo=https://s01.oss.sonatype.org/content/repositories/snapshots/
17 | mavenReleasesRepo=https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/
18 |
19 | systemProp.scala_binary_version=2.12
20 | systemProp.known_scala_binary_versions=2.12,2.13
21 | systemProp.spark_binary_version=3.5
22 | systemProp.known_spark_binary_versions=3.3,3.4,3.5
23 |
24 | group=com.clickhouse.spark
25 |
26 | clickhouse_jdbc_version=0.6.3
27 |
28 | spark_33_version=3.3.4
29 | spark_34_version=3.4.2
30 | spark_35_version=3.5.1
31 |
32 | spark_33_scala_212_version=2.12.15
33 | spark_34_scala_212_version=2.12.17
34 | spark_35_scala_212_version=2.12.18
35 |
36 | spark_33_scala_213_version=2.13.8
37 | spark_34_scala_213_version=2.13.8
38 | spark_35_scala_213_version=2.13.8
39 |
40 | spark_33_antlr_version=4.8
41 | spark_34_antlr_version=4.9.3
42 | spark_35_antlr_version=4.9.3
43 |
44 | spark_33_jackson_version=2.13.4
45 | spark_34_jackson_version=2.14.2
46 | spark_35_jackson_version=2.15.2
47 |
48 | spark_33_slf4j_version=1.7.32
49 | spark_34_slf4j_version=2.0.6
50 | spark_35_slf4j_version=2.0.7
51 |
52 | # Align with Apache Spark, and don't bundle them in release jar.
53 | commons_lang3_version=3.12.0
54 | commons_codec_version=1.16.0
55 |
56 | # javax annotations removed in jdk 11
57 | # fix build error with jakarta annotations
58 | jakarta_annotation_api_version=1.3.5
59 |
60 | # Test only
61 | kyuubi_version=1.9.2
62 | testcontainers_scala_version=0.41.2
63 | scalatest_version=3.2.16
64 | flexmark_version=0.62.2
65 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionBase=GRADLE_USER_HOME
2 | distributionPath=wrapper/dists
3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip
4 | networkTimeout=10000
5 | validateDistributionUrl=true
6 | zipStoreBase=GRADLE_USER_HOME
7 | zipStorePath=wrapper/dists
8 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-it/src/test/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TPCDSTestUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | object TPCDSTestUtils {
18 | val tablePrimaryKeys: Map[String, Seq[String]] = Map(
19 | "call_center" -> Array("cc_call_center_sk"),
20 | "catalog_page" -> Array("cp_catalog_page_sk"),
21 | "catalog_returns" -> Array("cr_item_sk", "cr_order_number"),
22 | "catalog_sales" -> Array("cs_item_sk", "cs_order_number"),
23 | "customer" -> Array("c_customer_sk"),
24 | "customer_address" -> Array("ca_address_sk"),
25 | "customer_demographics" -> Array("cd_demo_sk"),
26 | "date_dim" -> Array("d_date_sk"),
27 | "household_demographics" -> Array("hd_demo_sk"),
28 | "income_band" -> Array("ib_income_band_sk"),
29 | "inventory" -> Array("inv_date_sk", "inv_item_sk", "inv_warehouse_sk"),
30 | "item" -> Array("i_item_sk"),
31 | "promotion" -> Array("p_promo_sk"),
32 | "reason" -> Array("r_reason_sk"),
33 | "ship_mode" -> Array("sm_ship_mode_sk"),
34 | "store" -> Array("s_store_sk"),
35 | "store_returns" -> Array("sr_item_sk", "sr_ticket_number"),
36 | "store_sales" -> Array("ss_item_sk", "ss_ticket_number"),
37 | "time_dim" -> Array("t_time_sk"),
38 | "warehouse" -> Array("w_warehouse_sk"),
39 | "web_page" -> Array("wp_web_page_sk"),
40 | "web_returns" -> Array("wr_item_sk", "wr_order_number"),
41 | "web_sales" -> Array("ws_item_sk", "ws_order_number"),
42 | "web_site" -> Array("web_site_sk")
43 | )
44 | }
45 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TestUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
18 | import com.fasterxml.jackson.module.scala.ClassTagExtensions
19 |
20 | object TestUtils {
21 |
22 | @transient lazy val om: ObjectMapper with ClassTagExtensions = {
23 | val _om = new ObjectMapper() with ClassTagExtensions
24 | _om.findAndRegisterModules()
25 | _om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
26 | _om
27 | }
28 |
29 | def toJson(value: Any): String = om.writeValueAsString(value)
30 | }
31 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterDeleteSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
17 | class ClusterDeleteSuite extends SparkClickHouseClusterTest {
18 |
19 | test("truncate distribute table") {
20 | withSimpleDistTable("single_replica", "db_truncate", "tbl_truncate", true) { (_, db, tbl_dist, _) =>
21 | assert(spark.table(s"$db.$tbl_dist").count() === 4)
22 | spark.sql(s"TRUNCATE TABLE $db.$tbl_dist")
23 | assert(spark.table(s"$db.$tbl_dist").count() === 0)
24 | }
25 | }
26 |
27 | test("delete from distribute table") {
28 | withSimpleDistTable("single_replica", "db_delete", "tbl_delete", true) { (_, db, tbl_dist, _) =>
29 | assert(spark.table(s"$db.$tbl_dist").count() === 4)
30 | spark.sql(s"DELETE FROM $db.$tbl_dist WHERE m = 1")
31 | assert(spark.table(s"$db.$tbl_dist").count() === 3)
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterPartitionManagementSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
17 | import org.apache.spark.sql.Row
18 |
19 | class ClusterPartitionManagementSuite extends SparkClickHouseClusterTest {
20 |
21 | test("distribute table partition") {
22 | withSimpleDistTable("single_replica", "db_part", "tbl_part", true) { (_, db, tbl_dist, _) =>
23 | checkAnswer(
24 | spark.sql(s"SHOW PARTITIONS $db.$tbl_dist"),
25 | Seq(Row("m=1"), Row("m=2"), Row("m=3"), Row("m=4"))
26 | )
27 | checkAnswer(
28 | spark.sql(s"SHOW PARTITIONS $db.$tbl_dist PARTITION(m = 2)"),
29 | Seq(Row("m=2"))
30 | )
31 | spark.sql(s"ALTER TABLE $db.$tbl_dist DROP PARTITION(m = 2)")
32 | checkAnswer(
33 | spark.sql(s"SHOW PARTITIONS $db.$tbl_dist"),
34 | Seq(Row("m=1"), Row("m=3"), Row("m=4"))
35 | )
36 | }
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterTableManagementSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
17 | class ClusterTableManagementSuite extends SparkClickHouseClusterTest {
18 |
19 | test("create or replace distribute table") {
20 | autoCleanupDistTable("single_replica", "db_cor", "tbl_cor_dist") { (cluster, db, _, tbl_local) =>
21 | def createLocalTable(): Unit = spark.sql(
22 | s"""CREATE TABLE $db.$tbl_local (
23 | | id Long NOT NULL
24 | |) USING ClickHouse
25 | |TBLPROPERTIES (
26 | | cluster = '$cluster',
27 | | engine = 'MergeTree()',
28 | | order_by = 'id',
29 | | settings.index_granularity = 8192
30 | |)
31 | |""".stripMargin
32 | )
33 |
34 | def createOrReplaceLocalTable(): Unit = spark.sql(
35 | s"""CREATE OR REPLACE TABLE `$db`.`$tbl_local` (
36 | | id Long NOT NULL
37 | |) USING ClickHouse
38 | |TBLPROPERTIES (
39 | | engine = 'MergeTree()',
40 | | order_by = 'id',
41 | | settings.index_granularity = 8192
42 | |)
43 | |""".stripMargin
44 | )
45 | createLocalTable()
46 | createOrReplaceLocalTable()
47 | createOrReplaceLocalTable()
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseTableDDLSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.single
16 |
17 | import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseSingleMixIn}
18 | import org.apache.spark.sql.Row
19 | import org.scalatest.tags.Cloud
20 |
21 | @Cloud
22 | class ClickHouseCloudTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseCloudMixIn
23 |
24 | class ClickHouseSingleTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseSingleMixIn
25 |
26 | abstract class ClickHouseTableDDLSuite extends SparkClickHouseSingleTest {
27 |
28 | import testImplicits._
29 |
30 | test("clickhouse command runner") {
31 | withTable("default.abc") {
32 | runClickHouseSQL("CREATE TABLE default.abc(a UInt8) ENGINE=Memory()")
33 | checkAnswer(
34 | spark.sql("""DESC default.abc""").select($"col_name", $"data_type").limit(1),
35 | Row("a", "smallint") :: Nil
36 | )
37 | }
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark-runtime/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/spark-3.3/clickhouse-spark-runtime/.gitkeep
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/com/clickhouse/spark/ClickHouseCommandRunner.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import org.apache.spark.sql.connector.ExternalCommandRunner
18 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
19 | import com.clickhouse.spark.client.NodeClient
20 |
21 | class ClickHouseCommandRunner extends ExternalCommandRunner with ClickHouseHelper {
22 |
23 | override def executeCommand(sql: String, options: CaseInsensitiveStringMap): Array[String] =
24 | Utils.tryWithResource(NodeClient(buildNodeSpec(options))) { nodeClient =>
25 | nodeClient.syncQueryAndCheckOutputJSONEachRow(sql).records.map(_.toString).toArray
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/com/clickhouse/spark/CommitMessage.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import org.apache.spark.sql.connector.write.WriterCommitMessage
18 |
19 | case class CommitMessage(msg: String = "") extends WriterCommitMessage
20 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/com/clickhouse/spark/Constants.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import com.clickhouse.client.config.ClickHouseClientOption._
18 |
19 | object Constants {
20 | // format: off
21 | //////////////////////////////////////////////////////////
22 | //////// clickhouse datasource catalog properties ////////
23 | //////////////////////////////////////////////////////////
24 | final val CATALOG_PROP_HOST = "host"
25 | final val CATALOG_PROP_TCP_PORT = "tcp_port"
26 | final val CATALOG_PROP_HTTP_PORT = "http_port"
27 | final val CATALOG_PROP_PROTOCOL = "protocol"
28 | final val CATALOG_PROP_USER = "user"
29 | final val CATALOG_PROP_PASSWORD = "password"
30 | final val CATALOG_PROP_DATABASE = "database"
31 | final val CATALOG_PROP_TZ = "timezone" // server(default), client, UTC+3, Asia/Shanghai, etc.
32 | final val CATALOG_PROP_OPTION_PREFIX = "option."
33 | final val CATALOG_PROP_IGNORE_OPTIONS = Seq(
34 | DATABASE.getKey, COMPRESS.getKey, DECOMPRESS.getKey, FORMAT.getKey, RETRY.getKey,
35 | USE_SERVER_TIME_ZONE.getKey, USE_SERVER_TIME_ZONE_FOR_DATES.getKey, SERVER_TIME_ZONE.getKey, USE_TIME_ZONE.getKey)
36 |
37 | //////////////////////////////////////////////////////////
38 | ////////// clickhouse datasource read properties /////////
39 | //////////////////////////////////////////////////////////
40 |
41 | //////////////////////////////////////////////////////////
42 | ///////// clickhouse datasource write properties /////////
43 | //////////////////////////////////////////////////////////
44 | // format: on
45 | }
46 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/FunctionRegistry.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.func
16 |
17 | import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
18 |
19 | import scala.collection.mutable
20 |
21 | trait FunctionRegistry {
22 |
23 | def list: Array[String]
24 |
25 | def load(name: String): Option[UnboundFunction]
26 | }
27 |
28 | class CompositeFunctionRegistry(registries: Array[FunctionRegistry]) extends FunctionRegistry {
29 |
30 | override def list: Array[String] = registries.flatMap(_.list)
31 |
32 | override def load(name: String): Option[UnboundFunction] = registries.flatMap(_.load(name)).headOption
33 | }
34 |
35 | object StaticFunctionRegistry extends FunctionRegistry {
36 |
37 | private val functions = Map[String, UnboundFunction](
38 | "ck_xx_hash64" -> ClickHouseXxHash64, // for compatible
39 | "clickhouse_xxHash64" -> ClickHouseXxHash64
40 | )
41 |
42 | override def list: Array[String] = functions.keys.toArray
43 |
44 | override def load(name: String): Option[UnboundFunction] = functions.get(name)
45 | }
46 |
47 | class DynamicFunctionRegistry extends FunctionRegistry {
48 |
49 | private val functions = mutable.Map[String, UnboundFunction]()
50 |
51 | def register(name: String, function: UnboundFunction): DynamicFunctionRegistry = {
52 | functions += (name -> function)
53 | this
54 | }
55 |
56 | override def list: Array[String] = functions.keys.toArray
57 |
58 | override def load(name: String): Option[UnboundFunction] = functions.get(name)
59 | }
60 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ClickHouseMetadataColumn.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.read
16 |
17 | import org.apache.spark.sql.connector.catalog.MetadataColumn
18 | import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType, StringType, StructField}
19 |
20 | object ClickHouseMetadataColumn {
21 | val mergeTreeMetadataCols: Array[MetadataColumn] = Array(
22 | ClickHouseMetadataColumn("_part", StringType),
23 | ClickHouseMetadataColumn("_part_index", LongType),
24 | ClickHouseMetadataColumn("_part_uuid", StringType),
25 | ClickHouseMetadataColumn("_partition_id", StringType),
26 | // ClickHouseMetadataColumn("_partition_value", StringType),
27 | ClickHouseMetadataColumn("_sample_factor", DoubleType)
28 | )
29 |
30 | val distributeMetadataCols: Array[MetadataColumn] = Array(
31 | ClickHouseMetadataColumn("_table", StringType),
32 | ClickHouseMetadataColumn("_part", StringType),
33 | ClickHouseMetadataColumn("_part_index", LongType),
34 | ClickHouseMetadataColumn("_part_uuid", StringType),
35 | ClickHouseMetadataColumn("_partition_id", StringType),
36 | ClickHouseMetadataColumn("_sample_factor", DoubleType),
37 | ClickHouseMetadataColumn("_shard_num", IntegerType)
38 | )
39 | }
40 |
41 | case class ClickHouseMetadataColumn(
42 | override val name: String,
43 | override val dataType: DataType,
44 | override val isNullable: Boolean = false
45 | ) extends MetadataColumn {
46 | def toStructField: StructField = StructField(name, dataType, isNullable)
47 | }
48 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ScanJobDescription.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.read
16 |
17 | import org.apache.spark.sql.clickhouse.ReadOptions
18 | import org.apache.spark.sql.types.StructType
19 | import com.clickhouse.spark.spec._
20 |
21 | import java.time.ZoneId
22 |
23 | case class ScanJobDescription(
24 | node: NodeSpec,
25 | tz: ZoneId,
26 | tableSpec: TableSpec,
27 | tableEngineSpec: TableEngineSpec,
28 | cluster: Option[ClusterSpec],
29 | localTableSpec: Option[TableSpec],
30 | localTableEngineSpec: Option[TableEngineSpec],
31 | readOptions: ReadOptions,
32 | // Below fields will be constructed in ScanBuilder.
33 | readSchema: StructType = new StructType,
34 | // We should pass compiled ClickHouse SQL snippets(or ClickHouse SQL AST data structure) instead of Spark Expression
35 | // into Scan tasks because the check happens in planing phase on driver side.
36 | filtersExpr: String = "1=1",
37 | groupByClause: Option[String] = None,
38 | limit: Option[Int] = None
39 | ) {
40 |
41 | def database: String = tableEngineSpec match {
42 | case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_db
43 | case _ => tableSpec.database
44 | }
45 |
46 | def table: String = tableEngineSpec match {
47 | case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_table
48 | case _ => tableSpec.name
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/com/clickhouse/spark/write/format/ClickHouseJsonEachRowWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
 9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.write.format
16 |
17 | import org.apache.commons.io.IOUtils
18 | import org.apache.spark.sql.catalyst.InternalRow
19 | import org.apache.spark.sql.clickhouse.JsonWriter
20 | import com.clickhouse.spark.write.{ClickHouseWriter, WriteJobDescription}
21 |
22 | class ClickHouseJsonEachRowWriter(writeJob: WriteJobDescription) extends ClickHouseWriter(writeJob) {
23 |
24 | override def format: String = "JSONEachRow"
25 |
26 | val jsonWriter: JsonWriter = new JsonWriter(revisedDataSchema, writeJob.tz, output)
27 |
28 | override def writeRow(record: InternalRow): Unit = jsonWriter.write(record)
29 |
30 | override def doSerialize(): Array[Byte] = {
31 | jsonWriter.flush()
32 | output.close()
33 | serializedBuffer.toByteArray
34 | }
35 |
36 | override def close(): Unit = {
37 | IOUtils.closeQuietly(jsonWriter)
38 | super.close()
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/JsonWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.spark.sql.catalyst.InternalRow
18 | import org.apache.spark.sql.catalyst.json.{JSONOptions, JacksonGenerator}
19 | import org.apache.spark.sql.types.StructType
20 |
21 | import java.io.{Closeable, Flushable, OutputStream, OutputStreamWriter}
22 | import java.nio.charset.StandardCharsets
23 | import java.time.ZoneId
24 |
25 | class JsonWriter(schema: StructType, tz: ZoneId, output: OutputStream) extends Closeable with Flushable {
26 | private val option: Map[String, String] = Map(
27 | "timestampFormat" -> "yyyy-MM-dd HH:mm:ss",
28 | "timestampNTZFormat" -> "yyyy-MM-dd HH:mm:ss"
29 | )
30 | private val utf8Writer = new OutputStreamWriter(output, StandardCharsets.UTF_8)
31 | private val jsonWriter = new JacksonGenerator(schema, utf8Writer, new JSONOptions(option, tz.getId))
32 |
33 | def write(row: InternalRow): Unit = {
34 | jsonWriter.write(row)
35 | jsonWriter.writeLineEnding()
36 | }
37 |
38 | override def flush(): Unit = jsonWriter.flush()
39 |
40 | override def close(): Unit = jsonWriter.close()
41 | }
42 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/SparkUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.arrow.memory.BufferAllocator
18 | import org.apache.arrow.vector.types.pojo.Schema
19 | import org.apache.spark._
20 | import org.apache.spark.sql.types.StructType
21 | import org.apache.spark.sql.util.ArrowUtils
22 | import org.apache.spark.util.VersionUtils
23 |
24 | object SparkUtils {
25 |
26 | lazy val MAJOR_MINOR_VERSION: (Int, Int) = VersionUtils.majorMinorVersion(SPARK_VERSION)
27 |
28 | def toArrowSchema(schema: StructType, timeZoneId: String): Schema = ArrowUtils.toArrowSchema(schema, timeZoneId)
29 |
30 | def spawnArrowAllocator(name: String): BufferAllocator =
31 | ArrowUtils.rootAllocator.newChildAllocator(name, 0, Long.MaxValue)
32 | }
33 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/test/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/spark-3.3/clickhouse-spark/src/test/scala/org/apache/spark/sql/clickhouse/ClickHouseHelperSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
18 | import org.scalatest.funsuite.AnyFunSuite
19 | import com.clickhouse.spark.ClickHouseHelper
20 |
21 | import scala.collection.JavaConverters._
22 |
class ClickHouseHelperSuite extends AnyFunSuite with ClickHouseHelper {

  test("buildNodeSpec") {
    // "database" is given both as a top-level catalog property and via the
    // "option." passthrough prefix; the top-level value must win.
    val catalogProps = Map(
      "database" -> "testing",
      "option.database" -> "production",
      "option.ssl" -> "true"
    )
    val nodeSpec = buildNodeSpec(new CaseInsensitiveStringMap(catalogProps.asJava))
    assert(nodeSpec.database === "testing")
    // Non-conflicting passthrough options are preserved.
    assert(nodeSpec.options.get("ssl") === "true")
  }
}
37 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-it/src/test/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TPCDSTestUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
object TPCDSTestUtils {

  /**
   * Primary-key columns of each TPC-DS table, keyed by table name.
   *
   * Values are written as `Seq` literals to match the declared
   * `Map[String, Seq[String]]` type directly; the previous `Array(...)`
   * literals only type-checked via the implicit Array-to-Seq wrapping and
   * carried Array's reference-equality surprises.
   */
  val tablePrimaryKeys: Map[String, Seq[String]] = Map(
    "call_center" -> Seq("cc_call_center_sk"),
    "catalog_page" -> Seq("cp_catalog_page_sk"),
    "catalog_returns" -> Seq("cr_item_sk", "cr_order_number"),
    "catalog_sales" -> Seq("cs_item_sk", "cs_order_number"),
    "customer" -> Seq("c_customer_sk"),
    "customer_address" -> Seq("ca_address_sk"),
    "customer_demographics" -> Seq("cd_demo_sk"),
    "date_dim" -> Seq("d_date_sk"),
    "household_demographics" -> Seq("hd_demo_sk"),
    "income_band" -> Seq("ib_income_band_sk"),
    "inventory" -> Seq("inv_date_sk", "inv_item_sk", "inv_warehouse_sk"),
    "item" -> Seq("i_item_sk"),
    "promotion" -> Seq("p_promo_sk"),
    "reason" -> Seq("r_reason_sk"),
    "ship_mode" -> Seq("sm_ship_mode_sk"),
    "store" -> Seq("s_store_sk"),
    "store_returns" -> Seq("sr_item_sk", "sr_ticket_number"),
    "store_sales" -> Seq("ss_item_sk", "ss_ticket_number"),
    "time_dim" -> Seq("t_time_sk"),
    "warehouse" -> Seq("w_warehouse_sk"),
    "web_page" -> Seq("wp_web_page_sk"),
    "web_returns" -> Seq("wr_item_sk", "wr_order_number"),
    "web_sales" -> Seq("ws_item_sk", "ws_order_number"),
    "web_site" -> Seq("web_site_sk")
  )
}
45 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TestUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
18 | import com.fasterxml.jackson.module.scala.ClassTagExtensions
19 |
object TestUtils {

  // Shared Jackson mapper for tests: modules (including the Scala module) are
  // auto-discovered via findAndRegisterModules(), and unknown JSON properties
  // are tolerated rather than failing deserialization.
  @transient lazy val om: ObjectMapper with ClassTagExtensions = {
    val _om = new ObjectMapper() with ClassTagExtensions
    _om.findAndRegisterModules()
    _om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    _om
  }

  /** Serializes any value to its JSON string representation using the shared mapper. */
  def toJson(value: Any): String = om.writeValueAsString(value)
}
31 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterDeleteSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
class ClusterDeleteSuite extends SparkClickHouseClusterTest {

  // Each case starts from a distributed table pre-populated with 4 rows
  // (withSimpleDistTable is invoked with data loading enabled).

  test("truncate distribute table") {
    withSimpleDistTable("single_replica", "db_truncate", "tbl_truncate", true) { (_, db, distTable, _) =>
      def rowCount(): Long = spark.table(s"$db.$distTable").count()
      assert(rowCount() === 4)
      spark.sql(s"TRUNCATE TABLE $db.$distTable")
      assert(rowCount() === 0)
    }
  }

  test("delete from distribute table") {
    withSimpleDistTable("single_replica", "db_delete", "tbl_delete", true) { (_, db, distTable, _) =>
      def rowCount(): Long = spark.table(s"$db.$distTable").count()
      assert(rowCount() === 4)
      spark.sql(s"DELETE FROM $db.$distTable WHERE m = 1")
      assert(rowCount() === 3)
    }
  }
}
35 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterPartitionManagementSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
17 | import org.apache.spark.sql.Row
18 |
class ClusterPartitionManagementSuite extends SparkClickHouseClusterTest {

  test("distribute table partition") {
    withSimpleDistTable("single_replica", "db_part", "tbl_part", true) { (_, db, distTable, _) =>
      def showPartitions() = spark.sql(s"SHOW PARTITIONS $db.$distTable")

      // Initially four partitions, one per value of m.
      checkAnswer(showPartitions(), Seq("m=1", "m=2", "m=3", "m=4").map(Row(_)))
      // Partition filtering narrows the listing to the matching partition.
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $db.$distTable PARTITION(m = 2)"),
        Seq(Row("m=2"))
      )
      // Dropping a partition removes it from the listing.
      spark.sql(s"ALTER TABLE $db.$distTable DROP PARTITION(m = 2)")
      checkAnswer(showPartitions(), Seq("m=1", "m=3", "m=4").map(Row(_)))
    }
  }
}
39 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterTableManagementSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
class ClusterTableManagementSuite extends SparkClickHouseClusterTest {

  test("create or replace distribute table") {
    autoCleanupDistTable("single_replica", "db_cor", "tbl_cor_dist") { (cluster, db, _, tbl_local) =>
      // Plain CREATE TABLE on the cluster; expected to succeed only when the
      // table does not already exist.
      def createLocalTable(): Unit = spark.sql(
        s"""CREATE TABLE $db.$tbl_local (
           |  id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           |  cluster = '$cluster',
           |  engine = 'MergeTree()',
           |  order_by = 'id',
           |  settings.index_granularity = 8192
           |)
           |""".stripMargin
      )

      // CREATE OR REPLACE variant; must succeed whether or not the table exists.
      def createOrReplaceLocalTable(): Unit = spark.sql(
        s"""CREATE OR REPLACE TABLE `$db`.`$tbl_local` (
           |  id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           |  engine = 'MergeTree()',
           |  order_by = 'id',
           |  settings.index_granularity = 8192
           |)
           |""".stripMargin
      )
      // CREATE, then REPLACE twice: the second replace exercises replacing an
      // already-replaced table.
      createLocalTable()
      createOrReplaceLocalTable()
      createOrReplaceLocalTable()
    }
  }
}
51 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseTableDDLSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.single
16 |
17 | import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseSingleMixIn}
18 | import org.apache.spark.sql.Row
19 | import org.scalatest.tags.Cloud
20 |
@Cloud
class ClickHouseCloudTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseCloudMixIn

class ClickHouseSingleTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseSingleMixIn

// Shared DDL test cases, parameterized by environment mix-in (ClickHouse Cloud
// vs a single local node).
abstract class ClickHouseTableDDLSuite extends SparkClickHouseSingleTest {

  import testImplicits._

  test("clickhouse command runner") {
    withTable("default.abc") {
      // Create the table via raw ClickHouse SQL through the external command
      // runner, then verify Spark describes the ClickHouse UInt8 column as
      // smallint.
      runClickHouseSQL("CREATE TABLE default.abc(a UInt8) ENGINE=Memory()")
      checkAnswer(
        spark.sql("""DESC default.abc""").select($"col_name", $"data_type").limit(1),
        Row("a", "smallint") :: Nil
      )
    }
  }
}
40 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark-runtime/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/spark-3.4/clickhouse-spark-runtime/.gitkeep
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/ClickHouseCommandRunner.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import org.apache.spark.sql.connector.ExternalCommandRunner
18 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
19 | import com.clickhouse.spark.client.NodeClient
20 |
class ClickHouseCommandRunner extends ExternalCommandRunner with ClickHouseHelper {

  /**
   * Executes a raw ClickHouse SQL statement against the node described by
   * `options` and returns the result records as JSON strings, one per row.
   * The client is closed after the query via tryWithResource.
   */
  override def executeCommand(sql: String, options: CaseInsensitiveStringMap): Array[String] = {
    val nodeSpec = buildNodeSpec(options)
    Utils.tryWithResource(NodeClient(nodeSpec)) { client =>
      client.syncQueryAndCheckOutputJSONEachRow(sql).records.map(_.toString).toArray
    }
  }
}
28 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/CommitMessage.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import org.apache.spark.sql.connector.write.WriterCommitMessage
18 |
/** [[WriterCommitMessage]] returned by ClickHouse writers; `msg` is an optional free-form note (empty by default). */
case class CommitMessage(msg: String = "") extends WriterCommitMessage
20 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/Constants.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import com.clickhouse.client.config.ClickHouseClientOption._
18 |
object Constants {
  // format: off
  //////////////////////////////////////////////////////////
  //////// clickhouse datasource catalog properties ////////
  //////////////////////////////////////////////////////////
  // Connection coordinates, credentials and default database for the catalog.
  final val CATALOG_PROP_HOST = "host"
  final val CATALOG_PROP_TCP_PORT = "tcp_port"
  final val CATALOG_PROP_HTTP_PORT = "http_port"
  final val CATALOG_PROP_PROTOCOL = "protocol"
  final val CATALOG_PROP_USER = "user"
  final val CATALOG_PROP_PASSWORD = "password"
  final val CATALOG_PROP_DATABASE = "database"
  final val CATALOG_PROP_TZ = "timezone" // server(default), client, UTC+3, Asia/Shanghai, etc.
  // Prefix for forwarding arbitrary clickhouse-client options as catalog properties.
  final val CATALOG_PROP_OPTION_PREFIX = "option."
  // Client options dropped when forwarded via the "option." prefix —
  // NOTE(review): presumably because the connector manages these itself
  // (database, compression, format, retry and time-zone handling); confirm
  // against the option-filtering code in ClickHouseHelper.
  final val CATALOG_PROP_IGNORE_OPTIONS = Seq(
    DATABASE.getKey, COMPRESS.getKey, DECOMPRESS.getKey, FORMAT.getKey, RETRY.getKey,
    USE_SERVER_TIME_ZONE.getKey, USE_SERVER_TIME_ZONE_FOR_DATES.getKey, SERVER_TIME_ZONE.getKey, USE_TIME_ZONE.getKey)

  //////////////////////////////////////////////////////////
  ////////// clickhouse datasource read properties /////////
  //////////////////////////////////////////////////////////

  //////////////////////////////////////////////////////////
  ///////// clickhouse datasource write properties /////////
  //////////////////////////////////////////////////////////
  // format: on
}
46 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/CityHash64.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.func
16 |
17 | import com.clickhouse.spark.hash
18 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L694
// Spark function wrapper for ClickHouse's cityHash64; delegates the actual
// hashing to the shared hash.CityHash64 implementation.
object CityHash64 extends MultiStringArgsHash {

  // Name under which the function is registered on the Spark side.
  override protected def funcName: String = "clickhouse_cityHash64"

  // Corresponding ClickHouse function name(s).
  override val ckFuncNames: Array[String] = Array("cityHash64")

  override def applyHash(input: Array[Any]): Long = hash.CityHash64(input)
}
28 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/MurmurHash2.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.func
16 |
17 | import com.clickhouse.spark.hash
18 | import com.clickhouse.spark.hash.HashUtils
19 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L460
// Spark function wrapper for ClickHouse's murmurHash2_64; delegates to the
// shared hash.Murmurhash2_64 implementation.
object MurmurHash2_64 extends MultiStringArgsHash {

  // Name under which the function is registered on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash2_64"

  // Corresponding ClickHouse function name(s).
  override val ckFuncNames: Array[String] = Array("murmurHash2_64")

  override def applyHash(input: Array[Any]): Long = hash.Murmurhash2_64(input)
}
29 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
// Spark function wrapper for ClickHouse's murmurHash2_32; the 32-bit hash is
// widened via HashUtils.toUInt32 so the returned Long carries the unsigned
// 32-bit value.
object MurmurHash2_32 extends MultiStringArgsHash {

  // Name under which the function is registered on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash2_32"

  // Corresponding ClickHouse function name(s).
  override val ckFuncNames: Array[String] = Array("murmurHash2_32")

  override def applyHash(input: Array[Any]): Long = HashUtils.toUInt32(hash.Murmurhash2_32(input))
}
39 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/MurmurHash3.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.func
16 |
17 | import com.clickhouse.spark.hash
18 | import com.clickhouse.spark.hash.HashUtils
19 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L543
// Spark function wrapper for ClickHouse's murmurHash3_64; delegates to the
// shared hash.Murmurhash3_64 implementation.
object MurmurHash3_64 extends MultiStringArgsHash {

  // Name under which the function is registered on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash3_64"

  // Corresponding ClickHouse function name(s).
  override val ckFuncNames: Array[String] = Array("murmurHash3_64")

  override def applyHash(input: Array[Any]): Long = hash.Murmurhash3_64(input)
}
29 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
// NOTE(review): the #L519 reference above is identical to the one on
// MurmurHash2_32 — confirm it actually points at murmurHash3_32.
// Spark function wrapper for ClickHouse's murmurHash3_32; the 32-bit hash is
// widened via HashUtils.toUInt32 so the returned Long carries the unsigned
// 32-bit value.
object MurmurHash3_32 extends MultiStringArgsHash {

  // Name under which the function is registered on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash3_32"

  // Corresponding ClickHouse function name(s).
  override val ckFuncNames: Array[String] = Array("murmurHash3_32")

  override def applyHash(input: Array[Any]): Long = HashUtils.toUInt32(hash.Murmurhash3_32(input))
}
39 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ClickHouseMetadataColumn.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.read
16 |
17 | import org.apache.spark.sql.connector.catalog.MetadataColumn
18 | import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType, StringType, StructField}
19 |
object ClickHouseMetadataColumn {
  // Hidden metadata columns exposed when scanning MergeTree-family tables;
  // names mirror ClickHouse virtual columns (_part, _partition_id, ...).
  val mergeTreeMetadataCols: Array[MetadataColumn] = Array(
    ClickHouseMetadataColumn("_part", StringType),
    ClickHouseMetadataColumn("_part_index", LongType),
    ClickHouseMetadataColumn("_part_uuid", StringType),
    ClickHouseMetadataColumn("_partition_id", StringType),
    // ClickHouseMetadataColumn("_partition_value", StringType),
    ClickHouseMetadataColumn("_sample_factor", DoubleType)
  )

  // Metadata columns for Distributed tables; same set as MergeTree plus the
  // source _table name and the _shard_num the row came from.
  val distributeMetadataCols: Array[MetadataColumn] = Array(
    ClickHouseMetadataColumn("_table", StringType),
    ClickHouseMetadataColumn("_part", StringType),
    ClickHouseMetadataColumn("_part_index", LongType),
    ClickHouseMetadataColumn("_part_uuid", StringType),
    ClickHouseMetadataColumn("_partition_id", StringType),
    ClickHouseMetadataColumn("_sample_factor", DoubleType),
    ClickHouseMetadataColumn("_shard_num", IntegerType)
  )
}
40 |
/**
 * Spark [[MetadataColumn]] describing a ClickHouse virtual column.
 * Metadata columns are non-nullable by default.
 */
case class ClickHouseMetadataColumn(
  override val name: String,
  override val dataType: DataType,
  override val isNullable: Boolean = false
) extends MetadataColumn {
  /** View of this metadata column as a [[StructField]] for schema construction. */
  def toStructField: StructField = StructField(name, dataType, isNullable)
}
48 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ScanJobDescription.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.read
16 |
17 | import org.apache.spark.sql.clickhouse.ReadOptions
18 | import org.apache.spark.sql.types.StructType
19 | import com.clickhouse.spark.spec._
20 |
21 | import java.time.ZoneId
22 |
/**
 * Immutable description of a ClickHouse scan job, carrying the target node,
 * session time zone, table/engine specs and read options from planning into
 * the scan tasks.
 */
case class ScanJobDescription(
  node: NodeSpec,
  tz: ZoneId,
  tableSpec: TableSpec,
  tableEngineSpec: TableEngineSpec,
  cluster: Option[ClusterSpec],
  localTableSpec: Option[TableSpec],
  localTableEngineSpec: Option[TableEngineSpec],
  readOptions: ReadOptions,
  // Below fields will be constructed in ScanBuilder.
  readSchema: StructType = new StructType,
  // We should pass compiled ClickHouse SQL snippets (or ClickHouse SQL AST data structure) instead of Spark Expression
  // into Scan tasks because the check happens in planning phase on driver side.
  filtersExpr: String = "1=1",
  groupByClause: Option[String] = None,
  limit: Option[Int] = None
) {

  // Effective database to read: the underlying local database when the table
  // is Distributed and convertDistributedToLocal is enabled, otherwise the
  // table's own database.
  def database: String = tableEngineSpec match {
    case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_db
    case _ => tableSpec.database
  }

  // Effective table to read; mirrors `database` — resolves to the local table
  // when convertDistributedToLocal applies.
  def table: String = tableEngineSpec match {
    case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_table
    case _ => tableSpec.name
  }
}
51 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/write/format/ClickHouseJsonEachRowWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
 9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.write.format
16 |
17 | import org.apache.commons.io.IOUtils
18 | import org.apache.spark.sql.catalyst.InternalRow
19 | import org.apache.spark.sql.clickhouse.JsonWriter
20 | import com.clickhouse.spark.write.{ClickHouseWriter, WriteJobDescription}
21 |
class ClickHouseJsonEachRowWriter(writeJob: WriteJobDescription) extends ClickHouseWriter(writeJob) {

  /** ClickHouse input format used for this writer. */
  override def format: String = "JSONEachRow"

  // Streams rows as JSON lines into `output` (the stream provided by
  // ClickHouseWriter, backed by serializedBuffer).
  val jsonWriter: JsonWriter = new JsonWriter(revisedDataSchema, writeJob.tz, output)

  override def writeRow(record: InternalRow): Unit = jsonWriter.write(record)

  // Flushes buffered JSON and closes the output stream before snapshotting the
  // accumulated bytes — the order matters so no row is left unflushed.
  override def doSerialize(): Array[Byte] = {
    jsonWriter.flush()
    output.close()
    serializedBuffer.toByteArray
  }

  // Closes the JSON writer quietly (a failed close must not mask the original
  // error), then runs the parent cleanup.
  override def close(): Unit = {
    IOUtils.closeQuietly(jsonWriter)
    super.close()
  }
}
41 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/JsonWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.spark.sql.catalyst.InternalRow
18 | import org.apache.spark.sql.catalyst.json.{JSONOptions, JacksonGenerator}
19 | import org.apache.spark.sql.types.StructType
20 |
21 | import java.io.{Closeable, Flushable, OutputStream, OutputStreamWriter}
22 | import java.nio.charset.StandardCharsets
23 | import java.time.ZoneId
24 |
/**
 * Serializes [[InternalRow]]s as newline-delimited JSON (one object per line)
 * to `output`, formatting timestamps without sub-second precision in the
 * given time zone.
 */
class JsonWriter(schema: StructType, tz: ZoneId, output: OutputStream) extends Closeable with Flushable {

  private val jsonOptions = new JSONOptions(
    Map(
      "timestampFormat" -> "yyyy-MM-dd HH:mm:ss",
      "timestampNTZFormat" -> "yyyy-MM-dd HH:mm:ss"
    ),
    tz.getId
  )
  private val charWriter = new OutputStreamWriter(output, StandardCharsets.UTF_8)
  private val generator = new JacksonGenerator(schema, charWriter, jsonOptions)

  /** Writes one row followed by a line ending. */
  def write(row: InternalRow): Unit = {
    generator.write(row)
    generator.writeLineEnding()
  }

  override def flush(): Unit = generator.flush()

  override def close(): Unit = generator.close()
}
42 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/SparkUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.arrow.memory.BufferAllocator
18 | import org.apache.arrow.vector.types.pojo.Schema
19 | import org.apache.spark._
20 | import org.apache.spark.sql.types.StructType
21 | import org.apache.spark.sql.util.ArrowUtils
22 | import org.apache.spark.util.VersionUtils
23 |
object SparkUtils {

  /** Spark version as a (major, minor) tuple, parsed lazily from [[SPARK_VERSION]]. */
  lazy val MAJOR_MINOR_VERSION: (Int, Int) = VersionUtils.majorMinorVersion(SPARK_VERSION)

  /** Converts a Spark [[StructType]] to an Arrow [[Schema]]; delegates to Spark's internal ArrowUtils. */
  def toArrowSchema(schema: StructType, timeZoneId: String): Schema = ArrowUtils.toArrowSchema(schema, timeZoneId)

  /** Creates a named child allocator (zero reservation, unbounded limit) off Spark's shared Arrow root allocator. */
  def spawnArrowAllocator(name: String): BufferAllocator =
    ArrowUtils.rootAllocator.newChildAllocator(name, 0, Long.MaxValue)
}
33 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/test/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/spark-3.4/clickhouse-spark/src/test/scala/org/apache/spark/sql/clickhouse/ClickHouseHelperSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
18 | import org.scalatest.funsuite.AnyFunSuite
19 | import com.clickhouse.spark.ClickHouseHelper
20 |
21 | import scala.collection.JavaConverters._
22 |
class ClickHouseHelperSuite extends AnyFunSuite with ClickHouseHelper {

  test("buildNodeSpec") {
    val props = Map(
      "database" -> "testing",
      "option.database" -> "production",
      "option.ssl" -> "true"
    )
    val nodeSpec = buildNodeSpec(new CaseInsensitiveStringMap(props.asJava))
    // The top-level "database" key takes precedence over "option.database".
    assert(nodeSpec.database === "testing")
    assert(nodeSpec.options.get("ssl") === "true")
  }
}
37 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-it/src/test/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TPCDSTestUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
/** Primary-key column names for each of the 24 TPC-DS tables, keyed by table name. */
object TPCDSTestUtils {
  // Built with Seq directly: the previous Array literals relied on the implicit
  // Array-to-Seq wrapping to satisfy Map[String, Seq[String]].
  val tablePrimaryKeys: Map[String, Seq[String]] = Map(
    "call_center" -> Seq("cc_call_center_sk"),
    "catalog_page" -> Seq("cp_catalog_page_sk"),
    "catalog_returns" -> Seq("cr_item_sk", "cr_order_number"),
    "catalog_sales" -> Seq("cs_item_sk", "cs_order_number"),
    "customer" -> Seq("c_customer_sk"),
    "customer_address" -> Seq("ca_address_sk"),
    "customer_demographics" -> Seq("cd_demo_sk"),
    "date_dim" -> Seq("d_date_sk"),
    "household_demographics" -> Seq("hd_demo_sk"),
    "income_band" -> Seq("ib_income_band_sk"),
    "inventory" -> Seq("inv_date_sk", "inv_item_sk", "inv_warehouse_sk"),
    "item" -> Seq("i_item_sk"),
    "promotion" -> Seq("p_promo_sk"),
    "reason" -> Seq("r_reason_sk"),
    "ship_mode" -> Seq("sm_ship_mode_sk"),
    "store" -> Seq("s_store_sk"),
    "store_returns" -> Seq("sr_item_sk", "sr_ticket_number"),
    "store_sales" -> Seq("ss_item_sk", "ss_ticket_number"),
    "time_dim" -> Seq("t_time_sk"),
    "warehouse" -> Seq("w_warehouse_sk"),
    "web_page" -> Seq("wp_web_page_sk"),
    "web_returns" -> Seq("wr_item_sk", "wr_order_number"),
    "web_sales" -> Seq("ws_item_sk", "ws_order_number"),
    "web_site" -> Seq("web_site_sk")
  )
}
45 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TestUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
18 | import com.fasterxml.jackson.module.scala.ClassTagExtensions
19 |
object TestUtils {

  // Shared Jackson mapper, built lazily; lenient about unknown JSON properties.
  @transient lazy val om: ObjectMapper with ClassTagExtensions = {
    val mapper = new ObjectMapper() with ClassTagExtensions
    mapper.findAndRegisterModules()
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    mapper
  }

  /** Renders any value as a JSON string via the shared mapper. */
  def toJson(value: Any): String = om.writeValueAsString(value)
}
31 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterDeleteSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
class ClusterDeleteSuite extends SparkClickHouseClusterTest {

  test("truncate distribute table") {
    withSimpleDistTable("single_replica", "db_truncate", "tbl_truncate", true) { (_, db, distTable, _) =>
      // Fixture seeds four rows; TRUNCATE must leave the table empty.
      assert(spark.table(s"$db.$distTable").count() === 4)
      spark.sql(s"TRUNCATE TABLE $db.$distTable")
      assert(spark.table(s"$db.$distTable").count() === 0)
    }
  }

  test("delete from distribute table") {
    withSimpleDistTable("single_replica", "db_delete", "tbl_delete", true) { (_, db, distTable, _) =>
      // Deleting the single row with m = 1 leaves three of the four seeded rows.
      assert(spark.table(s"$db.$distTable").count() === 4)
      spark.sql(s"DELETE FROM $db.$distTable WHERE m = 1")
      assert(spark.table(s"$db.$distTable").count() === 3)
    }
  }
}
35 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterPartitionManagementSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
17 | import org.apache.spark.sql.Row
18 |
class ClusterPartitionManagementSuite extends SparkClickHouseClusterTest {

  test("distribute table partition") {
    withSimpleDistTable("single_replica", "db_part", "tbl_part", true) { (_, db, distTable, _) =>
      // The fixture creates one partition per distinct m value (1..4).
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $db.$distTable"),
        Seq(Row("m=1"), Row("m=2"), Row("m=3"), Row("m=4"))
      )
      // Partition spec narrows the listing to the matching partition only.
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $db.$distTable PARTITION(m = 2)"),
        Seq(Row("m=2"))
      )
      // Dropping a partition removes it from subsequent listings.
      spark.sql(s"ALTER TABLE $db.$distTable DROP PARTITION(m = 2)")
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $db.$distTable"),
        Seq(Row("m=1"), Row("m=3"), Row("m=4"))
      )
    }
  }
}
39 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterTableManagementSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.cluster
16 |
class ClusterTableManagementSuite extends SparkClickHouseClusterTest {

  test("create or replace distribute table") {
    autoCleanupDistTable("single_replica", "db_cor", "tbl_cor_dist") { (clusterName, db, _, localTbl) =>
      // Plain CREATE, pinned to the cluster.
      def defineTable(): Unit = spark.sql(
        s"""CREATE TABLE $db.$localTbl (
           |  id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           |  cluster = '$clusterName',
           |  engine = 'MergeTree()',
           |  order_by = 'id',
           |  settings.index_granularity = 8192
           |)
           |""".stripMargin
      )

      // CREATE OR REPLACE without the cluster property.
      def redefineTable(): Unit = spark.sql(
        s"""CREATE OR REPLACE TABLE `$db`.`$localTbl` (
           |  id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           |  engine = 'MergeTree()',
           |  order_by = 'id',
           |  settings.index_granularity = 8192
           |)
           |""".stripMargin
      )

      // CREATE OR REPLACE must succeed both over an existing table and over its own result.
      defineTable()
      redefineTable()
      redefineTable()
    }
  }
}
51 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseTableDDLSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse.single
16 |
17 | import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseSingleMixIn}
18 | import org.apache.spark.sql.Row
19 | import org.scalatest.tags.Cloud
20 |
// Runs the shared DDL suite against ClickHouse Cloud (gated by the @Cloud scalatest tag).
@Cloud
class ClickHouseCloudTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseCloudMixIn
23 |
// Runs the shared DDL suite against a single-node ClickHouse instance.
class ClickHouseSingleTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseSingleMixIn
25 |
abstract class ClickHouseTableDDLSuite extends SparkClickHouseSingleTest {

  import testImplicits._

  test("clickhouse command runner") {
    withTable("default.abc") {
      runClickHouseSQL("CREATE TABLE default.abc(a UInt8) ENGINE=Memory()")
      val described = spark.sql("""DESC default.abc""").select($"col_name", $"data_type").limit(1)
      // ClickHouse UInt8 surfaces as Spark smallint.
      checkAnswer(described, Seq(Row("a", "smallint")))
    }
  }
}
40 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark-runtime/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/spark-3.5/clickhouse-spark-runtime/.gitkeep
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/ClickHouseCommandRunner.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import com.clickhouse.spark.client.NodeClient
18 | import org.apache.spark.sql.connector.ExternalCommandRunner
19 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
20 |
/**
 * Spark [[ExternalCommandRunner]] that executes a raw ClickHouse SQL statement against the
 * node described by `options` and returns each result row rendered as a JSON string
 * (JSONEachRow output).
 */
class ClickHouseCommandRunner extends ExternalCommandRunner with ClickHouseHelper {

  override def executeCommand(sql: String, options: CaseInsensitiveStringMap): Array[String] =
    // Use the imported NodeClient companion directly; the previous relative package
    // reference `client.NodeClient` left the explicit import unused.
    Utils.tryWithResource(NodeClient(buildNodeSpec(options))) { nodeClient =>
      nodeClient.syncQueryAndCheckOutputJSONEachRow(sql).records.map(_.toString).toArray
    }
}
28 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/CommitMessage.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import org.apache.spark.sql.connector.write.WriterCommitMessage
18 |
/** Minimal [[WriterCommitMessage]] a writer task reports back to the driver; `msg` is free-form. */
case class CommitMessage(msg: String = "") extends WriterCommitMessage
20 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/Constants.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark
16 |
17 | import com.clickhouse.client.config.ClickHouseClientOption._
18 |
/** Option keys recognized by the ClickHouse catalog and datasource. */
object Constants {
  // format: off
  //////////////////////////////////////////////////////////
  //////// clickhouse datasource catalog properties ////////
  //////////////////////////////////////////////////////////
  final val CATALOG_PROP_HOST = "host"
  final val CATALOG_PROP_TCP_PORT = "tcp_port"
  final val CATALOG_PROP_HTTP_PORT = "http_port"
  final val CATALOG_PROP_PROTOCOL = "protocol"
  final val CATALOG_PROP_USER = "user"
  final val CATALOG_PROP_PASSWORD = "password"
  final val CATALOG_PROP_DATABASE = "database"
  final val CATALOG_PROP_TZ = "timezone" // server(default), client, UTC+3, Asia/Shanghai, etc.
  // Prefix for passing arbitrary client options through catalog properties, e.g. "option.ssl".
  final val CATALOG_PROP_OPTION_PREFIX = "option."
  // NOTE(review): presumably these client options are managed by the connector itself and
  // user-supplied "option.*" values for them are dropped — confirm against the option-parsing code.
  final val CATALOG_PROP_IGNORE_OPTIONS = Seq(
    DATABASE.getKey, COMPRESS.getKey, DECOMPRESS.getKey, FORMAT.getKey, RETRY.getKey,
    USE_SERVER_TIME_ZONE.getKey, USE_SERVER_TIME_ZONE_FOR_DATES.getKey, SERVER_TIME_ZONE.getKey, USE_TIME_ZONE.getKey)

  //////////////////////////////////////////////////////////
  ////////// clickhouse datasource read properties /////////
  //////////////////////////////////////////////////////////

  //////////////////////////////////////////////////////////
  ///////// clickhouse datasource write properties /////////
  //////////////////////////////////////////////////////////
  // format: on
}
46 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/CityHash64.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.func
16 |
17 | import com.clickhouse.spark.hash
18 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L694
/** Spark-side function mirroring ClickHouse's `cityHash64` over string arguments. */
object CityHash64 extends MultiStringArgsHash {

  // Name under which the function is exposed on the Spark side.
  override protected def funcName: String = "clickhouse_cityHash64"

  // ClickHouse function name(s) this implementation corresponds to.
  override val ckFuncNames: Array[String] = Array("cityHash64")

  // Delegates to com.clickhouse.spark.hash.CityHash64 from clickhouse-core.
  override def applyHash(input: Array[Any]): Long = hash.CityHash64(input)
}
28 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/MurmurHash2.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.func
16 |
17 | import com.clickhouse.spark.hash.{HashUtils, Murmurhash2_32, Murmurhash2_64}
18 | import com.clickhouse.spark.hash
19 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L460
/** Spark-side function mirroring ClickHouse's `murmurHash2_64` over string arguments. */
object MurmurHash2_64 extends MultiStringArgsHash {

  // Name under which the function is exposed on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash2_64"

  // ClickHouse function name(s) this implementation corresponds to.
  override val ckFuncNames: Array[String] = Array("murmurHash2_64")

  // Delegates to the shared Murmurhash2_64 implementation in clickhouse-core.
  override def applyHash(input: Array[Any]): Long = Murmurhash2_64(input)
}
29 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
/** Spark-side function mirroring ClickHouse's `murmurHash2_32` over string arguments. */
object MurmurHash2_32 extends MultiStringArgsHash {

  // Name under which the function is exposed on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash2_32"

  // ClickHouse function name(s) this implementation corresponds to.
  override val ckFuncNames: Array[String] = Array("murmurHash2_32")

  // The 32-bit result is widened via HashUtils.toUInt32 — presumably to represent
  // ClickHouse's unsigned UInt32 range in a Long; confirm against HashUtils.
  override def applyHash(input: Array[Any]): Long = HashUtils.toUInt32(Murmurhash2_32(input))
}
39 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/MurmurHash3.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.func
16 |
17 | import com.clickhouse.spark.hash.{HashUtils, Murmurhash3_32, Murmurhash3_64}
18 | import com.clickhouse.spark.hash
19 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L543
/** Spark-side function mirroring ClickHouse's `murmurHash3_64` over string arguments. */
object MurmurHash3_64 extends MultiStringArgsHash {

  // Name under which the function is exposed on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash3_64"

  // ClickHouse function name(s) this implementation corresponds to.
  override val ckFuncNames: Array[String] = Array("murmurHash3_64")

  // Delegates to the shared Murmurhash3_64 implementation in clickhouse-core.
  override def applyHash(input: Array[Any]): Long = Murmurhash3_64(input)
}
29 |
// https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
// NOTE(review): this anchor (#L519) is identical to MurmurHash2_32's — possibly a
// copy-paste; verify the murmurHash3_32 location in FunctionsHashing.h.
/** Spark-side function mirroring ClickHouse's `murmurHash3_32` over string arguments. */
object MurmurHash3_32 extends MultiStringArgsHash {

  // Name under which the function is exposed on the Spark side.
  override protected def funcName: String = "clickhouse_murmurHash3_32"

  // ClickHouse function name(s) this implementation corresponds to.
  override val ckFuncNames: Array[String] = Array("murmurHash3_32")

  // The 32-bit result is widened via HashUtils.toUInt32 — presumably to represent
  // ClickHouse's unsigned UInt32 range in a Long; confirm against HashUtils.
  override def applyHash(input: Array[Any]): Long = HashUtils.toUInt32(Murmurhash3_32(input))
}
39 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ClickHouseMetadataColumn.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.read
16 |
17 | import org.apache.spark.sql.connector.catalog.MetadataColumn
18 | import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType, StringType, StructField}
19 |
/** Catalogs the hidden/virtual ClickHouse columns exposed to Spark as metadata columns. */
object ClickHouseMetadataColumn {
  // Metadata columns available when reading MergeTree-family tables.
  val mergeTreeMetadataCols: Array[MetadataColumn] = Array(
    ClickHouseMetadataColumn("_part", StringType),
    ClickHouseMetadataColumn("_part_index", LongType),
    ClickHouseMetadataColumn("_part_uuid", StringType),
    ClickHouseMetadataColumn("_partition_id", StringType),
    // _partition_value is deliberately left disabled here; the reason is not recorded in this file.
    // ClickHouseMetadataColumn("_partition_value", StringType),
    ClickHouseMetadataColumn("_sample_factor", DoubleType)
  )

  // Metadata columns available when reading Distributed tables (adds _table and _shard_num).
  val distributeMetadataCols: Array[MetadataColumn] = Array(
    ClickHouseMetadataColumn("_table", StringType),
    ClickHouseMetadataColumn("_part", StringType),
    ClickHouseMetadataColumn("_part_index", LongType),
    ClickHouseMetadataColumn("_part_uuid", StringType),
    ClickHouseMetadataColumn("_partition_id", StringType),
    ClickHouseMetadataColumn("_sample_factor", DoubleType),
    ClickHouseMetadataColumn("_shard_num", IntegerType)
  )
}
40 |
/**
 * A single ClickHouse virtual column described through Spark's [[MetadataColumn]] API.
 * Metadata columns default to non-nullable.
 */
case class ClickHouseMetadataColumn(
  override val name: String,
  override val dataType: DataType,
  override val isNullable: Boolean = false
) extends MetadataColumn {
  /** Renders this column as a plain [[StructField]] for schema composition. */
  def toStructField: StructField = StructField(name, dataType, isNullable)
}
48 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ScanJobDescription.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.read
16 |
17 | import com.clickhouse.spark.spec.{ClusterSpec, DistributedEngineSpec, NodeSpec, TableEngineSpec, TableSpec}
18 | import org.apache.spark.sql.clickhouse.ReadOptions
19 | import org.apache.spark.sql.types.StructType
20 |
21 | import java.time.ZoneId
22 |
/**
 * Immutable description of a ClickHouse scan, assembled on the driver and shipped to read tasks.
 *
 * The leading fields come from catalog/table resolution; the fields after the marker comment
 * are filled in by ScanBuilder during planning. Pushed-down clauses are carried as ClickHouse
 * SQL text rather than Spark expressions (see inline note).
 */
case class ScanJobDescription(
  node: NodeSpec,
  tz: ZoneId,
  tableSpec: TableSpec,
  tableEngineSpec: TableEngineSpec,
  cluster: Option[ClusterSpec],
  localTableSpec: Option[TableSpec],
  localTableEngineSpec: Option[TableEngineSpec],
  readOptions: ReadOptions,
  // Below fields will be constructed in ScanBuilder.
  readSchema: StructType = new StructType,
  // We should pass compiled ClickHouse SQL snippets(or ClickHouse SQL AST data structure) instead of Spark Expression
  // into Scan tasks because the check happens in planing phase on driver side.
  filtersExpr: String = "1=1",
  groupByClause: Option[String] = None,
  limit: Option[Int] = None
) {

  /** Effective database to query: the local database when reading a Distributed table as local. */
  def database: String = tableEngineSpec match {
    case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_db
    case _ => tableSpec.database
  }

  /** Effective table to query: the local table when reading a Distributed table as local. */
  def table: String = tableEngineSpec match {
    case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_table
    case _ => tableSpec.name
  }
}
51 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/com/clickhouse/spark/write/format/ClickHouseJsonEachRowWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
 9 |  * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package com.clickhouse.spark.write.format
16 |
17 | import com.clickhouse.spark.write.{ClickHouseWriter, WriteJobDescription}
18 | import org.apache.commons.io.IOUtils
19 | import org.apache.spark.sql.catalyst.InternalRow
20 | import org.apache.spark.sql.clickhouse.JsonWriter
21 |
/** Writer emitting rows in ClickHouse's JSONEachRow input format (one JSON object per line). */
class ClickHouseJsonEachRowWriter(writeJob: WriteJobDescription) extends ClickHouseWriter(writeJob) {

  override def format: String = "JSONEachRow"

  // Streams rows into `output` (inherited from ClickHouseWriter) using the job's time zone.
  val jsonWriter: JsonWriter = new JsonWriter(revisedDataSchema, writeJob.tz, output)

  override def writeRow(record: InternalRow): Unit = jsonWriter.write(record)

  override def doSerialize(): Array[Byte] = {
    // Flush pending JSON, then close `output` before snapshotting the buffer so the
    // payload is complete. NOTE(review): presumably `output` wraps `serializedBuffer`
    // (possibly with compression) — confirm in ClickHouseWriter.
    jsonWriter.flush()
    output.close()
    serializedBuffer.toByteArray
  }

  override def close(): Unit = {
    // Best-effort close of the JSON writer before releasing parent resources.
    IOUtils.closeQuietly(jsonWriter)
    super.close()
  }
}
41 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/JsonWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.spark.sql.catalyst.InternalRow
18 | import org.apache.spark.sql.catalyst.json.{JSONOptions, JacksonGenerator}
19 | import org.apache.spark.sql.types.StructType
20 |
21 | import java.io.{Closeable, Flushable, OutputStream, OutputStreamWriter}
22 | import java.nio.charset.StandardCharsets
23 | import java.time.ZoneId
24 |
/**
 * Writes Spark [[InternalRow]]s to `output` as newline-delimited JSON (JSON Lines).
 *
 * Timestamp and timestamp-NTZ values are formatted as "yyyy-MM-dd HH:mm:ss" in the given
 * time zone, the literal form ClickHouse accepts for DateTime columns.
 */
class JsonWriter(schema: StructType, tz: ZoneId, output: OutputStream) extends Closeable with Flushable {
  // Uniform second-precision rendering for both timestamp kinds.
  private val formatOptions: Map[String, String] = Map(
    "timestampFormat" -> "yyyy-MM-dd HH:mm:ss",
    "timestampNTZFormat" -> "yyyy-MM-dd HH:mm:ss"
  )
  private val encoder = new OutputStreamWriter(output, StandardCharsets.UTF_8)
  private val gen = new JacksonGenerator(schema, encoder, new JSONOptions(formatOptions, tz.getId))

  /** Emits one row and terminates the line. */
  def write(row: InternalRow): Unit = {
    gen.write(row)
    gen.writeLineEnding()
  }

  /** Forces buffered output through to the underlying stream. */
  override def flush(): Unit = gen.flush()

  /** Closes the generator and the writer it wraps. */
  override def close(): Unit = gen.close()
}
42 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/SparkUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import org.apache.arrow.memory.BufferAllocator
18 | import org.apache.arrow.vector.types.pojo.Schema
19 | import org.apache.spark._
20 | import org.apache.spark.sql.types.StructType
21 | import org.apache.spark.sql.util.ArrowUtils
22 | import org.apache.spark.util.VersionUtils
23 |
/** Small helpers that bridge Spark-internal utilities (version, Arrow) for the connector. */
object SparkUtils {

  /** Spark's (major, minor) version pair, parsed once from `SPARK_VERSION`. */
  lazy val MAJOR_MINOR_VERSION: (Int, Int) = VersionUtils.majorMinorVersion(SPARK_VERSION)

  /**
   * Converts a Spark schema into an Arrow schema.
   * The trailing `true` asks Spark to error on duplicated field names
   * rather than silently deduplicating them.
   */
  def toArrowSchema(schema: StructType, timeZoneId: String): Schema =
    ArrowUtils.toArrowSchema(schema, timeZoneId, true)

  /** Creates a named child allocator under Arrow's root allocator with no reservation and an unbounded limit. */
  def spawnArrowAllocator(name: String): BufferAllocator = {
    val root = ArrowUtils.rootAllocator
    root.newChildAllocator(name, 0, Long.MaxValue)
  }
}
33 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/test/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/spark-3.5/clickhouse-spark/src/test/scala/org/apache/spark/sql/clickhouse/ClickHouseHelperSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * https://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.apache.spark.sql.clickhouse
16 |
17 | import com.clickhouse.spark.ClickHouseHelper
18 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
19 | import org.scalatest.funsuite.AnyFunSuite
20 |
21 | import scala.collection.JavaConverters._
22 |
class ClickHouseHelperSuite extends AnyFunSuite with ClickHouseHelper {

  test("buildNodeSpec") {
    val props = Map(
      "database" -> "testing",
      "option.database" -> "production",
      "option.ssl" -> "true"
    )
    val spec = buildNodeSpec(new CaseInsensitiveStringMap(props.asJava))
    // The top-level "database" key takes precedence over "option.database".
    assert(spec.database === "testing")
    assert(spec.options.get("ssl") === "true")
  }
}
37 |
--------------------------------------------------------------------------------
/version.txt:
--------------------------------------------------------------------------------
1 | 0.9.0-SNAPSHOT
2 |
--------------------------------------------------------------------------------