├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── build-and-test.yml │ ├── check-license.yml │ ├── cloud.yml │ ├── publish-release.yml │ ├── publish-snapshot.yml │ ├── sonar.yml │ ├── style.yml │ └── tpcds.yml ├── .gitignore ├── .scalafmt.conf ├── LICENSE ├── NOTICE ├── README.md ├── build.gradle ├── clickhouse-core-it └── src │ └── test │ ├── resources │ └── log4j.properties │ └── scala │ └── com │ └── clickhouse │ └── spark │ ├── HashSuite.scala │ └── UtilsSuite.scala ├── clickhouse-core └── src │ ├── main │ ├── antlr │ │ └── com.clickhouse │ │ │ └── ClickHouseSQL.g4 │ ├── java │ │ └── com │ │ │ └── clickhouse │ │ │ └── spark │ │ │ ├── exception │ │ │ └── ClickHouseErrCode.java │ │ │ └── hash │ │ │ └── cityhash │ │ │ ├── CityHash_v1_0_2.java │ │ │ └── UInt128.java │ └── scala │ │ └── com │ │ └── clickhouse │ │ └── spark │ │ ├── JsonProtocol.scala │ │ ├── Logging.scala │ │ ├── Utils.scala │ │ ├── client │ │ ├── ClusterClient.scala │ │ ├── NodeClient.scala │ │ └── NodesClient.scala │ │ ├── exception │ │ └── CHException.scala │ │ ├── expr │ │ └── Expressions.scala │ │ ├── format │ │ ├── InputFormat.scala │ │ ├── JSONOutputFormat.scala │ │ └── OutputFormat.scala │ │ ├── hash │ │ ├── CityHash64.scala │ │ ├── HashFunc.scala │ │ ├── HashUtils.scala │ │ ├── Murmurhash2_32.scala │ │ ├── Murmurhash2_64.scala │ │ ├── Murmurhash3_32.scala │ │ └── Murmurhash3_64.scala │ │ ├── io │ │ ├── ForwardingOutputStream.scala │ │ ├── ForwardingWriter.scala │ │ └── ObservableOutputStream.scala │ │ ├── parse │ │ ├── AstVisitor.scala │ │ ├── ParseUtils.scala │ │ └── SQLParser.scala │ │ └── spec │ │ ├── DatabaseSpec.scala │ │ ├── NodeSpec.scala │ │ ├── PartitionSpec.scala │ │ ├── ShardUtils.scala │ │ ├── TableEngineSpec.scala │ │ ├── TableEngineUtils.scala │ │ └── TableSpec.scala │ ├── test │ ├── resources │ │ └── log4j.properties │ └── scala │ │ └── com │ │ └── clickhouse │ │ 
└── spark │ │ ├── UtilsSuite.scala │ │ ├── parse │ │ └── SQLParserSuite.scala │ │ └── spec │ │ ├── NodeSpecHelper.scala │ │ ├── NodeSpecSuite.scala │ │ └── ShardUtilsSuite.scala │ └── testFixtures │ ├── conf │ ├── clickhouse-cluster │ │ ├── .env │ │ ├── clickhouse-s2r2-compose.yml │ │ ├── config.xml │ │ ├── remote_servers.xml │ │ ├── s1r1 │ │ │ ├── interserver_http_host.xml │ │ │ └── macros.xml │ │ ├── s1r2 │ │ │ ├── interserver_http_host.xml │ │ │ └── macros.xml │ │ ├── s2r1 │ │ │ ├── interserver_http_host.xml │ │ │ └── macros.xml │ │ ├── s2r2 │ │ │ ├── interserver_http_host.xml │ │ │ └── macros.xml │ │ ├── users.xml │ │ └── zookeeper.xml │ └── clickhouse-single │ │ └── users.xml │ ├── java │ └── org │ │ └── scalatest │ │ └── tags │ │ └── Cloud.java │ └── scala │ └── com │ └── clickhouse │ └── spark │ └── base │ ├── ClickHouseCloudMixIn.scala │ ├── ClickHouseClusterMixIn.scala │ ├── ClickHouseProvider.scala │ └── ClickHouseSingleMixIn.scala ├── deploy.gradle ├── dev ├── backport └── reformat ├── docker ├── .env ├── .env-dev ├── README.md ├── build-image.sh ├── compose-dev.yml ├── compose.yml ├── conf │ ├── cloudbeaver-conf │ │ ├── README.md │ │ ├── cloudbeaver.conf │ │ ├── initial-data-sources.conf │ │ ├── logback.xml │ │ └── product.conf │ ├── core-site.xml │ ├── hive-site.xml │ ├── kyuubi-defaults.conf │ └── spark-defaults.conf ├── image │ ├── scc-base.Dockerfile │ ├── scc-hadoop.Dockerfile │ ├── scc-kyuubi.Dockerfile │ ├── scc-metastore.Dockerfile │ └── scc-spark.Dockerfile └── script │ ├── hive-schema-2.3.0.postgres.sql │ └── hive-txn-schema-2.3.0.postgres.sql ├── docs ├── best_practices │ ├── 01_deployment.md │ └── index.md ├── configurations │ ├── 01_catalog_configurations.md │ ├── 02_sql_configurations.md │ └── index.md ├── developers │ ├── 01_build_and_test.md │ ├── 02_docs_and_website.md │ ├── 03_private_release.md │ ├── 04_public_release.md │ └── index.md ├── imgs │ ├── scc_overview.drawio │ ├── scc_overview.drawio.png │ ├── scc_read_bucket_join.drawio │ 
├── scc_read_bucket_join.drawio.png │ ├── scc_read_pushdown_disable.drawio │ ├── scc_read_pushdown_disable.drawio.png │ ├── scc_read_pushdown_enable.drawio │ ├── scc_read_pushdown_enable.drawio.png │ ├── scc_read_sort_merge_join.drawio │ ├── scc_read_sort_merge_join.drawio.png │ ├── scc_write_rebalance_sort.drawio │ ├── scc_write_rebalance_sort.drawio.png │ ├── spark_centralized_metastore.drawio │ ├── spark_centralized_metastore.drawio.png │ ├── spark_multi_catalog.drawio │ └── spark_multi_catalog.drawio.png ├── index.md ├── internals │ ├── 01_catalog.md │ ├── 02_read.md │ ├── 03_write.md │ └── index.md └── quick_start │ ├── 01_get_the_library.md │ ├── 02_play_with_spark_sql.md │ └── 03_play_with_spark_shell.md ├── examples └── scala │ ├── README.md │ └── spark-3.5 │ ├── .bsp │ └── sbt.json │ ├── build.sbt │ ├── project │ └── build.properties │ └── src │ └── main │ └── scala │ └── Saprk-3.5.scala ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle ├── spark-3.3 ├── build.gradle ├── clickhouse-spark-it │ └── src │ │ └── test │ │ ├── resources │ │ └── log4j2.xml │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── clickhouse │ │ ├── SparkTest.scala │ │ ├── TPCDSTestUtils.scala │ │ ├── TestUtils.scala │ │ ├── cluster │ │ ├── BaseClusterWriteSuite.scala │ │ ├── ClickHouseClusterReadSuite.scala │ │ ├── ClickHouseClusterUDFSuite.scala │ │ ├── ClusterDeleteSuite.scala │ │ ├── ClusterPartitionManagementSuite.scala │ │ ├── ClusterShardByRandSuite.scala │ │ ├── ClusterTableManagementSuite.scala │ │ ├── SparkClickHouseClusterTest.scala │ │ └── TPCDSClusterSuite.scala │ │ └── single │ │ ├── ClickHouseDataTypeSuite.scala │ │ ├── ClickHouseGenericSuite.scala │ │ ├── ClickHouseTableDDLSuite.scala │ │ ├── SparkClickHouseSingleTest.scala │ │ ├── TPCDSSuite.scala │ │ └── WriteDistributionAndOrderingSuite.scala ├── clickhouse-spark-runtime │ └── .gitkeep └── 
clickhouse-spark │ └── src │ ├── main │ └── scala │ │ ├── com │ │ └── clickhouse │ │ │ └── spark │ │ │ ├── ClickHouseCatalog.scala │ │ │ ├── ClickHouseCommandRunner.scala │ │ │ ├── ClickHouseHelper.scala │ │ │ ├── ClickHouseTable.scala │ │ │ ├── CommitMessage.scala │ │ │ ├── Constants.scala │ │ │ ├── Metrics.scala │ │ │ ├── SQLHelper.scala │ │ │ ├── func │ │ │ ├── ClickHouseXxHash64.scala │ │ │ └── FunctionRegistry.scala │ │ │ ├── read │ │ │ ├── ClickHouseMetadataColumn.scala │ │ │ ├── ClickHouseRead.scala │ │ │ ├── ClickHouseReader.scala │ │ │ ├── InputPartitions.scala │ │ │ ├── ScanJobDescription.scala │ │ │ └── format │ │ │ │ ├── ClickHouseBinaryReader.scala │ │ │ │ └── ClickHouseJsonReader.scala │ │ │ └── write │ │ │ ├── ClickHouseWrite.scala │ │ │ ├── ClickHouseWriter.scala │ │ │ ├── WriteJobDescription.scala │ │ │ └── format │ │ │ ├── ClickHouseArrowStreamWriter.scala │ │ │ └── ClickHouseJsonEachRowWriter.scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── clickhouse │ │ ├── ClickHouseSQLConf.scala │ │ ├── ExprUtils.scala │ │ ├── JsonWriter.scala │ │ ├── SchemaUtils.scala │ │ ├── SparkOptions.scala │ │ └── SparkUtils.scala │ └── test │ ├── resources │ └── log4j2.xml │ └── scala │ └── org │ └── apache │ └── spark │ └── sql │ └── clickhouse │ ├── ClickHouseHelperSuite.scala │ └── SchemaUtilsSuite.scala ├── spark-3.4 ├── build.gradle ├── clickhouse-spark-it │ └── src │ │ └── test │ │ ├── resources │ │ └── log4j2.xml │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── clickhouse │ │ ├── SparkTest.scala │ │ ├── TPCDSTestUtils.scala │ │ ├── TestUtils.scala │ │ ├── cluster │ │ ├── BaseClusterWriteSuite.scala │ │ ├── ClickHouseClusterHashUDFSuite.scala │ │ ├── ClickHouseClusterReadSuite.scala │ │ ├── ClusterDeleteSuite.scala │ │ ├── ClusterPartitionManagementSuite.scala │ │ ├── ClusterShardByRandSuite.scala │ │ ├── ClusterTableManagementSuite.scala │ │ ├── SparkClickHouseClusterTest.scala │ │ └── TPCDSClusterSuite.scala │ │ └── 
single │ │ ├── ClickHouseDataTypeSuite.scala │ │ ├── ClickHouseGenericSuite.scala │ │ ├── ClickHouseTableDDLSuite.scala │ │ ├── SparkClickHouseSingleTest.scala │ │ ├── TPCDSSuite.scala │ │ └── WriteDistributionAndOrderingSuite.scala ├── clickhouse-spark-runtime │ └── .gitkeep └── clickhouse-spark │ └── src │ ├── main │ └── scala │ │ ├── com │ │ └── clickhouse │ │ │ └── spark │ │ │ ├── ClickHouseCatalog.scala │ │ │ ├── ClickHouseCommandRunner.scala │ │ │ ├── ClickHouseHelper.scala │ │ │ ├── ClickHouseTable.scala │ │ │ ├── CommitMessage.scala │ │ │ ├── Constants.scala │ │ │ ├── Metrics.scala │ │ │ ├── SQLHelper.scala │ │ │ ├── func │ │ │ ├── CityHash64.scala │ │ │ ├── FunctionRegistry.scala │ │ │ ├── MultiStringArgsHash.scala │ │ │ ├── MurmurHash2.scala │ │ │ ├── MurmurHash3.scala │ │ │ └── XxHash64.scala │ │ │ ├── read │ │ │ ├── ClickHouseMetadataColumn.scala │ │ │ ├── ClickHouseRead.scala │ │ │ ├── ClickHouseReader.scala │ │ │ ├── InputPartitions.scala │ │ │ ├── ScanJobDescription.scala │ │ │ └── format │ │ │ │ ├── ClickHouseBinaryReader.scala │ │ │ │ └── ClickHouseJsonReader.scala │ │ │ └── write │ │ │ ├── ClickHouseWrite.scala │ │ │ ├── ClickHouseWriter.scala │ │ │ ├── WriteJobDescription.scala │ │ │ └── format │ │ │ ├── ClickHouseArrowStreamWriter.scala │ │ │ └── ClickHouseJsonEachRowWriter.scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── clickhouse │ │ ├── ClickHouseSQLConf.scala │ │ ├── ExprUtils.scala │ │ ├── JsonWriter.scala │ │ ├── SchemaUtils.scala │ │ ├── SparkOptions.scala │ │ └── SparkUtils.scala │ └── test │ ├── resources │ └── log4j2.xml │ └── scala │ └── org │ └── apache │ └── spark │ └── sql │ └── clickhouse │ ├── ClickHouseHelperSuite.scala │ ├── ConfigurationSuite.scala │ ├── FunctionRegistrySuite.scala │ └── SchemaUtilsSuite.scala ├── spark-3.5 ├── build.gradle ├── clickhouse-spark-it │ └── src │ │ └── test │ │ ├── resources │ │ └── log4j2.xml │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── clickhouse 
│ │ ├── SparkTest.scala │ │ ├── TPCDSTestUtils.scala │ │ ├── TestUtils.scala │ │ ├── cluster │ │ ├── BaseClusterWriteSuite.scala │ │ ├── ClickHouseClusterHashUDFSuite.scala │ │ ├── ClickHouseClusterReadSuite.scala │ │ ├── ClusterDeleteSuite.scala │ │ ├── ClusterPartitionManagementSuite.scala │ │ ├── ClusterShardByRandSuite.scala │ │ ├── ClusterTableManagementSuite.scala │ │ ├── SparkClickHouseClusterTest.scala │ │ └── TPCDSClusterSuite.scala │ │ └── single │ │ ├── ClickHouseDataTypeSuite.scala │ │ ├── ClickHouseGenericSuite.scala │ │ ├── ClickHouseTableDDLSuite.scala │ │ ├── SparkClickHouseSingleTest.scala │ │ ├── TPCDSSuite.scala │ │ └── WriteDistributionAndOrderingSuite.scala ├── clickhouse-spark-runtime │ └── .gitkeep └── clickhouse-spark │ └── src │ ├── main │ └── scala │ │ ├── com │ │ └── clickhouse │ │ │ └── spark │ │ │ ├── ClickHouseCatalog.scala │ │ │ ├── ClickHouseCommandRunner.scala │ │ │ ├── ClickHouseHelper.scala │ │ │ ├── ClickHouseTable.scala │ │ │ ├── CommitMessage.scala │ │ │ ├── Constants.scala │ │ │ ├── Metrics.scala │ │ │ ├── SQLHelper.scala │ │ │ ├── func │ │ │ ├── CityHash64.scala │ │ │ ├── FunctionRegistry.scala │ │ │ ├── MultiStringArgsHash.scala │ │ │ ├── MurmurHash2.scala │ │ │ ├── MurmurHash3.scala │ │ │ └── XxHash64.scala │ │ │ ├── read │ │ │ ├── ClickHouseMetadataColumn.scala │ │ │ ├── ClickHouseRead.scala │ │ │ ├── ClickHouseReader.scala │ │ │ ├── InputPartitions.scala │ │ │ ├── ScanJobDescription.scala │ │ │ └── format │ │ │ │ ├── ClickHouseBinaryReader.scala │ │ │ │ └── ClickHouseJsonReader.scala │ │ │ └── write │ │ │ ├── ClickHouseWrite.scala │ │ │ ├── ClickHouseWriter.scala │ │ │ ├── WriteJobDescription.scala │ │ │ └── format │ │ │ ├── ClickHouseArrowStreamWriter.scala │ │ │ └── ClickHouseJsonEachRowWriter.scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── clickhouse │ │ ├── ClickHouseSQLConf.scala │ │ ├── ExprUtils.scala │ │ ├── JsonWriter.scala │ │ ├── SchemaUtils.scala │ │ ├── SparkOptions.scala │ │ └── 
SparkUtils.scala │ └── test │ ├── resources │ └── log4j2.xml │ └── scala │ └── org │ └── apache │ └── spark │ └── sql │ └── clickhouse │ ├── ClickHouseHelperSuite.scala │ ├── ConfigurationSuite.scala │ ├── FunctionRegistrySuite.scala │ └── SchemaUtilsSuite.scala └── version.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # 14 | 15 | *.bat text eol=crlf 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | name: Bug report 4 | about: Create a report to help us improve 5 | title: '' 6 | labels: bug 7 | assignees: '' 8 | 9 | --- 10 | 11 | 12 | ### Describe the bug 13 | 14 | ### Steps to reproduce 15 | 1. 16 | 2. 17 | 3. 
18 | 19 | ### Expected behaviour 20 | 21 | ### Error log 22 | 23 | ### Configuration 24 | #### Environment 25 | * Apache Spark version: 26 | * Scala version: 27 | * Connector configuration: 28 | * OS: 29 | 30 | #### ClickHouse server 31 | * ClickHouse Server version: 32 | * ClickHouse Server non-default settings, if any: 33 | * `CREATE TABLE` statements for tables involved: 34 | * Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: What would you like to add to the project? 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gradle" 9 | directory: "/" 10 | schedule: 11 | interval: "monthly" 12 | labels: 13 | - "dependencies" 14 | open-pull-requests-limit: 5 -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | 4 | ## Checklist 5 | Delete items not relevant to your PR: 6 | - [ ] Unit and integration tests covering the common scenarios were added 7 | - [ ] A human-readable description of the changes was provided to include in CHANGELOG 8 | - [ ] For significant changes, documentation in https://github.com/ClickHouse/clickhouse-docs was updated with further explanations or tutorials -------------------------------------------------------------------------------- /.github/workflows/check-license.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | # 14 | 15 | name: "Check License" 16 | on: 17 | push: 18 | branches: 19 | - "branch-*" 20 | - "main" 21 | pull_request: 22 | branches: 23 | - "branch-*" 24 | - "main" 25 | 26 | jobs: 27 | check-license: 28 | runs-on: ubuntu-22.04 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | spark: [ 3.3, 3.4, 3.5 ] 33 | steps: 34 | - uses: actions/checkout@v4 35 | - uses: actions/setup-java@v4 36 | with: 37 | distribution: zulu 38 | java-version: 8 39 | - run: >- 40 | ./gradlew rat --no-daemon 41 | -Dspark_binary_version=${{ matrix.spark }} 42 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/ 43 | -------------------------------------------------------------------------------- /.github/workflows/cloud.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | # 14 | 15 | name: "ClickHouse Cloud" 16 | 17 | on: 18 | push: 19 | branches: 20 | - "branch-*" 21 | - "main" 22 | pull_request: 23 | branches: 24 | - "branch-*" 25 | - "main" 26 | 27 | jobs: 28 | run-tests-with-clickhouse-cloud: 29 | runs-on: ubuntu-22.04 30 | strategy: 31 | max-parallel: 1 32 | fail-fast: false 33 | matrix: 34 | spark: [ 3.3, 3.4, 3.5 ] 35 | scala: [ 2.12, 2.13 ] 36 | env: 37 | CLICKHOUSE_CLOUD_HOST: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_HOST_SMT }} 38 | CLICKHOUSE_CLOUD_PASSWORD: ${{ secrets.INTEGRATIONS_TEAM_TESTS_CLOUD_PASSWORD_SMT }} 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: actions/setup-java@v4 42 | with: 43 | distribution: zulu 44 | java-version: 8 45 | cache: gradle 46 | - run: >- 47 | ./gradlew clean cloudTest --no-daemon --refresh-dependencies 48 | -Dspark_binary_version=${{ matrix.spark }} 49 | -Dscala_binary_version=${{ matrix.scala }} 50 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/ 51 | - name: Upload test logs 52 | if: failure() 53 | uses: actions/upload-artifact@v3 54 | with: 55 | name: log-clickhouse-cloud-spark-${{ matrix.spark }}-scala-${{ matrix.scala }} 56 | path: | 57 | **/build/unit-tests.log 58 | log/** 59 | -------------------------------------------------------------------------------- /.github/workflows/publish-snapshot.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # 14 | 15 | name: "Publish Snapshot" 16 | 17 | on: 18 | schedule: 19 | - cron: '0 0 * * *' 20 | 21 | jobs: 22 | publish-snapshot: 23 | if: ${{ startsWith(github.repository, 'clickhouse/') }} 24 | runs-on: ubuntu-22.04 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | branch: 29 | - "main" 30 | scala: [ 2.12, 2.13 ] 31 | spark: [ 3.3, 3.4, 3.5 ] 32 | steps: 33 | - uses: actions/checkout@v4 34 | with: 35 | ref: ${{ matrix.branch }} 36 | - uses: actions/setup-java@v4 37 | with: 38 | distribution: zulu 39 | java-version: 8 40 | cache: gradle 41 | - name: Publish Snapshot - ${{ matrix.branch }} 42 | run: >- 43 | ./gradlew clean publish --no-daemon --refresh-dependencies 44 | -Dscala_binary_version=${{ matrix.scala }} 45 | -Dspark_binary_version=${{ matrix.spark }} 46 | -PmavenUser='${{ secrets.NEXUS_USER }}' 47 | -PmavenPassword='${{ secrets.NEXUS_PW }}' 48 | -------------------------------------------------------------------------------- /.github/workflows/sonar.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | # 14 | 15 | name: "SonarQube" 16 | 17 | on: 18 | schedule: 19 | - cron: '0 0 * * *' 20 | 21 | jobs: 22 | sonar-report: 23 | if: ${{ startsWith(github.repository, 'clickhouse/') }} 24 | runs-on: ubuntu-22.04 25 | steps: 26 | - uses: actions/checkout@v4 27 | - uses: actions/setup-java@v4 28 | with: 29 | distribution: zulu 30 | java-version: 8 31 | cache: gradle 32 | - run: >- 33 | ./gradlew sonarqube 34 | -Dsonar.projectKey=spark-clickhouse-connector 35 | -Dsonar.host.url=${{ secrets.SONAR_URL }} 36 | -Dsonar.login=${{ secrets.SONAR_TOKEN }} 37 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/ 38 | -------------------------------------------------------------------------------- /.github/workflows/style.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | # 14 | 15 | name: "Check Style" 16 | 17 | on: 18 | push: 19 | branches: 20 | - "branch-*" 21 | - "main" 22 | pull_request: 23 | branches: 24 | - "branch-*" 25 | - "main" 26 | 27 | jobs: 28 | check-style: 29 | runs-on: ubuntu-22.04 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | spark: [ 3.3, 3.4, 3.5 ] 34 | steps: 35 | - uses: actions/checkout@v4 36 | - uses: actions/setup-java@v4 37 | with: 38 | distribution: zulu 39 | java-version: 8 40 | cache: gradle 41 | - run: >- 42 | ./gradlew spotlessCheck --no-daemon --refresh-dependencies 43 | -Dspark_binary_version=${{ matrix.spark }} 44 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/ 45 | -------------------------------------------------------------------------------- /.github/workflows/tpcds.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | # 14 | 15 | name: "TPC-DS" 16 | 17 | on: 18 | push: 19 | branches: 20 | - "branch-*" 21 | - "main" 22 | pull_request: 23 | branches: 24 | - "branch-*" 25 | - "main" 26 | 27 | jobs: 28 | run-tpcds-sf1: 29 | runs-on: ubuntu-22.04 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | spark: [ 3.3, 3.4, 3.5 ] 34 | scala: [ 2.12, 2.13 ] 35 | steps: 36 | - uses: actions/checkout@v4 37 | - uses: actions/setup-java@v4 38 | with: 39 | distribution: zulu 40 | java-version: 8 41 | cache: gradle 42 | - run: >- 43 | ./gradlew clean slowTest --no-daemon --refresh-dependencies 44 | -Dspark_binary_version=${{ matrix.spark }} 45 | -Dscala_binary_version=${{ matrix.scala }} 46 | -PmavenCentralMirror=https://maven-central.storage-download.googleapis.com/maven2/ 47 | - name: Upload test logs 48 | if: failure() 49 | uses: actions/upload-artifact@v3 50 | with: 51 | name: log-tpcds-spark-${{ matrix.spark }}-scala-${{ matrix.scala }} 52 | path: | 53 | **/build/unit-tests.log 54 | log/** 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | .DS_Store 3 | .cache 4 | tmp/ 5 | 6 | # intellij files 7 | .idea 8 | .idea_modules/ 9 | *.ipr 10 | *.iws 11 | *.iml 12 | 13 | # gradle build 14 | .gradle 15 | .out/ 16 | build 17 | dependencies.lock 18 | **/dependencies.lock 19 | 20 | # rat library install location 21 | lib/ 22 | 23 | # web site build 24 | site/ 25 | 26 | __pycache__/ 27 | *.py[cod] 28 | .eggs/ 29 | .tox/ 30 | env/ 31 | venv/ 32 | *.egg-info/ 33 | test-reports 34 | build/ 35 | dist/ 36 | sdist/ 37 | .coverage 38 | coverage.xml 39 | .pytest_cache/ 40 | .python-version 41 | 42 | # vscode/eclipse files 43 | .classpath 44 | .project 45 | .settings 46 | bin/ 47 | 48 | # Hive/metastore files 49 | metastore_db/ 50 | 51 | # Spark/metastore files 52 | spark-warehouse/ 53 | derby.log 54 | 55 | # Python stuff 56 | python/.mypy_cache/ 57 | 58 | *.patch 59 
| **/*.drawio.bkp 60 | 61 | log/ 62 | 63 | # sbt 64 | dist/* 65 | target/ 66 | lib_managed/ 67 | src_managed/ 68 | project/boot/ 69 | project/plugins/project/ 70 | .history 71 | .cache 72 | .lib/ 73 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | // Licensed under the Apache License, Version 2.0 (the "License"); 2 | // you may not use this file except in compliance with the License. 3 | // You may obtain a copy of the License at 4 | // 5 | // https://www.apache.org/licenses/LICENSE-2.0 6 | // 7 | // Unless required by applicable law or agreed to in writing, software 8 | // distributed under the License is distributed on an "AS IS" BASIS, 9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | // See the License for the specific language governing permissions and 11 | // limitations under the License. 12 | 13 | version=3.6.1 14 | runner.dialect=scala212 15 | project.git=true 16 | 17 | align.preset=none 18 | align.stripMargin=true 19 | docstrings.style=keep 20 | maxColumn=120 21 | newlines.source=keep 22 | continuationIndent.defnSite=2 23 | danglingParentheses.callSite=true 24 | assumeStandardLibraryStripMargin=true 25 | rewrite.rules=[SortImports, RedundantBraces, RedundantParens, SortModifiers] 26 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | 2 | Spark ClickHouse Connector 3 | Was donated by The HousePower Organization (https://github.com/housepower) -------------------------------------------------------------------------------- /clickhouse-core-it/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file 
except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. See accompanying LICENSE file. 13 | # 14 | 15 | log4j.rootLogger=INFO, file 16 | 17 | log4j.appender.console=org.apache.log4j.ConsoleAppender 18 | log4j.appender.console.target=System.out 19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss.SSS} %p %c: %m%n 21 | 22 | log4j.appender.file=org.apache.log4j.FileAppender 23 | log4j.appender.file.append=true 24 | log4j.appender.file.file=build/unit-tests.log 25 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 26 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n 27 | 28 | log4j.logger.org.apache.hadoop.util.Shell=ERROR 29 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 30 | log4j.logger.com.clickhouse.spark=DEBUG 31 | -------------------------------------------------------------------------------- /clickhouse-core-it/src/test/scala/com/clickhouse/spark/UtilsSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark 16 | 17 | import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseProvider, ClickHouseSingleMixIn} 18 | import org.scalatest.funsuite.AnyFunSuite 19 | import org.scalatest.tags.Cloud 20 | 21 | import java.time.{LocalDateTime, ZoneId} 22 | 23 | @Cloud 24 | class ClickHouseCloudUtilsSuite extends UtilsSuite with ClickHouseCloudMixIn 25 | 26 | class ClickHouseSingleUtilsSuite extends UtilsSuite with ClickHouseSingleMixIn 27 | 28 | abstract class UtilsSuite extends AnyFunSuite with ClickHouseProvider with Logging { 29 | 30 | test("parse date with nano seconds") { 31 | withNodeClient() { client => 32 | val tz = ZoneId.systemDefault() 33 | val sql = s"SELECT toDateTime64('2023-03-29 15:25:25.977654', 3, '$tz')" 34 | val output = client.syncQueryAndCheckOutputJSONCompactEachRowWithNamesAndTypes(sql) 35 | assert(output.rows === 1L) 36 | val row = output.records.head 37 | assert(row.length === 1L) 38 | val actual = LocalDateTime.parse(row.head.asText, Utils.dateTimeFmt) 39 | val expected = LocalDateTime.of(2023, 3, 29, 15, 25, 25, 977000000) 40 | assert(actual === expected) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/java/com/clickhouse/spark/hash/cityhash/UInt128.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the 
License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark.hash.cityhash; 16 | 17 | /** 18 | * copied from https://github.com/dpoluyanov/achord/blob/master/src/main/java/com/github/mangelion/achord/UInt128.java 19 | */ 20 | final public class UInt128 { 21 | final public long first; 22 | final public long second; 23 | 24 | public UInt128(long first, long second) { 25 | this.first = first; 26 | this.second = second; 27 | } 28 | 29 | static UInt128 of(long first, long second) { 30 | return new UInt128(first, second); 31 | } 32 | } -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/JsonProtocol.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark 16 | 17 | import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} 18 | import com.fasterxml.jackson.module.scala.ClassTagExtensions 19 | 20 | trait ToJson { 21 | 22 | def toJson: String = JsonProtocol.toJson(this) 23 | 24 | override def toString: String = toJson 25 | } 26 | 27 | trait FromJson[T] { 28 | def fromJson(json: String): T 29 | } 30 | 31 | trait JsonProtocol[T] extends FromJson[T] with ToJson 32 | 33 | object JsonProtocol { 34 | 35 | @transient lazy val om: ObjectMapper with ClassTagExtensions = { 36 | val _om = new ObjectMapper() with ClassTagExtensions 37 | _om.findAndRegisterModules() 38 | _om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) 39 | _om 40 | } 41 | 42 | def toJson(value: Any): String = om.writeValueAsString(value) 43 | 44 | def toPrettyJson(value: Any): String = om.writer(SerializationFeature.INDENT_OUTPUT).writeValueAsString(value) 45 | } 46 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/Logging.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark 16 | 17 | import org.slf4j.{Logger, LoggerFactory} 18 | 19 | trait Logging { 20 | 21 | @transient lazy val log: Logger = LoggerFactory.getLogger(logName) 22 | 23 | protected def logName: String = this.getClass.getName.stripSuffix("$") 24 | } 25 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/client/NodesClient.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.client 16 | 17 | import com.clickhouse.spark.Logging 18 | import com.clickhouse.spark.spec.{NodeSpec, Nodes} 19 | 20 | import java.util.concurrent.ConcurrentHashMap 21 | import scala.collection.JavaConverters._ 22 | import scala.util.Random.shuffle 23 | 24 | object NodesClient { 25 | def apply(nodes: Nodes) = new NodesClient(nodes) 26 | } 27 | 28 | class NodesClient(nodes: Nodes) extends AutoCloseable with Logging { 29 | assert(nodes.nodes.nonEmpty) 30 | 31 | @transient lazy val cache = new ConcurrentHashMap[NodeSpec, NodeClient] 32 | 33 | def node: NodeClient = { 34 | 35 | val nodeSpec = shuffle(nodes.nodes.toSeq).head 36 | cache.computeIfAbsent( 37 | nodeSpec, 38 | { nodeSpec => 39 | log.info(s"Create client of $nodeSpec") 40 | new NodeClient(nodeSpec) 41 | } 42 | ) 43 | } 44 | 45 | override def close(): Unit = cache.asScala.values.foreach(_.close()) 46 | } 47 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/exception/CHException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.exception 16 | 17 | import com.clickhouse.spark.spec.NodeSpec 18 | 19 | abstract class CHException( 20 | val code: Int, 21 | val reason: String, 22 | val node: Option[NodeSpec], 23 | val cause: Option[Throwable] 24 | ) extends RuntimeException(s"${node.getOrElse("")} [$code] $reason", cause.orNull) 25 | 26 | case class CHServerException( 27 | override val code: Int, 28 | override val reason: String, 29 | override val node: Option[NodeSpec], 30 | override val cause: Option[Throwable] 31 | ) extends CHException(code, reason, node, cause) 32 | 33 | case class CHClientException( 34 | override val reason: String, 35 | override val node: Option[NodeSpec] = None, 36 | override val cause: Option[Throwable] = None 37 | ) extends CHException(ClickHouseErrCode.CLIENT_ERROR.code(), reason, node, cause) 38 | 39 | case class RetryableCHException( 40 | override val code: Int, 41 | override val reason: String, 42 | override val node: Option[NodeSpec], 43 | override val cause: Option[Throwable] = None 44 | ) extends CHException(code, reason, node, cause) 45 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/expr/Expressions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.expr 16 | 17 | trait Expr extends Serializable { 18 | def sql: String // ClickHouse dialect 19 | def sparkSql: String = sql 20 | def desc: String = s"${this.getClass.getSimpleName.stripSuffix("$")}[$sql]" 21 | override def toString: String = desc 22 | } 23 | 24 | case class StringLiteral(value: String) extends Expr { 25 | override def sql: String = s"'$value'" 26 | } 27 | 28 | case class FieldRef(name: String) extends Expr { 29 | override def sql: String = name 30 | } 31 | 32 | case class SQLExpr(sqlText: String) extends Expr { 33 | override def sql: String = sqlText 34 | } 35 | 36 | case class FuncExpr(name: String, args: List[Expr]) extends Expr { 37 | override def sql: String = s"$name(${args.map(_.sql).mkString(",")})" 38 | } 39 | 40 | // If the direction is not specified, ASC is assumed ... 41 | // By default or with the NULLS LAST modifier: first the values, then NaN, then NULL ... 42 | // https://clickhouse.com/docs/en/sql-reference/statements/select/order-by 43 | case class OrderExpr(expr: Expr, asc: Boolean = true, nullFirst: Boolean = false) extends Expr { 44 | override def sql: String = s"$expr ${if (asc) "ASC" else "DESC"} NULLS ${if (nullFirst) "FIRST" else "LAST"}" 45 | } 46 | 47 | case class TupleExpr(exprList: List[Expr]) extends Expr { 48 | override def sql: String = exprList.mkString("(", ",", ")") 49 | } 50 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/format/InputFormat.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark.format 16 | 17 | trait InputFormat 18 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/format/OutputFormat.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.format 16 | 17 | import scala.collection.immutable.ListMap 18 | 19 | trait OutputFormat 20 | 21 | trait NamesAndTypes { self: OutputFormat => 22 | 23 | def namesAndTypes: ListMap[String, String] 24 | } 25 | 26 | trait SimpleOutput[T] extends OutputFormat { 27 | 28 | def records: Seq[T] 29 | 30 | def rows: Long = records.length 31 | 32 | def isEmpty: Boolean = records.isEmpty 33 | } 34 | 35 | trait StreamOutput[T] extends Iterator[T] with OutputFormat 36 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/hash/CityHash64.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.hash 16 | 17 | import com.clickhouse.spark.hash.cityhash.{CityHash_v1_0_2, UInt128} 18 | 19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L694 20 | object CityHash64 extends HashFunc[Long] { 21 | override def applyHash(input: Array[Byte]): Long = 22 | CityHash_v1_0_2.CityHash64(input, 0, input.length) 23 | 24 | override def combineHashes(h1: Long, h2: Long): Long = 25 | CityHash_v1_0_2.Hash128to64(new UInt128(h1, h2)) 26 | } 27 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/hash/HashFunc.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark.hash 16 | 17 | import java.nio.charset.StandardCharsets 18 | import scala.reflect.ClassTag 19 | 20 | abstract class HashFunc[T: ClassTag] { 21 | def applyHash(input: Array[Byte]): T 22 | def combineHashes(h1: T, h2: T): T 23 | 24 | final def executeAny(input: Any): T = 25 | input match { 26 | // Here Array[Byte] means raw byte array, not Clickhouse's Array[UInt8] or Array[Int8]. 27 | // Note that Array[UInt8] is handled differently in Clickhouse, so passing it here as Array[Byte] will cause different result. 
28 | // This is left for performance issue, as sometimes raw bytes is better than constructing the real type 29 | // see https://github.com/clickhouse/spark-clickhouse-connector/pull/261#discussion_r1271828750 30 | case bytes: Array[Byte] => applyHash(bytes) 31 | case string: String => applyHash(string.getBytes(StandardCharsets.UTF_8)) 32 | case _ => throw new IllegalArgumentException(s"Unsupported input type: ${input.getClass}") 33 | } 34 | final def apply(input: Array[Any]): T = input.map(executeAny).reduce(combineHashes) 35 | } 36 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/hash/HashUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.hash 16 | 17 | object HashUtils { 18 | def intHash64Impl(x: Long): Long = 19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L140 20 | intHash64(x ^ 0x4cf2d2baae6da887L) 21 | 22 | def intHash64(l: Long): Long = { 23 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Common/HashTable/Hash.h#L26 24 | var x = l 25 | x ^= x >>> 33 26 | x *= 0xff51afd7ed558ccdL 27 | x ^= x >>> 33 28 | x *= 0xc4ceb9fe1a85ec53L 29 | x ^= x >>> 33 30 | x 31 | } 32 | 33 | def int32Impl(x: Long): Int = 34 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L133 35 | intHash32(x, 0x75d9543de018bf45L) 36 | 37 | def intHash32(l: Long, salt: Long): Int = { 38 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Common/HashTable/Hash.h#L502 39 | var x = l 40 | 41 | x ^= salt 42 | x = (~x) + (x << 18) 43 | x = x ^ ((x >>> 31) | (x << 33)) 44 | x = x * 21 45 | x = x ^ ((x >>> 11) | (x << 53)) 46 | x = x + (x << 6) 47 | x = x ^ ((x >>> 22) | (x << 42)) 48 | x.toInt 49 | } 50 | 51 | def toUInt32(v: Int): Long = if (v < 0) v + (1L << 32) else v 52 | } 53 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash2_32.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark.hash 16 | 17 | import org.apache.commons.codec.digest.MurmurHash2 18 | 19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519 20 | object Murmurhash2_32 extends HashFunc[Int] { 21 | override def applyHash(input: Array[Byte]): Int = 22 | MurmurHash2.hash32(input, input.length, 0) 23 | 24 | override def combineHashes(h1: Int, h2: Int): Int = 25 | HashUtils.int32Impl(HashUtils.toUInt32(h1)) ^ h2 26 | } 27 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash2_64.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.hash 16 | 17 | import org.apache.commons.codec.digest.MurmurHash2 18 | 19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L460 20 | object Murmurhash2_64 extends HashFunc[Long] { 21 | override def applyHash(input: Array[Byte]): Long = 22 | MurmurHash2.hash64(input, input.length, 0) 23 | 24 | override def combineHashes(h1: Long, h2: Long): Long = 25 | HashUtils.intHash64Impl(h1) ^ h2 26 | } 27 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash3_32.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.hash 16 | 17 | import org.apache.commons.codec.digest.MurmurHash3 18 | 19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519 20 | object Murmurhash3_32 extends HashFunc[Int] { 21 | override def applyHash(input: Array[Byte]): Int = 22 | MurmurHash3.hash32x86(input, 0, input.length, 0) 23 | 24 | override def combineHashes(h1: Int, h2: Int): Int = 25 | HashUtils.int32Impl(HashUtils.toUInt32(h1)) ^ h2 26 | } 27 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/hash/Murmurhash3_64.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.hash 16 | 17 | import org.apache.commons.codec.digest.MurmurHash3 18 | 19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L543 20 | object Murmurhash3_64 extends HashFunc[Long] { 21 | override def applyHash(input: Array[Byte]): Long = { 22 | val hashes = MurmurHash3.hash128x64(input, 0, input.length, 0) 23 | hashes(0) ^ hashes(1) 24 | } 25 | 26 | override def combineHashes(h1: Long, h2: Long): Long = 27 | HashUtils.intHash64Impl(h1) ^ h2 28 | } 29 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/io/ForwardingOutputStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.io 16 | 17 | import java.io.OutputStream 18 | 19 | class ForwardingOutputStream(@volatile var delegate: OutputStream = null) extends OutputStream { 20 | 21 | def updateDelegate(delegate: OutputStream): Unit = this.delegate = delegate 22 | 23 | override def write(b: Int): Unit = delegate.write(b) 24 | 25 | override def write(b: Array[Byte]): Unit = delegate.write(b) 26 | 27 | override def write(b: Array[Byte], off: Int, len: Int): Unit = delegate.write(b, off, len) 28 | 29 | override def flush(): Unit = delegate.flush() 30 | 31 | override def close(): Unit = delegate.close() 32 | } 33 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/io/ForwardingWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.io 16 | 17 | import java.io.Writer 18 | 19 | class ForwardingWriter(@volatile var delegate: Writer = null) extends Writer { 20 | 21 | def updateDelegate(delegate: Writer): Unit = this.delegate = delegate 22 | 23 | override def write(cbuf: Array[Char], off: Int, len: Int): Unit = delegate.write(cbuf, off, len) 24 | 25 | override def flush(): Unit = delegate.flush() 26 | 27 | override def close(): Unit = delegate.close() 28 | } 29 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/spec/DatabaseSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.spec 16 | 17 | import com.clickhouse.spark.ToJson 18 | 19 | import java.util 20 | import scala.collection.JavaConverters._ 21 | 22 | case class DatabaseSpec( 23 | name: String, 24 | engine: String, 25 | data_path: String, 26 | metadata_path: String, 27 | uuid: String 28 | ) extends ToJson { 29 | 30 | def toMap: Map[String, String] = Map( 31 | "name" -> name, 32 | "engine" -> engine, 33 | "data_path" -> data_path, 34 | "metadata_path" -> metadata_path, 35 | "uuid" -> uuid 36 | ) 37 | 38 | def toJavaMap: util.Map[String, String] = toMap.asJava 39 | } 40 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/spec/PartitionSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.spec 16 | 17 | import com.clickhouse.spark.ToJson 18 | 19 | case class PartitionSpec( 20 | partition_value: String, 21 | partition_id: String, 22 | row_count: Long, 23 | size_in_bytes: Long 24 | ) extends ToJson 25 | 26 | object NoPartitionSpec extends PartitionSpec("", "", 0, 0) 27 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/spec/ShardUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.spec 16 | 17 | import java.lang.{Long => JLong} 18 | 19 | object ShardUtils { 20 | 21 | def calcShard(cluster: ClusterSpec, hashVal: Long): ShardSpec = { 22 | val shards = cluster.shards.sorted 23 | val weights = shards.map(_.weight) 24 | val lowerBounds = weights.indices.map(i => weights.slice(0, i).sum) 25 | val upperBounds = weights.indices.map(i => weights.slice(0, i + 1).sum) 26 | val ranges = (lowerBounds zip upperBounds).map { case (l, u) => l until u } 27 | val rem = JLong.remainderUnsigned(hashVal, weights.sum) 28 | (shards zip ranges).find(_._2 contains rem).map(_._1).get 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /clickhouse-core/src/main/scala/com/clickhouse/spark/spec/TableEngineUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.spec 16 | 17 | import com.clickhouse.spark.Logging 18 | import com.clickhouse.spark.exception.CHClientException 19 | import com.clickhouse.spark.parse.{ParseException, ParseUtils} 20 | 21 | object TableEngineUtils extends Logging { 22 | 23 | def resolveTableEngine(tableSpec: TableSpec): TableEngineSpec = synchronized { 24 | try ParseUtils.parser.parseEngineClause(tableSpec.engine_full) 25 | catch { 26 | case cause: ParseException => 27 | log.warn(s"Unknown table engine for table ${tableSpec.database}.${tableSpec.name}: ${tableSpec.engine_full}") 28 | UnknownTableEngineSpec(tableSpec.engine_full) 29 | } 30 | } 31 | 32 | def resolveTableCluster(distributedEngineSpec: DistributedEngineSpec, clusterSpecs: Seq[ClusterSpec]): ClusterSpec = 33 | clusterSpecs.find(_.name == distributedEngineSpec.cluster) 34 | .getOrElse(throw CHClientException(s"Unknown cluster: ${distributedEngineSpec.cluster}")) 35 | } 36 | -------------------------------------------------------------------------------- /clickhouse-core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. See accompanying LICENSE file. 
13 | # 14 | 15 | log4j.rootLogger=INFO, file 16 | 17 | log4j.appender.console=org.apache.log4j.ConsoleAppender 18 | log4j.appender.console.target=System.out 19 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss.SSS} %p %c: %m%n 21 | 22 | log4j.appender.file=org.apache.log4j.FileAppender 23 | log4j.appender.file.append=true 24 | log4j.appender.file.file=build/unit-tests.log 25 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 26 | log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n 27 | 28 | log4j.logger.org.apache.hadoop.util.Shell=ERROR 29 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 30 | log4j.logger.com.clickhouse.spark=DEBUG 31 | -------------------------------------------------------------------------------- /clickhouse-core/src/test/scala/com/clickhouse/spark/spec/NodeSpecHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
/**
 * Shared test fixture describing a two-shard cluster with two replicas per
 * shard ("s2r2"). Shard 1 has weight 1 and shard 2 has weight 2, so the total
 * routing weight is 3. The shard and replica collections are deliberately
 * declared partly out of order so suites can verify that the spec classes
 * sort them.
 */
trait NodeSpecHelper {

  // One node per shard/replica slot of the test cluster.
  val node_s1r1: NodeSpec = NodeSpec("s1r1")
  val node_s1r2: NodeSpec = NodeSpec("s1r2")
  val node_s2r1: NodeSpec = NodeSpec("s2r1")
  val node_s2r2: NodeSpec = NodeSpec("s2r2")

  // Replica number is the first argument; replica 1 and 2 inside each shard.
  val replica_s1r1: ReplicaSpec = ReplicaSpec(1, node_s1r1)
  val replica_s1r2: ReplicaSpec = ReplicaSpec(2, node_s1r2)
  val replica_s2r1: ReplicaSpec = ReplicaSpec(1, node_s2r1)
  val replica_s2r2: ReplicaSpec = ReplicaSpec(2, node_s2r2)

  // Shard 1: weight 1, replicas already in sorted order.
  val shard_s1: ShardSpec = ShardSpec(
    num = 1,
    weight = 1,
    replicas = Array(replica_s1r1, replica_s1r2) // sorted
  )

  // Shard 2: weight 2, replicas deliberately out of order.
  val shard_s2: ShardSpec = ShardSpec(
    num = 2,
    weight = 2,
    replicas = Array(replica_s2r2, replica_s2r1) // unsorted
  )

  // The full cluster, with shards deliberately out of order.
  val cluster: ClusterSpec =
    ClusterSpec(
      name = "cluster-s2r2",
      shards = Array(shard_s2, shard_s1) // unsorted
    )
}
class NodeSpecSuite extends AnyFunSuite with NodeSpecHelper {

  // Verifies that shard replicas and cluster nodes come back sorted by host,
  // even though NodeSpecHelper declares some of them out of order.
  test("nodes should be sorted") {
    assert(shard_s1.nodes.map(_.host) === Array("s1r1", "s1r2"))
    assert(shard_s2.nodes.map(_.host) === Array("s2r1", "s2r2"))

    assert(cluster.nodes.map(_.host) === Array("s1r1", "s1r2", "s2r1", "s2r2"))
    // NOTE(review): duplicate of the assert above — presumably checks that a
    // repeated call returns the same sorted view; confirm intent or deduplicate.
    assert(cluster.nodes.map(_.host) === Array("s1r1", "s1r2", "s2r1", "s2r2"))
  }
}
class ShardUtilsSuite extends AnyFunSuite with NodeSpecHelper {

  // The fixture cluster has total weight 3 (shard 1: weight 1, shard 2: weight 2),
  // so hash values cycle with period 3: slot 0 -> shard 1, slots 1 and 2 -> shard 2.
  test("test calculate shard") {
    val expectedShardNums = Seq(1, 2, 2, 1, 2, 2, 1, 2, 2)
    expectedShardNums.zipWithIndex.foreach { case (expectedNum, hashVal) =>
      assert(ShardUtils.calcShard(cluster, hashVal).num === expectedNum)
    }
  }
}
13 | # 14 | 15 | CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8 16 | ZOOKEEPER_VERSION=3.6.3 17 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r1/interserver_http_host.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | clickhouse-s1r1 17 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r1/macros.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 1 18 | 1 19 | 20 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r2/interserver_http_host.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | clickhouse-s1r2 17 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s1r2/macros.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 1 18 | 2 19 | 20 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r1/interserver_http_host.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | clickhouse-s2r1 17 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r1/macros.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 2 18 | 1 19 | 20 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r2/interserver_http_host.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | clickhouse-s2r2 17 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/s2r2/macros.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 2 18 | 2 19 | 20 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/conf/clickhouse-cluster/zookeeper.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | zookeeper 19 | 2181 20 | 21 | 22 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/java/org/scalatest/tags/Cloud.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.scalatest.tags; 16 | 17 | import java.lang.annotation.*; 18 | import org.scalatest.TagAnnotation; 19 | 20 | @TagAnnotation 21 | @Retention(RetentionPolicy.RUNTIME) 22 | @Target({ElementType.METHOD, ElementType.TYPE}) 23 | @Inherited 24 | public @interface Cloud {} 25 | -------------------------------------------------------------------------------- /clickhouse-core/src/testFixtures/scala/com/clickhouse/spark/base/ClickHouseCloudMixIn.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
/**
 * [[ClickHouseProvider]] backed by a ClickHouse Cloud instance.
 *
 * Connection settings are read through [[Utils.load]]. `CLICKHOUSE_CLOUD_HOST`
 * and `CLICKHOUSE_CLOUD_PASSWORD` are loaded without a fallback value — these
 * appear to be required settings (TODO confirm Utils.load's behavior when a
 * key is absent); the remaining settings fall back to the defaults below.
 * SSL is always enabled, as required for cloud endpoints.
 */
trait ClickHouseCloudMixIn extends ClickHouseProvider {

  override def clickhouseHost: String = Utils.load("CLICKHOUSE_CLOUD_HOST")

  // Cloud HTTP endpoint defaults to the TLS port 8443 (not plain 8123).
  override def clickhouseHttpPort: Int = Utils.load("CLICKHOUSE_CLOUD_HTTP_PORT", "8443").toInt

  override def clickhouseTcpPort: Int = Utils.load("CLICKHOUSE_CLOUD_TCP_PORT", "9000").toInt

  override def clickhouseUser: String = Utils.load("CLICKHOUSE_CLOUD_USER", "default")

  override def clickhousePassword: String = Utils.load("CLICKHOUSE_CLOUD_PASSWORD")

  override def clickhouseDatabase: String = "default"

  // The actual server version is not pinned for cloud; "latest" is used as a placeholder.
  override def clickhouseVersion: ClickHouseVersion = ClickHouseVersion.of("latest")

  override def isSslEnabled: Boolean = true
  override def isCloud: Boolean = true
}
/**
 * Abstracts over where a test's ClickHouse instance lives (single node,
 * cluster, or cloud), exposing the connection settings test mix-ins must
 * provide plus a loan-pattern helper for running code against a connected
 * [[NodeClient]].
 */
trait ClickHouseProvider {
  def clickhouseHost: String
  def clickhouseHttpPort: Int
  def clickhouseTcpPort: Int
  def clickhouseUser: String
  def clickhousePassword: String
  def clickhouseDatabase: String
  def clickhouseVersion: ClickHouseVersion
  def isSslEnabled: Boolean
  // Overridden to true only by the cloud mix-in.
  def isCloud: Boolean = false

  /**
   * Builds a [[NodeClient]] from this provider's settings, runs `block`
   * against it, and releases the client afterwards via Utils.tryWithResource
   * (presumably closing it even if `block` throws — confirm against Utils).
   *
   * @param protocol wire protocol to connect with, HTTP by default
   * @param block    code to run against the connected client
   */
  def withNodeClient(protocol: ClickHouseProtocol = HTTP)(block: NodeClient => Unit): Unit =
    Utils.tryWithResource {
      NodeClient(NodeSpec(
        clickhouseHost,
        Some(clickhouseHttpPort),
        Some(clickhouseTcpPort),
        protocol,
        username = clickhouseUser,
        database = clickhouseDatabase,
        password = clickhousePassword,
        options = Map("ssl" -> isSslEnabled.toString).asJava
      ))
    } {
      client => block(client)
    }
}
# Print usage help for this tool.
function usage {
  set +x
  echo "./dev/backport - Tool for back port patch"
  echo ""
  echo "Usage:"
  echo "+----------------------------------------------------+"
  echo "| ./dev/backport <commit-id> <spark-from> <spark-to> |"
  echo "+----------------------------------------------------+"
  echo "commit-id: - git commit hash id"
  echo "spark-from: - options: 3.4, 3.5"
  echo "spark-to: - options: 3.3, 3.4"
  echo ""
}

# Print usage and abort with a non-zero exit code.
function exit_with_usage {
  usage
  exit 1
}

# Exactly three positional arguments are required.
if [[ $# -eq 3 ]]; then
  COMMIT_ID=$1
  SPARK_FROM=$2
  SPARK_TO=$3
else
  exit_with_usage
fi

echo "Backport ${COMMIT_ID} from spark-${SPARK_FROM} to spark-${SPARK_TO} - $(git log "${COMMIT_ID}" --pretty="%s" -1)"

# Resolve the repository root relative to this script's location.
PROJECT_DIR="$(cd "$(dirname "$0")/.."; pwd)"
mkdir -p "${PROJECT_DIR}/tmp"

# Export the commit as a patch limited to the source Spark module, then clone
# it for the target module. Expansions are quoted so paths with spaces and
# unusual characters survive intact.
git format-patch "${COMMIT_ID}" -1 --stdout -- "spark-${SPARK_FROM}/" > "${PROJECT_DIR}/tmp/${COMMIT_ID}-${SPARK_FROM}.patch"
cp "${PROJECT_DIR}/tmp/${COMMIT_ID}-${SPARK_FROM}.patch" "${PROJECT_DIR}/tmp/${COMMIT_ID}-${SPARK_TO}.patch"

# BSD sed (macOS) requires an explicit backup suffix after -i; use an array so
# the multi-word command survives without relying on unquoted word splitting.
SED_I=(sed -i)
if [[ $(uname) == 'Darwin' ]]; then
  SED_I=(sed -i .bak)
fi

# Rewrite module paths in the patch from the source to the target Spark
# version, then apply it to the working tree.
"${SED_I[@]}" "s/spark-${SPARK_FROM}/spark-${SPARK_TO}/g" "${PROJECT_DIR}/tmp/${COMMIT_ID}-${SPARK_TO}.patch"
git apply "${PROJECT_DIR}/tmp/${COMMIT_ID}-${SPARK_TO}.patch"
set -x

# Resolve the repository root relative to this script's location.
PROJECT_DIR="$(cd "$(dirname "$0")/.."; pwd)"

# Run spotless once per supported Spark binary version, since each version
# selects a different source set to format.
for spark_binary_version in 3.3 3.4 3.5; do
  "${PROJECT_DIR}/gradlew" spotlessApply -Dspark_binary_version="${spark_binary_version}"
done
13 | # 14 | 15 | # latest stable version 16 | PROJECT_VERSION=0.8.0 17 | 18 | CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8 19 | AWS_JAVA_SDK_VERSION=1.12.367 20 | CLICKHOUSE_JDBC_VERSION=0.6.3 21 | HADOOP_VERSION=3.3.6 22 | HIVE_VERSION=2.3.9 23 | ICEBERG_VERSION=1.6.0 24 | KYUUBI_VERSION=1.9.2 25 | KYUUBI_HADOOP_VERSION=3.3.6 26 | POSTGRES_VERSION=12 27 | POSTGRES_JDBC_VERSION=42.3.4 28 | SCALA_BINARY_VERSION=2.12 29 | SPARK_VERSION=3.5.2 30 | SPARK_BINARY_VERSION=3.5 31 | SPARK_HADOOP_VERSION=3.3.4 32 | ZOOKEEPER_VERSION=3.6.3 33 | -------------------------------------------------------------------------------- /docker/.env-dev: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | # 14 | 15 | # current developing version 16 | PROJECT_VERSION=0.9.0-SNAPSHOT 17 | 18 | CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8 19 | AWS_JAVA_SDK_VERSION=1.12.367 20 | CLICKHOUSE_JDBC_VERSION=0.6.3 21 | HADOOP_VERSION=3.3.6 22 | HIVE_VERSION=2.3.9 23 | ICEBERG_VERSION=1.6.0 24 | KYUUBI_VERSION=1.9.2 25 | KYUUBI_HADOOP_VERSION=3.3.6 26 | POSTGRES_VERSION=12 27 | POSTGRES_JDBC_VERSION=42.3.4 28 | SCALA_BINARY_VERSION=2.12 29 | SPARK_VERSION=3.5.2 30 | SPARK_BINARY_VERSION=3.5 31 | SPARK_HADOOP_VERSION=3.3.4 32 | ZOOKEEPER_VERSION=3.6.3 33 | -------------------------------------------------------------------------------- /docker/conf/cloudbeaver-conf/README.md: -------------------------------------------------------------------------------- 1 | ## Generate Admin Password MD5 2 | ``` 3 | CB_ADMIN_PASSWORD_MD5=`echo -n "$CB_ADMIN_PASSWORD" | md5sum | tr 'a-z' 'A-Z'` 4 | CB_ADMIN_PASSWORD_MD5=${PASSWORD_MD5:0:32} 5 | ``` 6 | 7 | ## Authenticate as Admin 8 | ``` 9 | curl 'http://0.0.0.0:8978/api/gql' \ 10 | -X POST \ 11 | -H 'content-type: application/json' \ 12 | --cookie-jar /tmp/cookie.txt \ 13 | --data '{ 14 | "query": " 15 | query authLogin($provider: ID!, $credentials: Object!, $linkUser: Boolean) { 16 | authToken: authLogin( 17 | provider: $provider 18 | credentials: $credentials 19 | linkUser: $linkUser 20 | ) { 21 | authProvider 22 | } 23 | } 24 | ", 25 | "variables": { 26 | "provider": "local", 27 | "credentials": { 28 | "user": "kyuubi", 29 | "password": "4E212BBF8F138808DB96B969716D1580" 30 | }, 31 | "linkUser": true 32 | } 33 | }' 34 | ``` 35 | 36 | ## Expose Connection to Anonymous 37 | ``` 38 | curl 'http://0.0.0.0:8978/api/gql' \ 39 | -X POST \ 40 | -H 'content-type: application/json' \ 41 | --cookie /tmp/cookie.txt \ 42 | --data '{ 43 | "query": " 44 | query setConnectionAccess($connectionId: ID!, $subjects: [ID!]!) 
{ 45 | setConnectionSubjectAccess(connectionId: $connectionId, subjects: $subjects) 46 | } 47 | ", 48 | "variables": { 49 | "connectionId": "kyuubi_hive-180f13452e0-749c09a3cdb63869", 50 | "subjects": ["user"] 51 | } 52 | }' 53 | ``` 54 | -------------------------------------------------------------------------------- /docker/conf/cloudbeaver-conf/initial-data-sources.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | # 14 | 15 | { 16 | "connections": { 17 | "kyuubi": { 18 | "provider": "generic", 19 | "driver": "kyuubi_hive", 20 | "name": "Kyuubi on Spark", 21 | "save-password": false, 22 | "show-system-objects": true, 23 | "read-only": false, 24 | "template": false, 25 | "configuration": { 26 | "host": "kyuubi", 27 | "port": "10009", 28 | "url": "jdbc:kyuubi://kyuubi:10009/", 29 | "auth-model": "native" 30 | } 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /docker/conf/cloudbeaver-conf/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | %d{dd-MM-yyyy HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /docker/conf/cloudbeaver-conf/product.conf: -------------------------------------------------------------------------------- 1 | // Licensed under the Apache License, Version 2.0 (the "License"); 2 | // you may not use this file except in compliance with the License. 3 | // You may obtain a copy of the License at 4 | // 5 | // https://www.apache.org/licenses/LICENSE-2.0 6 | // 7 | // Unless required by applicable law or agreed to in writing, software 8 | // distributed under the License is distributed on an "AS IS" BASIS, 9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | // See the License for the specific language governing permissions and 11 | // limitations under the License. 12 | 13 | // Product configuration. 
Customized web application behavior 14 | // It is in JSONC format 15 | { 16 | // Global properties 17 | core: { 18 | // User defaults 19 | user: { 20 | defaultTheme: "light", 21 | defaultLanguage: "en" 22 | }, 23 | app: { 24 | // Log viewer config 25 | logViewer: { 26 | refreshTimeout: 3000, 27 | logBatchSize: 1000, 28 | maxLogRecords: 2000, 29 | maxFailedRequests: 3 30 | } 31 | } 32 | }, 33 | // Notifications config 34 | core_events: { 35 | notificationsPool: 20 36 | }, 37 | plugin_data_spreadsheet_new: { 38 | hidden: false 39 | }, 40 | plugin_data_export: { 41 | disabled: false 42 | }, 43 | sql.proposals.insert.table.alias: true 44 | } 45 | -------------------------------------------------------------------------------- /docker/conf/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | fs.s3a.access.key 19 | minio 20 | 21 | 22 | fs.s3a.secret.key 23 | minio_minio 24 | 25 | 26 | fs.s3a.connection.ssl.enabled 27 | false 28 | 29 | 30 | fs.s3a.path.style.access 31 | true 32 | 33 | 34 | fs.s3a.endpoint 35 | http://minio:9000 36 | 37 | 38 | fs.s3a.committer.name 39 | magic 40 | 41 | 42 | fs.defaultFS 43 | s3a://spark-bucket/ 44 | 45 | 46 | -------------------------------------------------------------------------------- /docker/conf/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | javax.jdo.option.ConnectionURL 19 | jdbc:postgresql://postgres:5432/metastore 20 | 21 | 22 | javax.jdo.option.ConnectionDriverName 23 | org.postgresql.Driver 24 | 25 | 26 | javax.jdo.option.ConnectionUserName 27 | postgres 28 | 29 | 30 | javax.jdo.option.ConnectionPassword 31 | postgres 32 | 33 | 34 | hive.metastore.warehouse.dir 35 | s3a://spark-bucket/warehouse 36 | location of default database for the warehouse 37 | 38 | 39 | hive.metastore.uris 40 | thrift://metastore:9083 41 | 42 | 43 | 
-------------------------------------------------------------------------------- /docker/conf/kyuubi-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | ## Kyuubi Configurations 19 | 20 | kyuubi.authentication=NONE 21 | kyuubi.frontend.bind.host=0.0.0.0 22 | kyuubi.frontend.bind.port=10009 23 | kyuubi.ha.zookeeper.quorum=zookeeper:2181 24 | kyuubi.operation.progress.enabled=true 25 | 26 | kyuubi.engine.session.initialize.sql \ 27 | show namespaces in tpcds; \ 28 | show namespaces in postgres; \ 29 | show namespaces in clickhouse_s1r1; \ 30 | show namespaces in clickhouse_s1r2; \ 31 | show namespaces in clickhouse_s2r1; \ 32 | show namespaces in clickhouse_s2r2 33 | -------------------------------------------------------------------------------- /docker/image/scc-base.Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 
# Base image for the Spark ClickHouse Connector demo stack: JDK 8 plus a few
# shell utilities shared by the derived hadoop/spark/kyuubi/metastore images.
FROM eclipse-temurin:8-focal
LABEL org.opencontainers.image.authors="Cheng Pan"

# Make /usr/bin/sh point at bash (scripts in this stack assume bash semantics),
# install retry and busybox, and expose the busybox applets under /opt/busybox.
RUN set -x && \
    ln -snf /usr/bin/bash /usr/bin/sh && \
    apt-get update -q && \
    apt-get install -yq retry busybox && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir /opt/busybox && \
    busybox --install /opt/busybox

# Put the busybox applets on PATH for all derived images.
ENV PATH=${PATH}:/opt/busybox
# Version of the scc-base image to build on; must be declared before FROM.
ARG PROJECT_VERSION

FROM pan3793/scc-base:${PROJECT_VERSION}

# Component versions and download mirrors, supplied at build time.
ARG AWS_JAVA_SDK_VERSION
ARG HADOOP_VERSION

ARG APACHE_MIRROR
ARG MAVEN_MIRROR

ENV HADOOP_HOME=/opt/hadoop
ENV HADOOP_CONF_DIR=/etc/hadoop/conf

# Download and unpack Hadoop (picking the aarch64 tarball on ARM hosts), then
# fetch the cloud-storage, S3A, and AWS SDK jars needed for s3a:// access into
# Hadoop's hdfs lib directory.
RUN set -x && \
    if [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi && \
    wget -q ${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_TAR_NAME}.tar.gz && \
    tar -xzf ${HADOOP_TAR_NAME}.tar.gz -C /opt && \
    ln -s /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME} && \
    rm ${HADOOP_TAR_NAME}.tar.gz && \
    HADOOP_CLOUD_STORAGE_JAR_NAME=hadoop-cloud-storage && \
    wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_CLOUD_STORAGE_JAR_NAME}/${HADOOP_VERSION}/${HADOOP_CLOUD_STORAGE_JAR_NAME}-${HADOOP_VERSION}.jar -P ${HADOOP_HOME}/share/hadoop/hdfs/lib && \
    HADOOP_AWS_JAR_NAME=hadoop-aws && \
    wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_AWS_JAR_NAME}/${HADOOP_VERSION}/${HADOOP_AWS_JAR_NAME}-${HADOOP_VERSION}.jar -P ${HADOOP_HOME}/share/hadoop/hdfs/lib && \
    AWS_JAVA_SDK_BUNDLE_JAR_NAME=aws-java-sdk-bundle && \
    wget -q ${MAVEN_MIRROR}/com/amazonaws/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}/${AWS_JAVA_SDK_VERSION}/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}-${AWS_JAVA_SDK_VERSION}.jar -P ${HADOOP_HOME}/share/hadoop/hdfs/lib
# Version of the scc-spark image to build on; must be declared before FROM.
ARG PROJECT_VERSION

FROM pan3793/scc-spark:${PROJECT_VERSION}

# Component versions and download mirrors, supplied at build time.
ARG AWS_JAVA_SDK_VERSION
ARG KYUUBI_HADOOP_VERSION
ARG KYUUBI_VERSION

ARG APACHE_MIRROR
ARG MAVEN_MIRROR

ENV KYUUBI_HOME=/opt/kyuubi
ENV KYUUBI_CONF_DIR=/etc/kyuubi/conf

# Download and unpack Kyuubi, add the cloud-storage/S3A/AWS SDK jars Kyuubi
# needs for s3a:// access, and create the "anonymous" user that unauthenticated
# sessions run as (kyuubi.authentication=NONE).
RUN set -x && \
    wget -q ${APACHE_MIRROR}/kyuubi/kyuubi-${KYUUBI_VERSION}/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz && \
    tar -xzf apache-kyuubi-${KYUUBI_VERSION}-bin.tgz -C /opt && \
    ln -s /opt/apache-kyuubi-${KYUUBI_VERSION}-bin ${KYUUBI_HOME} && \
    rm apache-kyuubi-${KYUUBI_VERSION}-bin.tgz && \
    HADOOP_CLOUD_STORAGE_JAR_NAME=hadoop-cloud-storage && \
    wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_CLOUD_STORAGE_JAR_NAME}/${KYUUBI_HADOOP_VERSION}/${HADOOP_CLOUD_STORAGE_JAR_NAME}-${KYUUBI_HADOOP_VERSION}.jar -P ${KYUUBI_HOME}/jars && \
    HADOOP_AWS_JAR_NAME=hadoop-aws && \
    wget -q ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_AWS_JAR_NAME}/${KYUUBI_HADOOP_VERSION}/${HADOOP_AWS_JAR_NAME}-${KYUUBI_HADOOP_VERSION}.jar -P ${KYUUBI_HOME}/jars && \
    AWS_JAVA_SDK_BUNDLE_JAR_NAME=aws-java-sdk-bundle && \
    wget -q ${MAVEN_MIRROR}/com/amazonaws/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}/${AWS_JAVA_SDK_VERSION}/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}-${AWS_JAVA_SDK_VERSION}.jar -P ${KYUUBI_HOME}/jars && \
    useradd anonymous

# Launch the Kyuubi server in the foreground.
ENTRYPOINT ["/opt/kyuubi/bin/kyuubi", "run"]
/docker/image/scc-metastore.Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # 14 | 15 | ARG PROJECT_VERSION 16 | 17 | FROM pan3793/scc-hadoop:${PROJECT_VERSION} 18 | 19 | ARG HIVE_VERSION 20 | 21 | ARG APACHE_MIRROR 22 | 23 | ENV HIVE_HOME=/opt/hive 24 | ENV HIVE_CONF_DIR=/etc/hive/conf 25 | 26 | RUN set -x && \ 27 | wget -q https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz && \ 28 | tar -xzf apache-hive-${HIVE_VERSION}-bin.tar.gz -C /opt && \ 29 | ln -s /opt/apache-hive-${HIVE_VERSION}-bin ${HIVE_HOME} && \ 30 | rm apache-hive-${HIVE_VERSION}-bin.tar.gz 31 | 32 | ENTRYPOINT ["/opt/hive/bin/hive", "--service", "metastore"] 33 | -------------------------------------------------------------------------------- /docs/best_practices/01_deployment.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | --- 15 | 16 | Deployment 17 | === 18 | 19 | ## Jar 20 | 21 | Put `clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}-{{ stable_version }}.jar` and 22 | `clickhouse-jdbc-{{ clickhouse_jdbc_version }}-all.jar` into `$SPARK_HOME/jars/`, then you don't need to bundle the jar 23 | into your Spark application, and `--jar` is not required when using `spark-shell` or `spark-sql`(again, for SQL-only 24 | use cases, [Apache Kyuubi](https://github.com/apache/kyuubi) is recommended for Production). 25 | 26 | ## Configuration 27 | 28 | Persist catalog configurations into `$SPARK_HOME/conf/spark-defaults.conf`, then `--conf`s are not required when using 29 | `spark-shell` or `spark-sql`. 
30 | 31 | ``` 32 | spark.sql.catalog.ck_01=com.clickhouse.spark.ClickHouseCatalog 33 | spark.sql.catalog.ck_01.host=10.0.0.1 34 | spark.sql.catalog.ck_01.protocol=http 35 | spark.sql.catalog.ck_01.http_port=8123 36 | spark.sql.catalog.ck_01.user=app 37 | spark.sql.catalog.ck_01.password=pwd 38 | spark.sql.catalog.ck_01.database=default 39 | 40 | spark.sql.catalog.ck_02=com.clickhouse.spark.ClickHouseCatalog 41 | spark.sql.catalog.ck_02.host=10.0.0.2 42 | spark.sql.catalog.ck_02.protocol=http 43 | spark.sql.catalog.ck_02.http_port=8123 44 | spark.sql.catalog.ck_02.user=app 45 | spark.sql.catalog.ck_02.password=pwd 46 | spark.sql.catalog.ck_02.database=default 47 | ``` 48 | -------------------------------------------------------------------------------- /docs/best_practices/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | --- 15 | 16 | TODO 17 | === 18 | -------------------------------------------------------------------------------- /docs/configurations/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | hide: 3 | - navigation 4 | license: | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at 8 | 9 | https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | --- 17 | 18 | Configurations 19 | === 20 | 21 | ## Catalog Configurations 22 | 23 | {! 24 | include-markdown "./01_catalog_configurations.md" 25 | start="" 26 | end="" 27 | !} 28 | 29 | ## SQL Configurations 30 | 31 | SQL Configurations could be overwritten by `SET <key>=<value>` at runtime. 32 | 33 | {! 34 | include-markdown "./02_sql_configurations.md" 35 | start="" 36 | end="" 37 | !} 38 | -------------------------------------------------------------------------------- /docs/developers/01_build_and_test.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | --- 15 | 16 | Build and Test 17 | === 18 | 19 | ## Build 20 | 21 | Check out source code from GitHub 22 | 23 | ``` 24 | git clone https://github.com/ClickHouse/spark-clickhouse-connector.git 25 | ``` 26 | 27 | Build w/o test 28 | 29 | ```shell 30 | ./gradlew clean build -x test 31 | ``` 32 | 33 | Go to `spark-{{ spark_binary_version }}/clickhouse-spark-runtime/build/libs/` to find the output jar 34 | `clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}-{{ version }}.jar`. 35 | 36 | ## Test 37 | 38 | The project leverages [Testcontainers](https://www.testcontainers.org/) and [Docker Compose](https://docs.docker.com/compose/) 39 | to do integration tests, you should install [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/) 40 | before running tests, and check more details on [Testcontainers document](https://www.testcontainers.org/) if you'd 41 | like to run tests with a remote Docker daemon. 42 | 43 | Run all tests 44 | 45 | `./gradlew clean test` 46 | 47 | Run a single test 48 | 49 | `./gradlew test --tests=ConvertDistToLocalWriteSuite` 50 | 51 | Test against custom ClickHouse image 52 | 53 | `CLICKHOUSE_IMAGE=custom-org/clickhouse-server:custom-tag ./gradlew test` 54 | -------------------------------------------------------------------------------- /docs/developers/02_docs_and_website.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | --- 15 | 16 | Docs and Website 17 | === 18 | 19 | ## Setup Python 20 | 21 | Follow the [Python official document](https://wiki.python.org/moin/BeginnersGuide) to install. 22 | 23 | ## Setup `pyenv` on macOS (optional) 24 | 25 | Optionally, recommend to manage Python environments by [pyenv](https://github.com/pyenv/pyenv). 26 | 27 | Install from Homebrew 28 | 29 | ```bash 30 | brew install pyenv pyenv-virtualenv 31 | ``` 32 | 33 | Setup in `~/.zshrc` 34 | 35 | ```bash 36 | eval "$(pyenv init -)" 37 | eval "$(pyenv virtualenv-init -)" 38 | ``` 39 | 40 | Install `virtualenv` 41 | 42 | ```bash 43 | pyenv install 3.9.13 44 | pyenv virtualenv 3.9.13 scc 45 | ``` 46 | 47 | Localize `virtualenv` 48 | 49 | ```bash 50 | pyenv local scc 51 | ``` 52 | 53 | ## Install dependencies 54 | 55 | ```bash 56 | pip install -r requirements.txt 57 | ``` 58 | 59 | ## Preview website 60 | 61 | ``` 62 | mkdocs serve 63 | ``` 64 | 65 | Open [http://127.0.0.1:8000/](http://127.0.0.1:8000/) in browser. 66 | -------------------------------------------------------------------------------- /docs/developers/03_private_release.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | --- 15 | 16 | Private Release 17 | === 18 | 19 | !!! 
tip 20 | 21 | Private Release means deploying to a private Nexus Repository. Please make sure you are granted access to your 22 | company's private Nexus Repository. 23 | 24 | ### Repository and Authentication 25 | 26 | Configure Gradle in `~/.gradle/gradle.properties`. 27 | 28 | ``` 29 | mavenUser=xxx 30 | mavenPassword=xxx 31 | mavenReleasesRepo=xxx 32 | mavenSnapshotsRepo=xxx 33 | ``` 34 | 35 | ### Upgrade Version 36 | 37 | Modify version in `version.txt` and `docker/.env-dev` 38 | 39 | ### Build and Deploy 40 | 41 | Publish to Maven Repository using `./gradlew publish` 42 | -------------------------------------------------------------------------------- /docs/developers/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | --- 15 | 16 | TODO 17 | === 18 | -------------------------------------------------------------------------------- /docs/imgs/scc_overview.drawio: -------------------------------------------------------------------------------- 1 | 7Vtbd5s4EP41fqwPIG5+9K3JyWm72SZ7urtvBFRDghEVcmzvr98RCGMQju34Jp8mDzEaDbL45hvNSIM7aDhd3FAvDb+SAMcdQwsWHTTqGIauazZ8cMmykLiOEExoFAilSvAQ/YeFUBPSWRTgrKbICIlZlNaFPkkS7LOazKOUzOtqP0lc/9bUm2BJ8OB7sSz9EQUsFE9haZX8FkeTkK0eWPRMvVJZCLLQC8h8TYTGHTSkhLDiaroY4piDV+JS3Pd5Q+9qYhQnbJcbRv2bOPk+H6dJP3Lm5OvL091fn/gNfJhXL56JJxazZcsSAkpmSYD5KFoHDeZhxPBD6vm8dw5GB1nIpjG0dLj8GcXxkMSEQjshCSgN5ImKub9iyvBiTSQmfoPJFDO6BJWyt0RVsMhyRXte2WQlC9ft4QihJ3gwWY1dQQUXAq19kLOuAzm7jpzRQxJyptOCHEB3IuT0fYDTtwN3DJSMOkq2JvNrpbOOUu9U9ELqgWTYqoFkqgeSqakG0l7r1HlAskzVQNJ1CRQcQCIgmoSykExI4sXjSjqoL++VzhdCUgHWM2ZsKbIab8ZIHUpAkC7/5vd3rbL5jxgub4wWtdZStDaaICMz6uM3nlMkX8yjE8y2r9EcgzcNSnHssei1niy1WSe/tU+pt1xTSEmUsGxt5HsuWFtxrEbYtxopTkNfd9/Uh4tiBhVPVo9yAHVkf/qtqYOukjrIuAR10Ad1WuL5lVHHtC9BHfMS1Dk9BayrpIDd3J+egwK2lNiNF9gHm1E5mTn7hqpXB8REO2Z49skyPOv3WGudqwrTqHcFYdrZ6GiX30ohpJyj2dfkaBk4Cuvzg+LO9mO3CyUw2zOO5ilnMVFxV2XPfb1zr8xGuttqUNPSG7Ms1jJplsdyW3ej28pp99mPiRzl3LZ3TW77/vio7xogHSUCZNOJyvPFjQGymXhpZwiQJVHfOmNUeneyMyncY5PisBrURU4tzwCzfZW+19w1nsX3dDk7HVGAhCoX45B+6RhntFXV7Jjx6ivJLVnBZf+akbLjU5b7Qh8UDDNd5PCU/XA14Z/DOPJfbsksw6A1jGcZ4yYoBofJFuMXqpJlAFBWh59i+ErvKVfgbsq9sJhE3u3F0SSBax+sA1+DBtwqke/FfdExjYIg93bBcfgaa9CxRu115oxR8oIbwvo6cQQ+NIusTgsfjDY+GCfjQ1sBcT8+6O4GPvxLAFKcchLsS4e9HJWPJJZqeJxTvUZgNpY2R7Nk2+kttkMns518/HU0Xwa06Qso9NMUvBoiDEk+XLnmyo1CG2rZvhjaWV25JQxGAEb0NGPw5Ib2mJvg0jHRMu2u69Sws7WeHBY1u1u+YHQeb5I3yyrCZwIurqsgfL2rgM+w1YQPyftHFeHTDQ6fVv3pCkIpv133hUA0qUAs4tgTLUNYFno00PPNVB7tFCgebcHZdLpy9rERaetUSBuHIi0fkqiwICiCrrxV2wVdQyUet0UrRdCVNz57ont57ralUoqg2/bK5ZG2lbePj/cwqcn3e5gbBBHbm3Joi/98y5KXsozPdw9/fIMPzPzux9al7pSoxhnLaNm6tL4Qf4SQ/ec0fB3cPeuj5IfxHN0t/Wzg7PI+/FkqMUFrGbRxBvu+g1+33SQHlkGl89nm68FHKoNKhcyDyqDSLN9dBoVm9fOhQr36ERYa/w8= 
-------------------------------------------------------------------------------- /docs/imgs/scc_overview.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_overview.drawio.png -------------------------------------------------------------------------------- /docs/imgs/scc_read_bucket_join.drawio: -------------------------------------------------------------------------------- 1 | 7Vpdc5s4FP01zGwfnAGJD/sxdtK02XS3E+9Od/uyo4AKWsuIChHb/fUrgYBgkQ9v7ZikfTK6uhLinHMv0sUWnC3XFxxlyQcWYWoBO1pb8MwCwHFsX/4oy6ayjANtiDmJtFNrmJNvWBttbS1IhPOOo2CMCpJ1jSFLUxyKjg1xzlZdty+Mdu+aoRgbhnmIqGn9RCKR6Kfw7Nb+DpM4Ec0D654lqp21IU9QxFZ3TPDcgjPOmKiulusZpgq8Gpdq3Nt7epuFcZyKpwxYf7ahWK3ejt6PRsX1p4tF9jscwUk1zS2ihX5ivVqxqSHgrEgjrGaxLThdJUTgeYZC1buSpEtbIpZUthx5+YVQOmOUcdlOWSqdpuZC9dpvMRd4fcekF36B2RILvpEuda+rQdQqAnV71XLijbUtucMHqIlCWgdxM3cLlbzQaO2AnPMigfOeCpwTHAo4EBhA4UjGnG4yLhIWsxTR89Y67ULZ+lwxlmkA/8VCbHQCQYVgXXjxmoi/1PATT7f+1pOp67P13cZGN+7FP2cFD/FDz6gzFeIxFg+JSCdDBcCDdHJMkSC33aS0d2qg85OahppgUNSAXdKN83i62Udu8bdyi2/mFgf05JbJwVLL+PXrF+5blnroR0bkUhpyG5I0ue72+6AKID1qi7dmGd+Rioand2gPTu+T169398fQuzs8vbvbe8ej6x3aBiivTu/ej6F3b3h6b85Bg9G73wOST4U6LbKSzBYt/2vB6o5RXkr5VDoAN1uX6NT98ipWvzNKwsU7VuRYes1okQvM68nlWqv5K1eDGImn6KLPsbwluikdlPxVEFWLKLsRJXEqr0NJjrwNnCpSSIjoqe5YkigqgzVTeith9KaWd9Z/Ls4FZwu8ZeyG+R7ksL299QNTDqBPDuBQcgi+Ww7O+B45fGYSUZwpDeyqhp3CVM2kEy1wD1b1cLeSm+97JnVOD3XwUNSNDerOiJQxuSmEhA7Yf5TBc/wM6J+Mg60kODGToO2fAO8Z0TOLlENEz3UleuPhoVcLfeDwQXug8JmV3iHC5/gDhc+sXF0x+epvgateMze8fsPkCeKRU25xM7lLQWZRcgDY+nZwYr5U7kXXOxS6Zp1kR3SPv+/uCfyhoGueyp+CLhiSdnveSkNB1zwD7oju8bXbs2N6fnTJ7W/R9Z/R5sPi4+XXX9OLxaW/GJnKe4ZSidVXu/h/dZD609ijH2qg38/Q83yXccwz+jxEqandX+RRm/8j5wIztY4okmfm/I3pl6A8qSATRBCWkjTuDgU9gxiPMO942qfz2Zujh4cz8Tqx4To9JZVJz0HM38N2pDcszJTxwsIieBlhYdYqnhIWpY5l67XHhRscMS4ekEuHsPdpWtaDLhnpI+7KArCFtRxiX7e244MMYRfk5vvd0UB2zZP43gq6Ekq+UPrOyq2RCpGf9dxHDnGgp8IP7P2Ud
GWz/W9f9dWk/YckPP8P -------------------------------------------------------------------------------- /docs/imgs/scc_read_bucket_join.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_bucket_join.drawio.png -------------------------------------------------------------------------------- /docs/imgs/scc_read_pushdown_disable.drawio: -------------------------------------------------------------------------------- 1 | 7Zrbcps6FIafxjPthTOAANuXsZ2k56ZJOt3tzR4FFFCMERXCh/30W4AAg7AdT3xQk14ZLQlZfOvXQlqiA0bTxRWFkf+ZuCjoGJq76IBxxzB0XbP5T2pZ5pZ+Txg8il3RqDLc4v+QMGrCmmAXxbWGjJCA4ahudEgYIofVbJBSMq83eyBB/V8j6CHJcOvAQLb+wC7zxVNYWmV/h7Dns/KBRc0UFo2FIfahS+YrJnDRASNKCMuvposRClJ4BZf8vss1teXAKArZU25Y/NIAm88vu++73eTmx9Uk+gq6YJB3M4NBIp5YjJYtCwSUJKGL0l60DhjOfczQbQSdtHbOnc5tPpsGvKTzywccBCMSEMrLIQl5o6E8UDH2GaIMLVZMYuBXiEwRo0vepKg1BcRCRaI4r1xiFTZ/xR1mISMoZOCVXVek+IWAtQM4/Y/kZplPBKf3DgXO6EmgkMunnCgSynzikRAGF5V1WEdZtflESCQAPiLGliJ+wISROl60wOyf9PYzS5R+is7S6/FitbAUhbX8Y5JQB216RhGoIPUQ2yQiEQtTABvdSVEAGZ7VY9LeXQP0V+Oa/REXt14TzIdSTjyrV594ZnM+5doQdzX8Vg7jGbNsl/Ckbw9P+4hFdiMW2XIs0o2WWDQ4WCjqv3y9g+PovXTSifQO1NM70JTT++Dl6918HXo31dO72VxrnlzvQJOgvDi9W69D75Z6ei/3Tcro3W6BZAcs3V2SzJkVLft3QoqKbpxJ+Zw3MMxokdEp6vmVl/6OAuxM3pEkRrzVKEhihmjROR9r3n/eVHIM58nq9Cnifwnvswap/NNJlA8iq4YB9kJ+7XDn8L8Bw9Qp2IHBuaiYYtfNJmuU6i3DaA071rh9Hx0zSiaoYaxP8z3Iobm8tXuyHIw2ORiHkkPv2XLQ+2vk8ItwoihKNbCrGnaapmlPItAa5sGyJGYjuNm2JbtOb3EdOJTr+pLrxpjLGN8njKMztLts8pw+Atpn/V4jCA7kIKjZZ4Z1RHpyTlNFeqbJ6fXVo1cIXXF8QFMUn5wZVhGfbiuKT85cfSL81V+By18z97R4w8Q+pK6eLXEjvkqBchJTAba21juTXypr6VqHoivnSXake/p1d8vEV4WuvCt/Cl1DJe22vJVUoSvvAXeke3rttqyYjk8Xz764N9/d5efJ9YffH8OryQd70pWVd4RUyVqiW9MexcnZ1iM2oNYRmy7v0W8dGMrafcO32vRf3pcxSsfhunzPHL89uYCNRpKvPGBf0S5oO1C297BgWI4v8c8HnT7OvvlR5LrOxY35lMy/0koGLUpufVBdKSEDWcjXlDymn+Cs1fLp5QuK+CnkC4yWnN3gQPJtT+TLu629Je04SjrhDc6j7PXHMAn/5uy2LNTL77dWcz/agdJ2rfO8
LW2naEDbFKe2xrO9n1w8i7q8a7+GXMDZ8vLc8yjyIGtZYr4Zff3+5Y63UTbElVv1Y4S4TY5eQftHA9VbvqHbF1BerD4Izc/Oqs9qwcX/ -------------------------------------------------------------------------------- /docs/imgs/scc_read_pushdown_disable.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_pushdown_disable.drawio.png -------------------------------------------------------------------------------- /docs/imgs/scc_read_pushdown_enable.drawio: -------------------------------------------------------------------------------- 1 | 7ZrbcpswEIafxjPthT2AODiXPuTQmbTpNOmkzU1HAQVUy4gKEdt9+goQYBB244kd06RXRqtFFt/+WrSye2AyX54zGAUfqYdIz9C8ZQ9Me4ah65otPlLLKrcMHWnwGfakU2W4xr+RNGrSmmAPxTVHTinhOKobXRqGyOU1G2SMLupuD5TUvzWCPlIM1y4kqvUWezyQT2Fplf0CYT/g5QPLnjksnKUhDqBHF2smcNoDE0Ypz6/mywkiKbyCS37f2YbecmIMhfwpNyzvNMAXi7P+h34/+XJ7PouuQB+c5MM8QpLIJ5az5asCAaNJ6KF0FK0HxosAc3QdQTftXYigC1vA50S0dHH5gAmZUEKZaIc0FE5jdaJy7o+IcbRcM8mJnyM6R5ythEvRa0qIUkXAlu1FFRNrKG3BejyANEKpA78cu0IlLiStHcjp/yQ4y3wqOOdQ4AxHAYU8seZkkzIeUJ+GkJxW1nEdZeVzSWkkAf5EnK9kAoEJp3W8aIn5t/T2gSVb3+Vg6fV0ud5YycZG/jFNmIu2PaPMVJD5iG8TkUyGKYCt4WSIQI4f60lp76EB+psJzf6Iy1s/UyymUi48y6kvPLO5nnJtyLsacSun8YxVtkt60v+envaRi+xGLmpJ4rrRkotODpaKhq9f7+Bl9F4G6Uh6B93TO9A6p/eT1693823o3eye3s3mXvPoei82/69Z79bb0LvVPb2XdVNn9G63QLIJT6tLmgWzomX/SmjR0Y8zKY+Eg2FGy4xO0S+u/PRzQrA7u6BJjITXhCQxR6wYXMw1Hz93VQIjePI6fYbEV8L7zCGVf7qI8klk3ZBgPxTXrgiO+BowToOCXUhGsmOOPS9brFGqtwyjNe5Z0/Y6OuaMzlDDWF/me5BDc3trO6ocjDY5GIeSg/NsOejDDXK4o4IoilIN7KqGnZZpOpJMtIZ5sFMSs5HcbNtSQ6e3hO5gp0tDJXRTLGSM7xMu0BnaTbZ4jp8B7cHQaSTBEzUJavbAsF6Qnnqo2UV6pinoDbtHrxB6x/EBraP41JPhLuLT7Y7iU0+uLql49Vfg8tfMPSveMHEAmadnW9xI7FKgeojZAba25gzUl8pGutah6KrnJDvSPf6+u2Xhd4WuWpU/ha7RJe22vJW6QletAXeke3zttuyYukJXLR5Hvs+QD3kL2XeTq6+fboS7KAjZD+y9PzpZo3H6ZOpq+QXafum0D/UmM9V91N7KcYGSzYTDKMqEzTEN/1fj26rxMvTrJZ22n2pcNKu/ceQHXtWfYcDpHw== 
-------------------------------------------------------------------------------- /docs/imgs/scc_read_pushdown_enable.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_pushdown_enable.drawio.png -------------------------------------------------------------------------------- /docs/imgs/scc_read_sort_merge_join.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_read_sort_merge_join.drawio.png -------------------------------------------------------------------------------- /docs/imgs/scc_write_rebalance_sort.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/scc_write_rebalance_sort.drawio.png -------------------------------------------------------------------------------- /docs/imgs/spark_centralized_metastore.drawio: -------------------------------------------------------------------------------- 1 | 
7Vjfc6IwEP5rfNRBKGof1dp6U522Y2+uvZdOhAg5A8uExR/96y+BIFg463TOq525BzX77Qay334bgg1rGGxuBIn8KbiUN0zD3TSsq4ZptttGR/4oZJshva4GPMFcHVQAM/ZKNWhoNGEujfcCEYAji/ZBB8KQOriHESFgvR+2AL5/14h4tALMHMKr6A/moq+zsI0CH1Pm+bhLWHsCkgdrIPaJC+sSZI0a1lAAYDYKNkPKFXk5L9m86z94dwsTNMRjJoxnd+70id2Nmsnt/CEhbuK2m/oqK8ITnbBeLG5zBqgrCdEmCPTBg5DwUYEOBCShS9VtDGkVMROASIJtCf6iiFtdXZIgSMjHgGtvNRW9rhgS4eh1fCO3jzNndi8er0fN7c/4dTIZNvVykQiP4oE4O4tTuZRuoIm6oRBQFFsZICgnyFb7xSdaQ94urqBZDjTT9awfWvUh1qVaIjVkQSrQwYoKZFKWEzKn/B5ihgxC6Z8DIgQygCvHgDhLLy3HEDgI6XfpgiQcS1foc+apmaiKMyBxlLXNgm1UCQfpDfs5auSIHPuIqun6KnfzOok4ELe1ZksWUJeRFghPwsqOlC3HDgQBhLEcoZ8Ec/m7UB9LfvUj4vj0ZSZrvnzhUi6teKWmm72eEW2ate5WFHqHxKISpJuD5dVeq6N7Um9Kdt6j66LF2/ZFy+hmsF/q8J5xIlHYFVGMlRDNTlq9uZAjT42mFEmMIGhFM0UfqqZa+wypZDDtn7XcnY/sueNptC/f0GjV0JhTW+awcyoO2xefsX3RDcMnNb1la+u55Lna6CunxjY3QplvaZIyn8u+Ylpq5fM+vlV2vuRW2al0xXQ7e5icnfYvukdq3zqZ9u3z0b7xIe23T6b97pfUfrei/SFnznIMSXx+m3+n7hl6qgaoPctWH6Cf0RAfF2nvSJGaZyXSXoX17zEVFebz82wS8L6D6mj6/pG2/sRa4hoS5Cykw92738Fd4nhtm/b7Bxvrn55rqo9BQT0Wo2JaKmbOqxuCzBX36RI0Zq9pbMaUEm+c6Vi5iabZkcRRUcN/wFw3bZIIWIhpjvagYV9JZME4z185QghVUIwClvQNuN9ef6FUnbelMuxKqXo1pTJPVqrqrv2/VPWlujxZqaRZ/LGS+kp/T1mj3w== -------------------------------------------------------------------------------- /docs/imgs/spark_centralized_metastore.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/spark_centralized_metastore.drawio.png -------------------------------------------------------------------------------- /docs/imgs/spark_multi_catalog.drawio: -------------------------------------------------------------------------------- 1 | 
1VdRc5swDP41eWyOQKHpY0LTdrf0rrt0t24vPQcU8GIQZ0RC+utnExPIaNPsbl2zhwTpk2yjTzKye46flDeSZfEdhiB6thWWPeeqZ9uDgeWph0Y2W2R4YYBI8tA4NcCMP4MBLYMWPIR8z5EQBfFsHwwwTSGgPYxJiet9twWK/VUzFkEHmAVMdNFvPKTYROFaDX4LPIppF7CxJKx2NkAesxDXLciZ9BxfItJWSkofhCav5mU77voV6+7FJKR0zIBP7PPDLJjdy4frydnmR/48nfpng+F2mhUThYnYvC1tagogVIwYFSXFGGHKxKRBxxKLNAS9jqW0xmeKmClwoMCfQLQx6WUFoYJiSoSxdmMx4eVYyAAOBGBel5iMgA74uVs/HUtrAcPUDWACJDfKQYJgxFf72WemiKKdX8OzEgzVf0L75UewDCWnx5b8XU/Vd412VZqZK2VTK6mK97GttEZptRlWafW498+od1IZ3e3yf5vRk83OxWll5+2vnPo8Z1rkSdURxiuQxFUfmLI5iHvMOXFMlX2ORJgoB6ENYxYsoyqNPgqUyh7CghWCWjOMBI/0SNJJHbM82/apBS916sfVgqMatWpEyTGR7nIjHbt9XWQCWdhf8yVPIOSsjzJSsNYzrSs5wCTBNFcSxUUyV8+F/jnqb5SxIIanmcr58kmoMuvnKz3cHg6trDx70dzP0uhQsegAoTyYXmN1PLM9zCnArdv7uumpA/e8b5myiVstdWi9U1G4naK41YVoe1X25lJJkZbugFhOKKFTM83+1ZtxHXMCxWC1f9bqOHRkjzueRvfyNxrtF2issTaH3ntx6HU4vNvMvkxPjqnz849m6qLDlC94sLzFIj+9wvKsj6ZrF8P/ejAdHtko7ZNqlN3rwNcc5KvNskjEKCDd997uly+3wxbXWJDgKfi7m9zBk8rxxW27b381nb9T20ptLnSVrXUtdia/AA== -------------------------------------------------------------------------------- /docs/imgs/spark_multi_catalog.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/docs/imgs/spark_multi_catalog.drawio.png -------------------------------------------------------------------------------- /docs/internals/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | --- 15 | 16 | Overview Design 17 | === 18 | 19 | At a high level, Spark ClickHouse Connector is a connector built on top of Spark DataSource V2 and 20 | the ClickHouse HTTP protocol. 21 | 22 | 
23 | ![Overview](../imgs/scc_overview.drawio.png) 24 |
25 | -------------------------------------------------------------------------------- /examples/scala/README.md: -------------------------------------------------------------------------------- 1 | # Spark ClickHouse Connector Examples 2 | 3 | Provide examples for Spark ClickHouse Connector. In various languages. 4 | Ready to use as simple standalone application. 5 | 6 | -------------------------------------------------------------------------------- /examples/scala/spark-3.5/.bsp/sbt.json: -------------------------------------------------------------------------------- 1 | {"name":"sbt","version":"1.9.2","bspVersion":"2.1.0-M1","languages":["scala"],"argv":["/Users/mark.zitnik/Library/Java/JavaVirtualMachines/openjdk-21.0.1/Contents/Home/bin/java","-Xms100m","-Xmx100m","-classpath","/Users/mark.zitnik/Library/Application Support/JetBrains/IntelliJIdea2023.2/plugins/Scala/launcher/sbt-launch.jar","-Dsbt.script=/opt/homebrew/bin/sbt","xsbt.boot.Boot","-bsp"]} -------------------------------------------------------------------------------- /examples/scala/spark-3.5/build.sbt: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | name := "ClickHouse Apache Spark 3.5 Example Project" 16 | 17 | version := "1.0" 18 | 19 | scalaVersion := "2.12.18" 20 | 21 | libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.5.0" 22 | libraryDependencies += "org.apache.spark" %% "spark-core" % "3.5.0" 23 | 24 | libraryDependencies += "com.clickhouse" % "clickhouse-jdbc" % "0.6.3" classifier "all" 25 | libraryDependencies += "com.clickhouse.spark" %% "clickhouse-spark-runtime-3.5" % "0.8.0" 26 | 27 | -------------------------------------------------------------------------------- /examples/scala/spark-3.5/project/build.properties: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # https://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | sbt.version=1.9.2 13 | -------------------------------------------------------------------------------- /examples/scala/spark-3.5/src/main/scala/Saprk-3.5.scala: -------------------------------------------------------------------------------- 1 | 2 | import org.apache.spark.sql.SparkSession 3 | 4 | object SparkTestApp { 5 | def main(args: Array[String]): Unit = { 6 | val username = "default" 7 | val password = "replace with password" 8 | val host = "replace with host" 9 | 10 | val spark = SparkSession.builder.appName("ClickHouse Apache Spark 3.5 Example") 11 | .master("local[2]") 12 | .config("spark.sql.catalog.clickhouse","com.clickhouse.spark.ClickHouseCatalog") 13 | .config("spark.sql.catalog.clickhouse.host", host) 14 | .config("spark.sql.catalog.clickhouse.protocol","http") 15 | .config("spark.sql.catalog.clickhouse.http_port","8443") 16 | .config("spark.sql.catalog.clickhouse.user", username) 17 | .config("spark.sql.catalog.clickhouse.password", password) 18 | .config("spark.sql.catalog.clickhouse.database","default") 19 | .config("spark.sql.catalog.clickhouse.option.ssl","true") 20 | .getOrCreate() 21 | 22 | spark.sql("use clickhouse") 23 | spark.sql("show tables").show() 24 | spark.stop() 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # 14 | 15 | mavenCentralMirror=https://repo1.maven.org/maven2/ 16 | mavenSnapshotsRepo=https://s01.oss.sonatype.org/content/repositories/snapshots/ 17 | mavenReleasesRepo=https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ 18 | 19 | systemProp.scala_binary_version=2.12 20 | systemProp.known_scala_binary_versions=2.12,2.13 21 | systemProp.spark_binary_version=3.5 22 | systemProp.known_spark_binary_versions=3.3,3.4,3.5 23 | 24 | group=com.clickhouse.spark 25 | 26 | clickhouse_jdbc_version=0.6.3 27 | 28 | spark_33_version=3.3.4 29 | spark_34_version=3.4.2 30 | spark_35_version=3.5.1 31 | 32 | spark_33_scala_212_version=2.12.15 33 | spark_34_scala_212_version=2.12.17 34 | spark_35_scala_212_version=2.12.18 35 | 36 | spark_33_scala_213_version=2.13.8 37 | spark_34_scala_213_version=2.13.8 38 | spark_35_scala_213_version=2.13.8 39 | 40 | spark_33_antlr_version=4.8 41 | spark_34_antlr_version=4.9.3 42 | spark_35_antlr_version=4.9.3 43 | 44 | spark_33_jackson_version=2.13.4 45 | spark_34_jackson_version=2.14.2 46 | spark_35_jackson_version=2.15.2 47 | 48 | spark_33_slf4j_version=1.7.32 49 | spark_34_slf4j_version=2.0.6 50 | spark_35_slf4j_version=2.0.7 51 | 52 | # Align with Apache Spark, and don't bundle them in release jar. 
53 | commons_lang3_version=3.12.0 54 | commons_codec_version=1.16.0 55 | 56 | # javax annotations removed in jdk 11 57 | # fix build error with jakarta annotations 58 | jakarta_annotation_api_version=1.3.5 59 | 60 | # Test only 61 | kyuubi_version=1.9.2 62 | testcontainers_scala_version=0.41.2 63 | scalatest_version=3.2.16 64 | flexmark_version=0.62.2 65 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /spark-3.3/clickhouse-spark-it/src/test/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TPCDSTestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
package org.apache.spark.sql.clickhouse

/**
 * Static TPC-DS schema metadata shared by the integration tests.
 */
object TPCDSTestUtils {
  // Primary-key column(s) of every TPC-DS table, keyed by table name.
  // NOTE(review): presumably consumed as the ClickHouse ordering/primary key
  // when the TPC-DS test suites create these tables — confirm against callers.
  val tablePrimaryKeys: Map[String, Seq[String]] = Map(
    "call_center" -> Array("cc_call_center_sk"),
    "catalog_page" -> Array("cp_catalog_page_sk"),
    // Fact tables use composite keys (item + order/ticket number).
    "catalog_returns" -> Array("cr_item_sk", "cr_order_number"),
    "catalog_sales" -> Array("cs_item_sk", "cs_order_number"),
    "customer" -> Array("c_customer_sk"),
    "customer_address" -> Array("ca_address_sk"),
    "customer_demographics" -> Array("cd_demo_sk"),
    "date_dim" -> Array("d_date_sk"),
    "household_demographics" -> Array("hd_demo_sk"),
    "income_band" -> Array("ib_income_band_sk"),
    "inventory" -> Array("inv_date_sk", "inv_item_sk", "inv_warehouse_sk"),
    "item" -> Array("i_item_sk"),
    "promotion" -> Array("p_promo_sk"),
    "reason" -> Array("r_reason_sk"),
    "ship_mode" -> Array("sm_ship_mode_sk"),
    "store" -> Array("s_store_sk"),
    "store_returns" -> Array("sr_item_sk", "sr_ticket_number"),
    "store_sales" -> Array("ss_item_sk", "ss_ticket_number"),
    "time_dim" -> Array("t_time_sk"),
    "warehouse" -> Array("w_warehouse_sk"),
    "web_page" -> Array("wp_web_page_sk"),
    "web_returns" -> Array("wr_item_sk", "wr_order_number"),
    "web_sales" -> Array("ws_item_sk", "ws_order_number"),
    "web_site" -> Array("web_site_sk")
  )
}
package org.apache.spark.sql.clickhouse

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.ClassTagExtensions

/** JSON helpers shared by the integration tests. */
object TestUtils {

  // Shared Jackson mapper: Scala module auto-registered, lenient about
  // properties that are missing from the target type.
  @transient lazy val om: ObjectMapper with ClassTagExtensions = {
    val mapper = new ObjectMapper() with ClassTagExtensions
    mapper.findAndRegisterModules()
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    mapper
  }

  /** Serializes any value to its JSON string representation. */
  def toJson(value: Any): String = om.writeValueAsString(value)
}
package org.apache.spark.sql.clickhouse.cluster

// Integration tests for row-removal operations (TRUNCATE / DELETE) executed
// through Spark SQL against a Distributed ClickHouse table on a test cluster.
class ClusterDeleteSuite extends SparkClickHouseClusterTest {

  test("truncate distribute table") {
    // NOTE(review): the fixture apparently seeds 4 rows keyed by m — the
    // counts below depend on withSimpleDistTable, defined outside this file.
    withSimpleDistTable("single_replica", "db_truncate", "tbl_truncate", true) { (_, db, tbl_dist, _) =>
      assert(spark.table(s"$db.$tbl_dist").count() === 4)
      spark.sql(s"TRUNCATE TABLE $db.$tbl_dist")
      // Truncation removes every row but keeps the table definition.
      assert(spark.table(s"$db.$tbl_dist").count() === 0)
    }
  }

  test("delete from distribute table") {
    withSimpleDistTable("single_replica", "db_delete", "tbl_delete", true) { (_, db, tbl_dist, _) =>
      assert(spark.table(s"$db.$tbl_dist").count() === 4)
      // Deleting the single row with m = 1 leaves three rows.
      spark.sql(s"DELETE FROM $db.$tbl_dist WHERE m = 1")
      assert(spark.table(s"$db.$tbl_dist").count() === 3)
    }
  }
}
package org.apache.spark.sql.clickhouse.cluster

import org.apache.spark.sql.Row

// Integration tests for SHOW PARTITIONS and ALTER TABLE ... DROP PARTITION
// on a Distributed ClickHouse table.
class ClusterPartitionManagementSuite extends SparkClickHouseClusterTest {

  test("distribute table partition") {
    withSimpleDistTable("single_replica", "db_part", "tbl_part", true) { (_, db, tbl_dist, _) =>
      // Fixture data is partitioned by m with values 1..4.
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $db.$tbl_dist"),
        Seq(Row("m=1"), Row("m=2"), Row("m=3"), Row("m=4"))
      )
      // A PARTITION spec narrows the listing to the matching partition.
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $db.$tbl_dist PARTITION(m = 2)"),
        Seq(Row("m=2"))
      )
      spark.sql(s"ALTER TABLE $db.$tbl_dist DROP PARTITION(m = 2)")
      // The dropped partition must no longer appear.
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $db.$tbl_dist"),
        Seq(Row("m=1"), Row("m=3"), Row("m=4"))
      )
    }
  }
}
package org.apache.spark.sql.clickhouse.cluster

// Integration tests for table creation / replacement on a ClickHouse cluster.
class ClusterTableManagementSuite extends SparkClickHouseClusterTest {

  test("create or replace distribute table") {
    autoCleanupDistTable("single_replica", "db_cor", "tbl_cor_dist") { (cluster, db, _, tbl_local) =>
      // Plain CREATE: includes the cluster property so the local table is
      // created on every node of the cluster.
      def createLocalTable(): Unit = spark.sql(
        s"""CREATE TABLE $db.$tbl_local (
           | id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           | cluster = '$cluster',
           | engine = 'MergeTree()',
           | order_by = 'id',
           | settings.index_granularity = 8192
           |) 
           |""".stripMargin
      )

      // CREATE OR REPLACE variant (note: no cluster property here).
      def createOrReplaceLocalTable(): Unit = spark.sql(
        s"""CREATE OR REPLACE TABLE `$db`.`$tbl_local` (
           | id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           | engine = 'MergeTree()',
           | order_by = 'id',
           | settings.index_granularity = 8192
           |) 
           |""".stripMargin
      )
      // Must succeed when the table already exists, and must be idempotent:
      createLocalTable()
      createOrReplaceLocalTable()
      createOrReplaceLocalTable()
    }
  }
}
package org.apache.spark.sql.clickhouse.single

import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseSingleMixIn}
import org.apache.spark.sql.Row
import org.scalatest.tags.Cloud

// Runs the shared DDL assertions against ClickHouse Cloud (tagged so the
// suite can be included/excluded by the Cloud test tag).
@Cloud
class ClickHouseCloudTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseCloudMixIn

// Runs the same assertions against a single-node ClickHouse instance.
class ClickHouseSingleTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseSingleMixIn

// Shared DDL test body; the concrete environment is supplied by a mix-in.
abstract class ClickHouseTableDDLSuite extends SparkClickHouseSingleTest {

  import testImplicits._

  test("clickhouse command runner") {
    withTable("default.abc") {
      // Create a table via the pass-through command runner, then verify that
      // Spark's DESC reports ClickHouse UInt8 as smallint.
      runClickHouseSQL("CREATE TABLE default.abc(a UInt8) ENGINE=Memory()")
      checkAnswer(
        spark.sql("""DESC default.abc""").select($"col_name", $"data_type").limit(1),
        Row("a", "smallint") :: Nil
      )
    }
  }
}
package com.clickhouse.spark

import org.apache.spark.sql.connector.ExternalCommandRunner
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import com.clickhouse.spark.client.NodeClient

/**
 * Spark [[ExternalCommandRunner]] that executes a raw SQL statement directly
 * against ClickHouse, bypassing Spark's own SQL engine.
 */
class ClickHouseCommandRunner extends ExternalCommandRunner with ClickHouseHelper {

  // Builds a node spec from the catalog options, opens a short-lived client,
  // runs the statement with JSONEachRow output, and returns one string per
  // result record. tryWithResource closes the client even when the query fails.
  override def executeCommand(sql: String, options: CaseInsensitiveStringMap): Array[String] =
    Utils.tryWithResource(NodeClient(buildNodeSpec(options))) { nodeClient =>
      nodeClient.syncQueryAndCheckOutputJSONEachRow(sql).records.map(_.toString).toArray
    }
}
package com.clickhouse.spark

import org.apache.spark.sql.connector.write.WriterCommitMessage

// Minimal commit message produced by ClickHouse writers; carries an optional
// free-form note (empty by default).
case class CommitMessage(msg: String = "") extends WriterCommitMessage

// ---------------------------------------------------------------------------
// Constants.scala
// ---------------------------------------------------------------------------

import com.clickhouse.client.config.ClickHouseClientOption._

// Property keys recognized by the ClickHouse datasource catalog.
object Constants {
  // format: off
  //////////////////////////////////////////////////////////
  //////// clickhouse datasource catalog properties ////////
  //////////////////////////////////////////////////////////
  final val CATALOG_PROP_HOST          = "host"
  final val CATALOG_PROP_TCP_PORT      = "tcp_port"
  final val CATALOG_PROP_HTTP_PORT     = "http_port"
  final val CATALOG_PROP_PROTOCOL      = "protocol"
  final val CATALOG_PROP_USER          = "user"
  final val CATALOG_PROP_PASSWORD      = "password"
  final val CATALOG_PROP_DATABASE      = "database"
  final val CATALOG_PROP_TZ            = "timezone" // server(default), client, UTC+3, Asia/Shanghai, etc.
  // Keys prefixed with "option." are forwarded verbatim to the client.
  final val CATALOG_PROP_OPTION_PREFIX = "option."
  // Client options the connector manages itself; user-supplied values for
  // these are ignored so they cannot conflict with connector behavior.
  final val CATALOG_PROP_IGNORE_OPTIONS = Seq(
    DATABASE.getKey, COMPRESS.getKey, DECOMPRESS.getKey, FORMAT.getKey, RETRY.getKey,
    USE_SERVER_TIME_ZONE.getKey, USE_SERVER_TIME_ZONE_FOR_DATES.getKey, SERVER_TIME_ZONE.getKey, USE_TIME_ZONE.getKey)

  //////////////////////////////////////////////////////////
  ////////// clickhouse datasource read properties /////////
  //////////////////////////////////////////////////////////

  //////////////////////////////////////////////////////////
  ///////// clickhouse datasource write properties /////////
  //////////////////////////////////////////////////////////
  // format: on
}
package com.clickhouse.spark.func

import org.apache.spark.sql.connector.catalog.functions.UnboundFunction

import scala.collection.mutable

/** A registry of ClickHouse-compatible functions exposed to Spark's catalog. */
trait FunctionRegistry {

  /** Names of all functions known to this registry. */
  def list: Array[String]

  /** Looks up a function by name; None when the name is not registered. */
  def load(name: String): Option[UnboundFunction]
}

/**
 * Chains several registries; on lookup, earlier registries take precedence.
 */
class CompositeFunctionRegistry(registries: Array[FunctionRegistry]) extends FunctionRegistry {

  override def list: Array[String] = registries.flatMap(_.list)

  // Short-circuits on the first registry that resolves the name, instead of
  // eagerly calling load on every registry and discarding all but the head
  // (as the previous flatMap(...).headOption implementation did).
  override def load(name: String): Option[UnboundFunction] =
    registries.collectFirst(Function.unlift((r: FunctionRegistry) => r.load(name)))
}

/** Built-in functions that are always available. */
object StaticFunctionRegistry extends FunctionRegistry {

  private val functions = Map[String, UnboundFunction](
    "ck_xx_hash64" -> ClickHouseXxHash64, // for compatible
    "clickhouse_xxHash64" -> ClickHouseXxHash64
  )

  override def list: Array[String] = functions.keys.toArray

  override def load(name: String): Option[UnboundFunction] = functions.get(name)
}

/** Mutable registry for functions registered at runtime. */
class DynamicFunctionRegistry extends FunctionRegistry {

  private val functions = mutable.Map[String, UnboundFunction]()

  /** Registers (or replaces) a function under the given name; returns this for chaining. */
  def register(name: String, function: UnboundFunction): DynamicFunctionRegistry = {
    functions += (name -> function)
    this
  }

  override def list: Array[String] = functions.keys.toArray

  override def load(name: String): Option[UnboundFunction] = functions.get(name)
}
package com.clickhouse.spark.read

import org.apache.spark.sql.connector.catalog.MetadataColumn
import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType, StringType, StructField}

/** ClickHouse virtual columns exposed to Spark as metadata columns. */
object ClickHouseMetadataColumn {
  // Virtual columns available when reading a MergeTree-family table.
  val mergeTreeMetadataCols: Array[MetadataColumn] = Array(
    ClickHouseMetadataColumn("_part", StringType),
    ClickHouseMetadataColumn("_part_index", LongType),
    ClickHouseMetadataColumn("_part_uuid", StringType),
    ClickHouseMetadataColumn("_partition_id", StringType),
    // NOTE(review): intentionally disabled — presumably because its type
    // varies with the table's partition key, so no fixed Spark type fits.
    // ClickHouseMetadataColumn("_partition_value", StringType),
    ClickHouseMetadataColumn("_sample_factor", DoubleType)
  )

  // Virtual columns available when reading through a Distributed table;
  // adds _table and _shard_num identifying the row's origin.
  val distributeMetadataCols: Array[MetadataColumn] = Array(
    ClickHouseMetadataColumn("_table", StringType),
    ClickHouseMetadataColumn("_part", StringType),
    ClickHouseMetadataColumn("_part_index", LongType),
    ClickHouseMetadataColumn("_part_uuid", StringType),
    ClickHouseMetadataColumn("_partition_id", StringType),
    ClickHouseMetadataColumn("_sample_factor", DoubleType),
    ClickHouseMetadataColumn("_shard_num", IntegerType)
  )
}

/**
 * A single metadata column definition (non-nullable by default).
 */
case class ClickHouseMetadataColumn(
  override val name: String,
  override val dataType: DataType,
  override val isNullable: Boolean = false
) extends MetadataColumn {
  /** Converts this metadata column to an equivalent schema field. */
  def toStructField: StructField = StructField(name, dataType, isNullable)
}
package com.clickhouse.spark.read

import org.apache.spark.sql.clickhouse.ReadOptions
import org.apache.spark.sql.types.StructType
import com.clickhouse.spark.spec._

import java.time.ZoneId

/**
 * Immutable description of a ClickHouse scan job, assembled on the driver and
 * shipped to scan tasks.
 *
 * @param node       the ClickHouse node to read from
 * @param tz         time zone used for temporal value conversions
 * @param tableSpec  spec of the table named in the query
 * @param cluster    cluster spec, when the table lives on a cluster
 * @param localTableSpec / localTableEngineSpec specs of the underlying local
 *                   table when the queried table is Distributed
 */
case class ScanJobDescription(
  node: NodeSpec,
  tz: ZoneId,
  tableSpec: TableSpec,
  tableEngineSpec: TableEngineSpec,
  cluster: Option[ClusterSpec],
  localTableSpec: Option[TableSpec],
  localTableEngineSpec: Option[TableEngineSpec],
  readOptions: ReadOptions,
  // Below fields will be constructed in ScanBuilder.
  readSchema: StructType = new StructType,
  // We should pass compiled ClickHouse SQL snippets(or ClickHouse SQL AST data structure) instead of Spark Expression
  // into Scan tasks because the check happens in planing phase on driver side.
  filtersExpr: String = "1=1",
  groupByClause: Option[String] = None,
  limit: Option[Int] = None
) {

  // Physical database to read: when convertDistributedToLocal is enabled and
  // the table is Distributed, target the underlying local database instead.
  def database: String = tableEngineSpec match {
    case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_db
    case _ => tableSpec.database
  }

  // Same resolution for the physical table name.
  def table: String = tableEngineSpec match {
    case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_table
    case _ => tableSpec.name
  }
}
package com.clickhouse.spark.write.format

import org.apache.commons.io.IOUtils
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.clickhouse.JsonWriter
import com.clickhouse.spark.write.{ClickHouseWriter, WriteJobDescription}

/**
 * Writer that serializes rows to ClickHouse's JSONEachRow input format.
 */
class ClickHouseJsonEachRowWriter(writeJob: WriteJobDescription) extends ClickHouseWriter(writeJob) {

  override def format: String = "JSONEachRow"

  // revisedDataSchema, output and serializedBuffer come from the
  // ClickHouseWriter base class (defined outside this file); the JsonWriter
  // renders rows onto `output`.
  val jsonWriter: JsonWriter = new JsonWriter(revisedDataSchema, writeJob.tz, output)

  override def writeRow(record: InternalRow): Unit = jsonWriter.write(record)

  override def doSerialize(): Array[Byte] = {
    // Flush buffered JSON and close the output stream so the buffer holds the
    // complete payload before it is snapshotted.
    jsonWriter.flush()
    output.close()
    serializedBuffer.toByteArray
  }

  override def close(): Unit = {
    // closeQuietly: the underlying stream may already be closed by doSerialize().
    IOUtils.closeQuietly(jsonWriter)
    super.close()
  }
}
package org.apache.spark.sql.clickhouse

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.json.{JSONOptions, JacksonGenerator}
import org.apache.spark.sql.types.StructType

import java.io.{Closeable, Flushable, OutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets
import java.time.ZoneId

/**
 * Streams Spark InternalRows to `output` as newline-delimited JSON
 * (one object per line, UTF-8), suitable for ClickHouse's JSONEachRow format.
 *
 * Lives in the org.apache.spark.sql namespace to access the internal
 * JacksonGenerator.
 */
class JsonWriter(schema: StructType, tz: ZoneId, output: OutputStream) extends Closeable with Flushable {
  // Timestamp patterns without fractional seconds or zone offset.
  // NOTE(review): presumably chosen to match ClickHouse's DateTime text
  // representation — confirm against the server's parsing rules.
  private val option: Map[String, String] = Map(
    "timestampFormat" -> "yyyy-MM-dd HH:mm:ss",
    "timestampNTZFormat" -> "yyyy-MM-dd HH:mm:ss"
  )
  private val utf8Writer = new OutputStreamWriter(output, StandardCharsets.UTF_8)
  private val jsonWriter = new JacksonGenerator(schema, utf8Writer, new JSONOptions(option, tz.getId))

  /** Writes one row followed by a line terminator. */
  def write(row: InternalRow): Unit = {
    jsonWriter.write(row)
    jsonWriter.writeLineEnding()
  }

  override def flush(): Unit = jsonWriter.flush()

  // NOTE(review): closing the generator likely cascades through utf8Writer to
  // `output`; callers should not expect `output` to remain usable afterwards.
  override def close(): Unit = jsonWriter.close()
}
package org.apache.spark.sql.clickhouse

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector.types.pojo.Schema
import org.apache.spark._
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.util.VersionUtils

/**
 * Helpers placed in the org.apache.spark namespace to reach Spark-internal
 * utilities (ArrowUtils, VersionUtils).
 */
object SparkUtils {

  // (major, minor) of the running Spark version, parsed once.
  lazy val MAJOR_MINOR_VERSION: (Int, Int) = VersionUtils.majorMinorVersion(SPARK_VERSION)

  /** Converts a Spark schema to the equivalent Arrow schema. */
  def toArrowSchema(schema: StructType, timeZoneId: String): Schema = ArrowUtils.toArrowSchema(schema, timeZoneId)

  /** Creates a named child Arrow allocator (0 reservation, unbounded limit) under the shared root. */
  def spawnArrowAllocator(name: String): BufferAllocator =
    ArrowUtils.rootAllocator.newChildAllocator(name, 0, Long.MaxValue)
}
package org.apache.spark.sql.clickhouse

import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.scalatest.funsuite.AnyFunSuite
import com.clickhouse.spark.ClickHouseHelper

import scala.collection.JavaConverters._

// Unit tests for ClickHouseHelper's option handling.
class ClickHouseHelperSuite extends AnyFunSuite with ClickHouseHelper {

  test("buildNodeSpec") {
    // The catalog-level "database" key must win over the client-level
    // "option.database", while other "option."-prefixed entries are forwarded
    // into the node spec's options.
    val nodeSpec = buildNodeSpec(
      new CaseInsensitiveStringMap(Map(
        "database" -> "testing",
        "option.database" -> "production",
        "option.ssl" -> "true"
      ).asJava)
    )
    assert(nodeSpec.database === "testing")
    assert(nodeSpec.options.get("ssl") === "true")
  }
}
package org.apache.spark.sql.clickhouse

/**
 * Static TPC-DS schema metadata shared by the integration tests
 * (per-Spark-version copy).
 */
object TPCDSTestUtils {
  // Primary-key column(s) of every TPC-DS table, keyed by table name.
  // NOTE(review): presumably consumed as the ClickHouse ordering/primary key
  // when the TPC-DS test suites create these tables — confirm against callers.
  val tablePrimaryKeys: Map[String, Seq[String]] = Map(
    "call_center" -> Array("cc_call_center_sk"),
    "catalog_page" -> Array("cp_catalog_page_sk"),
    // Fact tables use composite keys (item + order/ticket number).
    "catalog_returns" -> Array("cr_item_sk", "cr_order_number"),
    "catalog_sales" -> Array("cs_item_sk", "cs_order_number"),
    "customer" -> Array("c_customer_sk"),
    "customer_address" -> Array("ca_address_sk"),
    "customer_demographics" -> Array("cd_demo_sk"),
    "date_dim" -> Array("d_date_sk"),
    "household_demographics" -> Array("hd_demo_sk"),
    "income_band" -> Array("ib_income_band_sk"),
    "inventory" -> Array("inv_date_sk", "inv_item_sk", "inv_warehouse_sk"),
    "item" -> Array("i_item_sk"),
    "promotion" -> Array("p_promo_sk"),
    "reason" -> Array("r_reason_sk"),
    "ship_mode" -> Array("sm_ship_mode_sk"),
    "store" -> Array("s_store_sk"),
    "store_returns" -> Array("sr_item_sk", "sr_ticket_number"),
    "store_sales" -> Array("ss_item_sk", "ss_ticket_number"),
    "time_dim" -> Array("t_time_sk"),
    "warehouse" -> Array("w_warehouse_sk"),
    "web_page" -> Array("wp_web_page_sk"),
    "web_returns" -> Array("wr_item_sk", "wr_order_number"),
    "web_sales" -> Array("ws_item_sk", "ws_order_number"),
    "web_site" -> Array("web_site_sk")
  )
}
4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse 16 | 17 | import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} 18 | import com.fasterxml.jackson.module.scala.ClassTagExtensions 19 | 20 | object TestUtils { 21 | 22 | @transient lazy val om: ObjectMapper with ClassTagExtensions = { 23 | val _om = new ObjectMapper() with ClassTagExtensions 24 | _om.findAndRegisterModules() 25 | _om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) 26 | _om 27 | } 28 | 29 | def toJson(value: Any): String = om.writeValueAsString(value) 30 | } 31 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterDeleteSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse.cluster 16 | 17 | class ClusterDeleteSuite extends SparkClickHouseClusterTest { 18 | 19 | test("truncate distribute table") { 20 | withSimpleDistTable("single_replica", "db_truncate", "tbl_truncate", true) { (_, db, tbl_dist, _) => 21 | assert(spark.table(s"$db.$tbl_dist").count() === 4) 22 | spark.sql(s"TRUNCATE TABLE $db.$tbl_dist") 23 | assert(spark.table(s"$db.$tbl_dist").count() === 0) 24 | } 25 | } 26 | 27 | test("delete from distribute table") { 28 | withSimpleDistTable("single_replica", "db_delete", "tbl_delete", true) { (_, db, tbl_dist, _) => 29 | assert(spark.table(s"$db.$tbl_dist").count() === 4) 30 | spark.sql(s"DELETE FROM $db.$tbl_dist WHERE m = 1") 31 | assert(spark.table(s"$db.$tbl_dist").count() === 3) 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterPartitionManagementSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse.cluster 16 | 17 | import org.apache.spark.sql.Row 18 | 19 | class ClusterPartitionManagementSuite extends SparkClickHouseClusterTest { 20 | 21 | test("distribute table partition") { 22 | withSimpleDistTable("single_replica", "db_part", "tbl_part", true) { (_, db, tbl_dist, _) => 23 | checkAnswer( 24 | spark.sql(s"SHOW PARTITIONS $db.$tbl_dist"), 25 | Seq(Row("m=1"), Row("m=2"), Row("m=3"), Row("m=4")) 26 | ) 27 | checkAnswer( 28 | spark.sql(s"SHOW PARTITIONS $db.$tbl_dist PARTITION(m = 2)"), 29 | Seq(Row("m=2")) 30 | ) 31 | spark.sql(s"ALTER TABLE $db.$tbl_dist DROP PARTITION(m = 2)") 32 | checkAnswer( 33 | spark.sql(s"SHOW PARTITIONS $db.$tbl_dist"), 34 | Seq(Row("m=1"), Row("m=3"), Row("m=4")) 35 | ) 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/cluster/ClusterTableManagementSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse.cluster 16 | 17 | class ClusterTableManagementSuite extends SparkClickHouseClusterTest { 18 | 19 | test("create or replace distribute table") { 20 | autoCleanupDistTable("single_replica", "db_cor", "tbl_cor_dist") { (cluster, db, _, tbl_local) => 21 | def createLocalTable(): Unit = spark.sql( 22 | s"""CREATE TABLE $db.$tbl_local ( 23 | | id Long NOT NULL 24 | |) USING ClickHouse 25 | |TBLPROPERTIES ( 26 | | cluster = '$cluster', 27 | | engine = 'MergeTree()', 28 | | order_by = 'id', 29 | | settings.index_granularity = 8192 30 | |) 31 | |""".stripMargin 32 | ) 33 | 34 | def createOrReplaceLocalTable(): Unit = spark.sql( 35 | s"""CREATE OR REPLACE TABLE `$db`.`$tbl_local` ( 36 | | id Long NOT NULL 37 | |) USING ClickHouse 38 | |TBLPROPERTIES ( 39 | | engine = 'MergeTree()', 40 | | order_by = 'id', 41 | | settings.index_granularity = 8192 42 | |) 43 | |""".stripMargin 44 | ) 45 | createLocalTable() 46 | createOrReplaceLocalTable() 47 | createOrReplaceLocalTable() 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseTableDDLSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse.single 16 | 17 | import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseSingleMixIn} 18 | import org.apache.spark.sql.Row 19 | import org.scalatest.tags.Cloud 20 | 21 | @Cloud 22 | class ClickHouseCloudTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseCloudMixIn 23 | 24 | class ClickHouseSingleTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseSingleMixIn 25 | 26 | abstract class ClickHouseTableDDLSuite extends SparkClickHouseSingleTest { 27 | 28 | import testImplicits._ 29 | 30 | test("clickhouse command runner") { 31 | withTable("default.abc") { 32 | runClickHouseSQL("CREATE TABLE default.abc(a UInt8) ENGINE=Memory()") 33 | checkAnswer( 34 | spark.sql("""DESC default.abc""").select($"col_name", $"data_type").limit(1), 35 | Row("a", "smallint") :: Nil 36 | ) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark-runtime/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClickHouse/spark-clickhouse-connector/2f86084e68fb099121f47290b0fe05808407e478/spark-3.4/clickhouse-spark-runtime/.gitkeep -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/ClickHouseCommandRunner.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark 16 | 17 | import org.apache.spark.sql.connector.ExternalCommandRunner 18 | import org.apache.spark.sql.util.CaseInsensitiveStringMap 19 | import com.clickhouse.spark.client.NodeClient 20 | 21 | class ClickHouseCommandRunner extends ExternalCommandRunner with ClickHouseHelper { 22 | 23 | override def executeCommand(sql: String, options: CaseInsensitiveStringMap): Array[String] = 24 | Utils.tryWithResource(NodeClient(buildNodeSpec(options))) { nodeClient => 25 | nodeClient.syncQueryAndCheckOutputJSONEachRow(sql).records.map(_.toString).toArray 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/CommitMessage.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark 16 | 17 | import org.apache.spark.sql.connector.write.WriterCommitMessage 18 | 19 | case class CommitMessage(msg: String = "") extends WriterCommitMessage 20 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/Constants.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark 16 | 17 | import com.clickhouse.client.config.ClickHouseClientOption._ 18 | 19 | object Constants { 20 | // format: off 21 | ////////////////////////////////////////////////////////// 22 | //////// clickhouse datasource catalog properties //////// 23 | ////////////////////////////////////////////////////////// 24 | final val CATALOG_PROP_HOST = "host" 25 | final val CATALOG_PROP_TCP_PORT = "tcp_port" 26 | final val CATALOG_PROP_HTTP_PORT = "http_port" 27 | final val CATALOG_PROP_PROTOCOL = "protocol" 28 | final val CATALOG_PROP_USER = "user" 29 | final val CATALOG_PROP_PASSWORD = "password" 30 | final val CATALOG_PROP_DATABASE = "database" 31 | final val CATALOG_PROP_TZ = "timezone" // server(default), client, UTC+3, Asia/Shanghai, etc. 32 | final val CATALOG_PROP_OPTION_PREFIX = "option." 
33 | final val CATALOG_PROP_IGNORE_OPTIONS = Seq( 34 | DATABASE.getKey, COMPRESS.getKey, DECOMPRESS.getKey, FORMAT.getKey, RETRY.getKey, 35 | USE_SERVER_TIME_ZONE.getKey, USE_SERVER_TIME_ZONE_FOR_DATES.getKey, SERVER_TIME_ZONE.getKey, USE_TIME_ZONE.getKey) 36 | 37 | ////////////////////////////////////////////////////////// 38 | ////////// clickhouse datasource read properties ///////// 39 | ////////////////////////////////////////////////////////// 40 | 41 | ////////////////////////////////////////////////////////// 42 | ///////// clickhouse datasource write properties ///////// 43 | ////////////////////////////////////////////////////////// 44 | // format: on 45 | } 46 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/CityHash64.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.func 16 | 17 | import com.clickhouse.spark.hash 18 | 19 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L694 20 | object CityHash64 extends MultiStringArgsHash { 21 | 22 | override protected def funcName: String = "clickhouse_cityHash64" 23 | 24 | override val ckFuncNames: Array[String] = Array("cityHash64") 25 | 26 | override def applyHash(input: Array[Any]): Long = hash.CityHash64(input) 27 | } 28 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/MurmurHash2.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.func 16 | 17 | import com.clickhouse.spark.hash 18 | import com.clickhouse.spark.hash.HashUtils 19 | 20 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L460 21 | object MurmurHash2_64 extends MultiStringArgsHash { 22 | 23 | override protected def funcName: String = "clickhouse_murmurHash2_64" 24 | 25 | override val ckFuncNames: Array[String] = Array("murmurHash2_64") 26 | 27 | override def applyHash(input: Array[Any]): Long = hash.Murmurhash2_64(input) 28 | } 29 | 30 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519 31 | object MurmurHash2_32 extends MultiStringArgsHash { 32 | 33 | override protected def funcName: String = "clickhouse_murmurHash2_32" 34 | 35 | override val ckFuncNames: Array[String] = Array("murmurHash2_32") 36 | 37 | override def applyHash(input: Array[Any]): Long = HashUtils.toUInt32(hash.Murmurhash2_32(input)) 38 | } 39 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/func/MurmurHash3.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.func 16 | 17 | import com.clickhouse.spark.hash 18 | import com.clickhouse.spark.hash.HashUtils 19 | 20 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L543 21 | object MurmurHash3_64 extends MultiStringArgsHash { 22 | 23 | override protected def funcName: String = "clickhouse_murmurHash3_64" 24 | 25 | override val ckFuncNames: Array[String] = Array("murmurHash3_64") 26 | 27 | override def applyHash(input: Array[Any]): Long = hash.Murmurhash3_64(input) 28 | } 29 | 30 | // https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519 31 | object MurmurHash3_32 extends MultiStringArgsHash { 32 | 33 | override protected def funcName: String = "clickhouse_murmurHash3_32" 34 | 35 | override val ckFuncNames: Array[String] = Array("murmurHash3_32") 36 | 37 | override def applyHash(input: Array[Any]): Long = HashUtils.toUInt32(hash.Murmurhash3_32(input)) 38 | } 39 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ClickHouseMetadataColumn.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.read 16 | 17 | import org.apache.spark.sql.connector.catalog.MetadataColumn 18 | import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType, StringType, StructField} 19 | 20 | object ClickHouseMetadataColumn { 21 | val mergeTreeMetadataCols: Array[MetadataColumn] = Array( 22 | ClickHouseMetadataColumn("_part", StringType), 23 | ClickHouseMetadataColumn("_part_index", LongType), 24 | ClickHouseMetadataColumn("_part_uuid", StringType), 25 | ClickHouseMetadataColumn("_partition_id", StringType), 26 | // ClickHouseMetadataColumn("_partition_value", StringType), 27 | ClickHouseMetadataColumn("_sample_factor", DoubleType) 28 | ) 29 | 30 | val distributeMetadataCols: Array[MetadataColumn] = Array( 31 | ClickHouseMetadataColumn("_table", StringType), 32 | ClickHouseMetadataColumn("_part", StringType), 33 | ClickHouseMetadataColumn("_part_index", LongType), 34 | ClickHouseMetadataColumn("_part_uuid", StringType), 35 | ClickHouseMetadataColumn("_partition_id", StringType), 36 | ClickHouseMetadataColumn("_sample_factor", DoubleType), 37 | ClickHouseMetadataColumn("_shard_num", IntegerType) 38 | ) 39 | } 40 | 41 | case class ClickHouseMetadataColumn( 42 | override val name: String, 43 | override val dataType: DataType, 44 | override val isNullable: Boolean = false 45 | ) extends MetadataColumn { 46 | def toStructField: StructField = StructField(name, dataType, isNullable) 47 | } 48 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/read/ScanJobDescription.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.clickhouse.spark.read 16 | 17 | import org.apache.spark.sql.clickhouse.ReadOptions 18 | import org.apache.spark.sql.types.StructType 19 | import com.clickhouse.spark.spec._ 20 | 21 | import java.time.ZoneId 22 | 23 | case class ScanJobDescription( 24 | node: NodeSpec, 25 | tz: ZoneId, 26 | tableSpec: TableSpec, 27 | tableEngineSpec: TableEngineSpec, 28 | cluster: Option[ClusterSpec], 29 | localTableSpec: Option[TableSpec], 30 | localTableEngineSpec: Option[TableEngineSpec], 31 | readOptions: ReadOptions, 32 | // Below fields will be constructed in ScanBuilder. 33 | readSchema: StructType = new StructType, 34 | // We should pass compiled ClickHouse SQL snippets(or ClickHouse SQL AST data structure) instead of Spark Expression 35 | // into Scan tasks because the check happens in planing phase on driver side. 
36 | filtersExpr: String = "1=1", 37 | groupByClause: Option[String] = None, 38 | limit: Option[Int] = None 39 | ) { 40 | 41 | def database: String = tableEngineSpec match { 42 | case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_db 43 | case _ => tableSpec.database 44 | } 45 | 46 | def table: String = tableEngineSpec match { 47 | case dist: DistributedEngineSpec if readOptions.convertDistributedToLocal => dist.local_table 48 | case _ => tableSpec.name 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/com/clickhouse/spark/write/format/ClickHouseJsonEachRowWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under th e License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.clickhouse.spark.write.format 16 | 17 | import org.apache.commons.io.IOUtils 18 | import org.apache.spark.sql.catalyst.InternalRow 19 | import org.apache.spark.sql.clickhouse.JsonWriter 20 | import com.clickhouse.spark.write.{ClickHouseWriter, WriteJobDescription} 21 | 22 | class ClickHouseJsonEachRowWriter(writeJob: WriteJobDescription) extends ClickHouseWriter(writeJob) { 23 | 24 | override def format: String = "JSONEachRow" 25 | 26 | val jsonWriter: JsonWriter = new JsonWriter(revisedDataSchema, writeJob.tz, output) 27 | 28 | override def writeRow(record: InternalRow): Unit = jsonWriter.write(record) 29 | 30 | override def doSerialize(): Array[Byte] = { 31 | jsonWriter.flush() 32 | output.close() 33 | serializedBuffer.toByteArray 34 | } 35 | 36 | override def close(): Unit = { 37 | IOUtils.closeQuietly(jsonWriter) 38 | super.close() 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/JsonWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse 16 | 17 | import org.apache.spark.sql.catalyst.InternalRow 18 | import org.apache.spark.sql.catalyst.json.{JSONOptions, JacksonGenerator} 19 | import org.apache.spark.sql.types.StructType 20 | 21 | import java.io.{Closeable, Flushable, OutputStream, OutputStreamWriter} 22 | import java.nio.charset.StandardCharsets 23 | import java.time.ZoneId 24 | 25 | class JsonWriter(schema: StructType, tz: ZoneId, output: OutputStream) extends Closeable with Flushable { 26 | private val option: Map[String, String] = Map( 27 | "timestampFormat" -> "yyyy-MM-dd HH:mm:ss", 28 | "timestampNTZFormat" -> "yyyy-MM-dd HH:mm:ss" 29 | ) 30 | private val utf8Writer = new OutputStreamWriter(output, StandardCharsets.UTF_8) 31 | private val jsonWriter = new JacksonGenerator(schema, utf8Writer, new JSONOptions(option, tz.getId)) 32 | 33 | def write(row: InternalRow): Unit = { 34 | jsonWriter.write(row) 35 | jsonWriter.writeLineEnding() 36 | } 37 | 38 | override def flush(): Unit = jsonWriter.flush() 39 | 40 | override def close(): Unit = jsonWriter.close() 41 | } 42 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/main/scala/org/apache/spark/sql/clickhouse/SparkUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse 16 | 17 | import org.apache.arrow.memory.BufferAllocator 18 | import org.apache.arrow.vector.types.pojo.Schema 19 | import org.apache.spark._ 20 | import org.apache.spark.sql.types.StructType 21 | import org.apache.spark.sql.util.ArrowUtils 22 | import org.apache.spark.util.VersionUtils 23 | 24 | object SparkUtils { 25 | 26 | lazy val MAJOR_MINOR_VERSION: (Int, Int) = VersionUtils.majorMinorVersion(SPARK_VERSION) 27 | 28 | def toArrowSchema(schema: StructType, timeZoneId: String): Schema = ArrowUtils.toArrowSchema(schema, timeZoneId) 29 | 30 | def spawnArrowAllocator(name: String): BufferAllocator = 31 | ArrowUtils.rootAllocator.newChildAllocator(name, 0, Long.MaxValue) 32 | } 33 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/test/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /spark-3.4/clickhouse-spark/src/test/scala/org/apache/spark/sql/clickhouse/ClickHouseHelperSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse 16 | 17 | import org.apache.spark.sql.util.CaseInsensitiveStringMap 18 | import org.scalatest.funsuite.AnyFunSuite 19 | import com.clickhouse.spark.ClickHouseHelper 20 | 21 | import scala.collection.JavaConverters._ 22 | 23 | class ClickHouseHelperSuite extends AnyFunSuite with ClickHouseHelper { 24 | 25 | test("buildNodeSpec") { 26 | val nodeSpec = buildNodeSpec( 27 | new CaseInsensitiveStringMap(Map( 28 | "database" -> "testing", 29 | "option.database" -> "production", 30 | "option.ssl" -> "true" 31 | ).asJava) 32 | ) 33 | assert(nodeSpec.database === "testing") 34 | assert(nodeSpec.options.get("ssl") === "true") 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /spark-3.5/clickhouse-spark-it/src/test/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TPCDSTestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * https://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package org.apache.spark.sql.clickhouse 16 | 17 | object TPCDSTestUtils { 18 | val tablePrimaryKeys: Map[String, Seq[String]] = Map( 19 | "call_center" -> Array("cc_call_center_sk"), 20 | "catalog_page" -> Array("cp_catalog_page_sk"), 21 | "catalog_returns" -> Array("cr_item_sk", "cr_order_number"), 22 | "catalog_sales" -> Array("cs_item_sk", "cs_order_number"), 23 | "customer" -> Array("c_customer_sk"), 24 | "customer_address" -> Array("ca_address_sk"), 25 | "customer_demographics" -> Array("cd_demo_sk"), 26 | "date_dim" -> Array("d_date_sk"), 27 | "household_demographics" -> Array("hd_demo_sk"), 28 | "income_band" -> Array("ib_income_band_sk"), 29 | "inventory" -> Array("inv_date_sk", "inv_item_sk", "inv_warehouse_sk"), 30 | "item" -> Array("i_item_sk"), 31 | "promotion" -> Array("p_promo_sk"), 32 | "reason" -> Array("r_reason_sk"), 33 | "ship_mode" -> Array("sm_ship_mode_sk"), 34 | "store" -> Array("s_store_sk"), 35 | "store_returns" -> Array("sr_item_sk", "sr_ticket_number"), 36 | "store_sales" -> Array("ss_item_sk", "ss_ticket_number"), 37 | "time_dim" -> Array("t_time_sk"), 38 | "warehouse" -> Array("w_warehouse_sk"), 39 | "web_page" -> Array("wp_web_page_sk"), 40 | "web_returns" -> Array("wr_item_sk", "wr_order_number"), 41 | "web_sales" -> Array("ws_item_sk", "ws_order_number"), 42 | "web_site" -> Array("web_site_sk") 43 | ) 44 | } 45 | -------------------------------------------------------------------------------- /spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
package org.apache.spark.sql.clickhouse

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.ClassTagExtensions

/** JSON serialization helpers shared by the integration tests. */
object TestUtils {

  // Shared, lazily initialized Jackson mapper. Unknown properties are ignored
  // so deserialization stays lenient when serialized shapes gain new fields.
  @transient lazy val om: ObjectMapper with ClassTagExtensions = {
    val mapper = new ObjectMapper() with ClassTagExtensions
    mapper.findAndRegisterModules()
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    mapper
  }

  /** Renders any value as a JSON string via the shared mapper. */
  def toJson(value: Any): String = om.writeValueAsString(value)
}
package org.apache.spark.sql.clickhouse.cluster

/**
 * Verifies TRUNCATE and DELETE statements issued through Spark against a
 * Distributed table on a single-replica cluster.
 */
class ClusterDeleteSuite extends SparkClickHouseClusterTest {

  test("truncate distribute table") {
    withSimpleDistTable("single_replica", "db_truncate", "tbl_truncate", true) { (_, db, tblDist, _) =>
      val table = s"$db.$tblDist"
      // Fixture seeds 4 rows; TRUNCATE must leave the table empty.
      assert(spark.table(table).count() === 4)
      spark.sql(s"TRUNCATE TABLE $table")
      assert(spark.table(table).count() === 0)
    }
  }

  test("delete from distribute table") {
    withSimpleDistTable("single_replica", "db_delete", "tbl_delete", true) { (_, db, tblDist, _) =>
      val table = s"$db.$tblDist"
      // Deleting the rows matching m = 1 drops the count from 4 to 3.
      assert(spark.table(table).count() === 4)
      spark.sql(s"DELETE FROM $table WHERE m = 1")
      assert(spark.table(table).count() === 3)
    }
  }
}
package org.apache.spark.sql.clickhouse.cluster

import org.apache.spark.sql.Row

/**
 * Exercises SHOW PARTITIONS and ALTER TABLE ... DROP PARTITION against a
 * Distributed table on a single-replica cluster.
 */
class ClusterPartitionManagementSuite extends SparkClickHouseClusterTest {

  test("distribute table partition") {
    withSimpleDistTable("single_replica", "db_part", "tbl_part", true) { (_, db, tblDist, _) =>
      val table = s"$db.$tblDist"
      // Fixture data spans partitions m = 1..4.
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $table"),
        Seq(Row("m=1"), Row("m=2"), Row("m=3"), Row("m=4"))
      )
      // A partition spec narrows the listing to the matching partition only.
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $table PARTITION(m = 2)"),
        Seq(Row("m=2"))
      )
      // After dropping m = 2 it must no longer be listed.
      spark.sql(s"ALTER TABLE $table DROP PARTITION(m = 2)")
      checkAnswer(
        spark.sql(s"SHOW PARTITIONS $table"),
        Seq(Row("m=1"), Row("m=3"), Row("m=4"))
      )
    }
  }
}
package org.apache.spark.sql.clickhouse.cluster

/**
 * Verifies CREATE TABLE and CREATE OR REPLACE TABLE DDL for cluster-backed
 * local tables.
 */
class ClusterTableManagementSuite extends SparkClickHouseClusterTest {

  test("create or replace distribute table") {
    autoCleanupDistTable("single_replica", "db_cor", "tbl_cor_dist") { (cluster, db, _, tblLocal) =>
      val createDDL =
        s"""CREATE TABLE $db.$tblLocal (
           | id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           | cluster = '$cluster',
           | engine = 'MergeTree()',
           | order_by = 'id',
           | settings.index_granularity = 8192
           |)
           |""".stripMargin

      val createOrReplaceDDL =
        s"""CREATE OR REPLACE TABLE `$db`.`$tblLocal` (
           | id Long NOT NULL
           |) USING ClickHouse
           |TBLPROPERTIES (
           | engine = 'MergeTree()',
           | order_by = 'id',
           | settings.index_granularity = 8192
           |)
           |""".stripMargin

      // Plain CREATE first, then CREATE OR REPLACE twice: it must succeed both
      // when the table already exists and when it was just replaced.
      spark.sql(createDDL)
      spark.sql(createOrReplaceDDL)
      spark.sql(createOrReplaceDDL)
    }
  }
}
package org.apache.spark.sql.clickhouse.single

import com.clickhouse.spark.base.{ClickHouseCloudMixIn, ClickHouseSingleMixIn}
import org.apache.spark.sql.Row
import org.scalatest.tags.Cloud

// The same DDL scenarios run twice: once against ClickHouse Cloud and once
// against a single-node instance.
@Cloud
class ClickHouseCloudTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseCloudMixIn

class ClickHouseSingleTableDDLSuite extends ClickHouseTableDDLSuite with ClickHouseSingleMixIn

abstract class ClickHouseTableDDLSuite extends SparkClickHouseSingleTest {

  import testImplicits._

  test("clickhouse command runner") {
    withTable("default.abc") {
      // Create the table through the external command runner, then check that
      // the catalog surfaces ClickHouse UInt8 as Spark's smallint.
      runClickHouseSQL("CREATE TABLE default.abc(a UInt8) ENGINE=Memory()")
      val firstColumn = spark.sql("""DESC default.abc""").select($"col_name", $"data_type").limit(1)
      checkAnswer(firstColumn, Row("a", "smallint") :: Nil)
    }
  }
}
package com.clickhouse.spark

import com.clickhouse.spark.client.NodeClient
import org.apache.spark.sql.connector.ExternalCommandRunner
import org.apache.spark.sql.util.CaseInsensitiveStringMap

/**
 * Spark [[ExternalCommandRunner]] that executes raw ClickHouse SQL and returns
 * each result record rendered as a JSON string (JSONEachRow output).
 *
 * Fix: use the imported [[NodeClient]] directly; the original called
 * `client.NodeClient(...)` through the package path, leaving the explicit
 * import dead and the qualification inconsistent.
 */
class ClickHouseCommandRunner extends ExternalCommandRunner with ClickHouseHelper {

  override def executeCommand(sql: String, options: CaseInsensitiveStringMap): Array[String] =
    // The client is intentionally short-lived: opened, used for one command,
    // and closed by tryWithResource even if the query throws.
    Utils.tryWithResource(NodeClient(buildNodeSpec(options))) { nodeClient =>
      nodeClient.syncQueryAndCheckOutputJSONEachRow(sql).records.map(_.toString).toArray
    }
}
package com.clickhouse.spark

import org.apache.spark.sql.connector.write.WriterCommitMessage

import com.clickhouse.client.config.ClickHouseClientOption._

/**
 * Commit message returned by ClickHouse data writers to the driver;
 * `msg` is a free-form note and defaults to empty.
 */
case class CommitMessage(msg: String = "") extends WriterCommitMessage

/**
 * Property keys recognized by the ClickHouse DataSource V2 catalog.
 */
object Constants {
  // format: off
  //////////////////////////////////////////////////////////
  //////// clickhouse datasource catalog properties ////////
  //////////////////////////////////////////////////////////
  final val CATALOG_PROP_HOST          = "host"
  final val CATALOG_PROP_TCP_PORT      = "tcp_port"
  final val CATALOG_PROP_HTTP_PORT     = "http_port"
  final val CATALOG_PROP_PROTOCOL      = "protocol"
  final val CATALOG_PROP_USER          = "user"
  final val CATALOG_PROP_PASSWORD      = "password"
  final val CATALOG_PROP_DATABASE      = "database"
  final val CATALOG_PROP_TZ            = "timezone" // server(default), client, UTC+3, Asia/Shanghai, etc.
  final val CATALOG_PROP_OPTION_PREFIX = "option."
  // Client options listed here are managed by the connector itself.
  // NOTE(review): presumably user-supplied values for these keys are filtered
  // out before the client is built — confirm against ClickHouseHelper.
  final val CATALOG_PROP_IGNORE_OPTIONS = Seq(
    DATABASE.getKey, COMPRESS.getKey, DECOMPRESS.getKey, FORMAT.getKey, RETRY.getKey,
    USE_SERVER_TIME_ZONE.getKey, USE_SERVER_TIME_ZONE_FOR_DATES.getKey, SERVER_TIME_ZONE.getKey, USE_TIME_ZONE.getKey)

  //////////////////////////////////////////////////////////
  ////////// clickhouse datasource read properties /////////
  //////////////////////////////////////////////////////////

  //////////////////////////////////////////////////////////
  ///////// clickhouse datasource write properties /////////
  //////////////////////////////////////////////////////////
  // format: on
}
package com.clickhouse.spark.func

import com.clickhouse.spark.hash.{CityHash64 => CityHash64Impl}

/**
 * Spark-side registration of ClickHouse's `cityHash64` over string arguments.
 * Mirrors
 * https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L694
 */
object CityHash64 extends MultiStringArgsHash {

  /** Name the function is registered under in Spark. */
  override protected def funcName: String = "clickhouse_cityHash64"

  /** ClickHouse-side function name(s) this implementation corresponds to. */
  override val ckFuncNames: Array[String] = Array("cityHash64")

  override def applyHash(input: Array[Any]): Long = CityHash64Impl(input)
}
package com.clickhouse.spark.func

import com.clickhouse.spark.hash.{HashUtils, Murmurhash2_32, Murmurhash2_64}
import com.clickhouse.spark.hash

/**
 * Spark-side registration of ClickHouse's 64-bit `murmurHash2_64`.
 * https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L460
 */
object MurmurHash2_64 extends MultiStringArgsHash {

  override protected def funcName: String = "clickhouse_murmurHash2_64"

  override val ckFuncNames: Array[String] = Array("murmurHash2_64")

  override def applyHash(input: Array[Any]): Long = Murmurhash2_64(input)
}

/**
 * Spark-side registration of ClickHouse's 32-bit `murmurHash2_32`.
 * https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
 */
object MurmurHash2_32 extends MultiStringArgsHash {

  override protected def funcName: String = "clickhouse_murmurHash2_32"

  override val ckFuncNames: Array[String] = Array("murmurHash2_32")

  override def applyHash(input: Array[Any]): Long = {
    val signed = Murmurhash2_32(input)
    // ClickHouse treats the 32-bit result as unsigned; widen accordingly.
    HashUtils.toUInt32(signed)
  }
}
package com.clickhouse.spark.func

import com.clickhouse.spark.hash.{HashUtils, Murmurhash3_32, Murmurhash3_64}
import com.clickhouse.spark.hash

/**
 * Spark-side registration of ClickHouse's 64-bit `murmurHash3_64`.
 * https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L543
 */
object MurmurHash3_64 extends MultiStringArgsHash {

  override protected def funcName: String = "clickhouse_murmurHash3_64"

  override val ckFuncNames: Array[String] = Array("murmurHash3_64")

  override def applyHash(input: Array[Any]): Long = Murmurhash3_64(input)
}

/**
 * Spark-side registration of ClickHouse's 32-bit `murmurHash3_32`.
 * https://github.com/ClickHouse/ClickHouse/blob/v23.5.3.24-stable/src/Functions/FunctionsHashing.h#L519
 * NOTE(review): the upstream line reference above matches murmurHash2_32's and
 * looks copy-pasted — verify the murmurHash3_32 location.
 */
object MurmurHash3_32 extends MultiStringArgsHash {

  override protected def funcName: String = "clickhouse_murmurHash3_32"

  override val ckFuncNames: Array[String] = Array("murmurHash3_32")

  override def applyHash(input: Array[Any]): Long = {
    val signed = Murmurhash3_32(input)
    // ClickHouse treats the 32-bit result as unsigned; widen accordingly.
    HashUtils.toUInt32(signed)
  }
}
package com.clickhouse.spark.read

import org.apache.spark.sql.connector.catalog.MetadataColumn
import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType, StringType, StructField}

/**
 * Virtual (metadata) columns the connector exposes for ClickHouse tables.
 */
object ClickHouseMetadataColumn {

  /** Metadata columns available on MergeTree-family tables. */
  val mergeTreeMetadataCols: Array[MetadataColumn] = Array(
    ClickHouseMetadataColumn("_part", StringType),
    ClickHouseMetadataColumn("_part_index", LongType),
    ClickHouseMetadataColumn("_part_uuid", StringType),
    ClickHouseMetadataColumn("_partition_id", StringType),
    // ClickHouseMetadataColumn("_partition_value", StringType),
    ClickHouseMetadataColumn("_sample_factor", DoubleType)
  )

  /**
   * Metadata columns for Distributed tables: the MergeTree set bracketed by
   * the originating `_table` and `_shard_num`.
   */
  val distributeMetadataCols: Array[MetadataColumn] =
    ClickHouseMetadataColumn("_table", StringType) +:
      mergeTreeMetadataCols :+
      ClickHouseMetadataColumn("_shard_num", IntegerType)
}

/**
 * Concrete [[MetadataColumn]] describing a single ClickHouse virtual column.
 */
case class ClickHouseMetadataColumn(
  override val name: String,
  override val dataType: DataType,
  override val isNullable: Boolean = false
) extends MetadataColumn {

  /** View of this column as a [[StructField]] for schema construction. */
  def toStructField: StructField = StructField(name, dataType, isNullable)
}
package com.clickhouse.spark.read

import com.clickhouse.spark.spec.{ClusterSpec, DistributedEngineSpec, NodeSpec, TableEngineSpec, TableSpec}
import org.apache.spark.sql.clickhouse.ReadOptions
import org.apache.spark.sql.types.StructType

import java.time.ZoneId

/**
 * Immutable description of a ClickHouse scan job, assembled on the driver and
 * shipped to scan tasks. Filters are carried as compiled ClickHouse SQL text
 * (not Spark Expressions) because validation happens during planning on the
 * driver side.
 */
case class ScanJobDescription(
  node: NodeSpec,
  tz: ZoneId,
  tableSpec: TableSpec,
  tableEngineSpec: TableEngineSpec,
  cluster: Option[ClusterSpec],
  localTableSpec: Option[TableSpec],
  localTableEngineSpec: Option[TableEngineSpec],
  readOptions: ReadOptions,
  // Fields below are filled in by ScanBuilder.
  readSchema: StructType = new StructType,
  filtersExpr: String = "1=1", // always-true predicate when no filters pushed
  groupByClause: Option[String] = None,
  limit: Option[Int] = None
) {

  // When reading a Distributed table with convertDistributedToLocal enabled,
  // target the underlying local database/table instead of the Distributed proxy.

  def database: String =
    tableEngineSpec match {
      case d: DistributedEngineSpec if readOptions.convertDistributedToLocal => d.local_db
      case _ => tableSpec.database
    }

  def table: String =
    tableEngineSpec match {
      case d: DistributedEngineSpec if readOptions.convertDistributedToLocal => d.local_table
      case _ => tableSpec.name
    }
}
package com.clickhouse.spark.write.format

import com.clickhouse.spark.write.{ClickHouseWriter, WriteJobDescription}
import org.apache.commons.io.IOUtils
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.clickhouse.JsonWriter

/**
 * Data writer that streams rows to ClickHouse in the `JSONEachRow` input
 * format, one JSON object per line.
 */
class ClickHouseJsonEachRowWriter(writeJob: WriteJobDescription) extends ClickHouseWriter(writeJob) {

  override def format: String = "JSONEachRow"

  // Renders rows into `output`, the stream provided by the base writer that
  // feeds `serializedBuffer`.
  val jsonWriter: JsonWriter = new JsonWriter(revisedDataSchema, writeJob.tz, output)

  override def writeRow(record: InternalRow): Unit = jsonWriter.write(record)

  override def doSerialize(): Array[Byte] = {
    // Flush buffered JSON and close the stream BEFORE snapshotting the bytes;
    // otherwise the tail of the payload could be missing.
    jsonWriter.flush()
    output.close()
    serializedBuffer.toByteArray
  }

  override def close(): Unit = {
    // Best-effort close of the JSON layer, then the base writer's resources.
    IOUtils.closeQuietly(jsonWriter)
    super.close()
  }
}
package org.apache.spark.sql.clickhouse

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.json.{JSONOptions, JacksonGenerator}
import org.apache.spark.sql.types.StructType

import java.io.{Closeable, Flushable, OutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets
import java.time.ZoneId

/**
 * Streams Spark [[InternalRow]]s to `output` as newline-delimited UTF-8 JSON
 * objects (the shape ClickHouse's `JSONEachRow` input format expects).
 */
class JsonWriter(schema: StructType, tz: ZoneId, output: OutputStream) extends Closeable with Flushable {

  // Timestamps are rendered without a zone suffix in both TZ and NTZ forms.
  private val jsonConf: Map[String, String] = Map(
    "timestampFormat" -> "yyyy-MM-dd HH:mm:ss",
    "timestampNTZFormat" -> "yyyy-MM-dd HH:mm:ss"
  )
  private val charWriter = new OutputStreamWriter(output, StandardCharsets.UTF_8)
  private val generator = new JacksonGenerator(schema, charWriter, new JSONOptions(jsonConf, tz.getId))

  /** Writes one row followed by a line terminator. */
  def write(row: InternalRow): Unit = {
    generator.write(row)
    generator.writeLineEnding()
  }

  override def flush(): Unit = generator.flush()

  override def close(): Unit = generator.close()
}
package org.apache.spark.sql.clickhouse

import org.apache.arrow.memory.BufferAllocator
import org.apache.arrow.vector.types.pojo.Schema
import org.apache.spark._
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.util.VersionUtils

/** Thin wrappers over Spark internals used by the connector. */
object SparkUtils {

  /** (major, minor) version of the Spark runtime this connector runs inside. */
  lazy val MAJOR_MINOR_VERSION: (Int, Int) =
    VersionUtils.majorMinorVersion(SPARK_VERSION)

  /** Converts a Spark schema to its Arrow equivalent. */
  // NOTE(review): the literal `true` maps to ArrowUtils.toArrowSchema's third
  // parameter — confirm its meaning against this Spark version's signature.
  def toArrowSchema(schema: StructType, timeZoneId: String): Schema =
    ArrowUtils.toArrowSchema(schema, timeZoneId, true)

  /** Creates a named child allocator off Spark's root Arrow allocator with no upper limit. */
  def spawnArrowAllocator(name: String): BufferAllocator =
    ArrowUtils.rootAllocator.newChildAllocator(name, 0, Long.MaxValue)
}
package org.apache.spark.sql.clickhouse

import com.clickhouse.spark.ClickHouseHelper
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.scalatest.funsuite.AnyFunSuite

import scala.collection.JavaConverters._

/** Tests [[ClickHouseHelper.buildNodeSpec]] option precedence. */
class ClickHouseHelperSuite extends AnyFunSuite with ClickHouseHelper {

  test("buildNodeSpec") {
    val opts = Map(
      "database" -> "testing",
      "option.database" -> "production",
      "option.ssl" -> "true"
    ).asJava
    val nodeSpec = buildNodeSpec(new CaseInsensitiveStringMap(opts))
    // Catalog-level `database` beats `option.database`; other `option.*`
    // entries pass through into the node's options.
    assert(nodeSpec.database === "testing")
    assert(nodeSpec.options.get("ssl") === "true")
  }
}