├── .asf.yaml ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── enhancement.yml │ └── feature-request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── approve-label-trigger.yml │ ├── approve-label.yml │ ├── build-extension.yml │ ├── license-eyes.yml │ ├── run-e2ecase.yml │ └── run-itcase.yml ├── .gitignore ├── .licenserc.yaml ├── .mvn └── wrapper │ ├── MavenWrapperDownloader.java │ └── maven-wrapper.properties ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CONTRIBUTING_CN.md ├── LICENSE-dependencies.txt ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── custom_env.sh.tpl ├── env.sh ├── mvnw ├── spark-doris-connector ├── build.sh ├── pom.xml ├── spark-doris-connector-base │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── org │ │ │ │ └── apache │ │ │ │ └── doris │ │ │ │ └── spark │ │ │ │ ├── client │ │ │ │ ├── DorisBackendHttpClient.java │ │ │ │ ├── DorisBackendThriftClient.java │ │ │ │ ├── DorisFrontendClient.java │ │ │ │ ├── entity │ │ │ │ │ ├── Backend.java │ │ │ │ │ ├── CopyIntoResponse.java │ │ │ │ │ ├── DorisReaderPartition.java │ │ │ │ │ ├── Frontend.java │ │ │ │ │ └── StreamLoadResponse.java │ │ │ │ ├── read │ │ │ │ │ ├── AbstractThriftReader.java │ │ │ │ │ ├── DorisFlightSqlReader.java │ │ │ │ │ ├── DorisReader.java │ │ │ │ │ ├── DorisThriftReader.java │ │ │ │ │ ├── ReaderPartitionGenerator.java │ │ │ │ │ └── RowBatch.java │ │ │ │ └── write │ │ │ │ │ ├── AbstractCopyIntoProcessor.java │ │ │ │ │ ├── AbstractStreamLoadProcessor.java │ │ │ │ │ ├── CopyIntoProcessor.java │ │ │ │ │ ├── DorisCommitter.java │ │ │ │ │ ├── DorisWriter.java │ │ │ │ │ └── StreamLoadProcessor.java │ │ │ │ ├── config │ │ │ │ ├── ConfigOption.java │ │ │ │ ├── ConfigOptions.java │ │ │ │ ├── DorisConfig.java │ │ │ │ └── DorisOptions.java │ │ │ │ ├── exception │ │ │ │ ├── ConnectedFailedException.java │ │ │ │ ├── CopyIntoException.java │ │ │ │ ├── DorisException.java │ │ │ │ ├── DorisInternalException.java │ │ │ │ ├── DorisRuntimeException.java │ │ │ │ 
├── IllegalArgumentException.java │ │ │ │ ├── ShouldNeverHappenException.java │ │ │ │ └── StreamLoadException.java │ │ │ │ ├── rest │ │ │ │ └── models │ │ │ │ │ ├── DataFormat.java │ │ │ │ │ ├── DataModel.java │ │ │ │ │ ├── Field.java │ │ │ │ │ ├── QueryPlan.java │ │ │ │ │ ├── RespContent.java │ │ │ │ │ ├── Schema.java │ │ │ │ │ └── Tablet.java │ │ │ │ └── util │ │ │ │ ├── CopySQLBuilder.java │ │ │ │ ├── ErrorMessages.java │ │ │ │ ├── EscapeHandler.java │ │ │ │ ├── HttpPostBuilder.java │ │ │ │ ├── HttpPutBuilder.java │ │ │ │ └── ResponseUtil.java │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ ├── doris │ │ │ └── spark │ │ │ │ ├── exception │ │ │ │ └── OptionRequiredException.scala │ │ │ │ ├── package.scala │ │ │ │ ├── rdd │ │ │ │ ├── AbstractDorisRDD.scala │ │ │ │ ├── AbstractDorisRDDIterator.scala │ │ │ │ ├── DorisRDD.scala │ │ │ │ └── DorisSpark.scala │ │ │ │ ├── sql │ │ │ │ ├── DorisRow.scala │ │ │ │ ├── DorisRowFlightSqlReader.scala │ │ │ │ ├── DorisRowThriftReader.scala │ │ │ │ ├── ScalaDorisRowRDD.scala │ │ │ │ ├── Utils.scala │ │ │ │ └── sources │ │ │ │ │ ├── DorisRelation.scala │ │ │ │ │ └── DorisSourceRegisterTrait.scala │ │ │ │ ├── testcase │ │ │ │ └── TestStreamLoadForArrowType.scala │ │ │ │ └── util │ │ │ │ ├── DorisDialects.scala │ │ │ │ ├── HttpUtils.scala │ │ │ │ ├── IPUtils.java │ │ │ │ ├── Retry.scala │ │ │ │ ├── RowConvertors.scala │ │ │ │ ├── SchemaConvertors.scala │ │ │ │ └── URLs.scala │ │ │ └── spark │ │ │ └── sql │ │ │ ├── execution │ │ │ └── arrow │ │ │ │ └── DorisArrowWriter.scala │ │ │ └── util │ │ │ └── DorisArrowUtils.scala │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── doris │ │ │ └── spark │ │ │ ├── client │ │ │ ├── entity │ │ │ │ └── StreamLoadResponseTest.java │ │ │ └── read │ │ │ │ ├── ReaderPartitionGeneratorTest.java │ │ │ │ └── RowBatchTest.java │ │ │ └── sql │ │ │ └── ExpectedExceptionTest.java │ │ ├── resources │ │ └── data.csv │ │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ ├── 
sql │ │ └── TestUtils.scala │ │ └── util │ │ ├── DorisDialectsTest.scala │ │ ├── RowConvertorsTest.scala │ │ ├── SchemaConvertorsTest.scala │ │ └── URLsTest.scala ├── spark-doris-connector-it │ ├── pom.xml │ └── src │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── doris │ │ │ └── spark │ │ │ ├── container │ │ │ ├── AbstractContainerTestBase.java │ │ │ ├── ContainerUtils.java │ │ │ └── instance │ │ │ │ ├── ContainerService.java │ │ │ │ ├── DorisContainer.java │ │ │ │ └── DorisCustomerContainer.java │ │ │ ├── example │ │ │ ├── DorisReadExample.scala │ │ │ ├── DorisWriteBatchExample.scala │ │ │ └── DorisWriteStreamExample.scala │ │ │ └── sql │ │ │ ├── Doris2DorisE2ECase.scala │ │ │ ├── DorisCatalogITCase.scala │ │ │ ├── DorisReaderITCase.scala │ │ │ ├── DorisStreamingWriterITCase.scala │ │ │ ├── DorisWriterFailoverITCase.scala │ │ │ ├── DorisWriterITCase.scala │ │ │ ├── TestConnectorWriteDoris.scala │ │ │ └── TestSparkConnector.scala │ │ └── resources │ │ ├── container │ │ └── ddl │ │ │ ├── read_all_type.sql │ │ │ ├── read_bitmap.sql │ │ │ ├── write_all_type.sql │ │ │ └── write_bitmap.sql │ │ ├── docker │ │ └── doris │ │ │ ├── be.conf │ │ │ └── fe.conf │ │ └── log4j2-test.properties ├── spark-doris-connector-spark-2 │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ ├── load │ │ └── CommitMessage.scala │ │ ├── sql │ │ ├── DorisSourceProvider.scala │ │ └── DorisStreamLoadSink.scala │ │ ├── txn │ │ ├── TransactionHandler.scala │ │ └── listener │ │ │ ├── DorisTransactionListener.scala │ │ │ └── DorisTxnStreamingQueryListener.scala │ │ └── writer │ │ └── DorisWriter.scala ├── spark-doris-connector-spark-3-base │ ├── pom.xml │ └── src │ │ └── main │ │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ ├── catalog │ │ ├── DorisTableBase.scala │ │ ├── 
DorisTableCatalogBase.scala │ │ └── DorisTableProviderBase.scala │ │ ├── read │ │ ├── AbstractDorisScan.scala │ │ ├── DorisPartitionReader.scala │ │ ├── DorisPartitionReaderFactory.scala │ │ ├── DorisScan.scala │ │ ├── DorisScanBuilderBase.scala │ │ └── ScanMode.java │ │ ├── sql │ │ └── sources │ │ │ └── DorisSourceProvider.scala │ │ └── write │ │ ├── DorisDataWriter.scala │ │ ├── DorisDataWriterFactory.scala │ │ ├── DorisWrite.scala │ │ └── DorisWriteBuilder.scala ├── spark-doris-connector-spark-3.1 │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ ├── catalog │ │ ├── DorisTable.scala │ │ └── DorisTableCatalog.scala │ │ ├── read │ │ └── DorisScanBuilder.scala │ │ └── sql │ │ └── sources │ │ └── DorisDataSource.scala ├── spark-doris-connector-spark-3.2 │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ ├── catalog │ │ ├── DorisTable.scala │ │ └── DorisTableCatalog.scala │ │ ├── read │ │ └── DorisScanBuilder.scala │ │ └── sql │ │ └── sources │ │ └── DorisDataSource.scala ├── spark-doris-connector-spark-3.3 │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── doris │ │ │ └── spark │ │ │ ├── catalog │ │ │ ├── DorisTable.scala │ │ │ └── DorisTableCatalog.scala │ │ │ ├── read │ │ │ ├── DorisScanBuilder.scala │ │ │ ├── DorisScanV2.scala │ │ │ └── expression │ │ │ │ └── V2ExpressionBuilder.scala │ │ │ └── sql │ │ │ └── sources │ │ │ └── DorisDataSource.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ └── read │ │ └── expression │ │ └── 
V2ExpressionBuilderTest.scala ├── spark-doris-connector-spark-3.4 │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── doris │ │ │ └── spark │ │ │ ├── catalog │ │ │ ├── DorisTable.scala │ │ │ └── DorisTableCatalog.scala │ │ │ ├── read │ │ │ ├── DorisScanBuilder.scala │ │ │ ├── DorisScanV2.scala │ │ │ └── expression │ │ │ │ └── V2ExpressionBuilder.scala │ │ │ └── sql │ │ │ └── sources │ │ │ └── DorisDataSource.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ └── read │ │ └── expression │ │ └── V2ExpressionBuilderTest.scala └── spark-doris-connector-spark-3.5 │ ├── pom.xml │ └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ └── scala │ │ └── org │ │ └── apache │ │ └── doris │ │ └── spark │ │ ├── catalog │ │ ├── DorisTable.scala │ │ └── DorisTableCatalog.scala │ │ ├── read │ │ ├── DorisScanBuilder.scala │ │ ├── DorisScanV2.scala │ │ └── expression │ │ │ └── V2ExpressionBuilder.scala │ │ └── sql │ │ └── sources │ │ └── DorisDataSource.scala │ └── test │ └── scala │ └── org │ └── apache │ └── doris │ └── spark │ └── read │ └── expression │ └── V2ExpressionBuilderTest.scala ├── spark-load ├── build.sh ├── pom.xml ├── spark-load-common │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── apache │ │ │ └── doris │ │ │ ├── common │ │ │ ├── DppResult.java │ │ │ ├── io │ │ │ │ ├── BitmapValue.java │ │ │ │ ├── Codec.java │ │ │ │ ├── Hll.java │ │ │ │ └── Roaring64Map.java │ │ │ └── jmockit │ │ │ │ ├── AutoType.java │ │ │ │ ├── ConstructorReflection.java │ │ │ │ ├── Deencapsulation.java │ │ │ │ ├── FieldReflection.java │ │ │ │ ├── GeneratedClasses.java │ │ │ │ ├── MethodReflection.java │ │ │ │ ├── ParameterReflection.java │ │ │ │ └── ThrowOfCheckedException.java │ │ │ ├── config │ │ 
│ └── EtlJobConfig.java │ │ │ └── util │ │ │ └── JsonUtils.java │ │ └── test │ │ └── java │ │ └── org │ │ └── apache │ │ └── doris │ │ └── config │ │ └── EtlJobConfigTest.java ├── spark-load-core │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── org │ │ │ │ └── apache │ │ │ │ └── doris │ │ │ │ ├── SparkLoadRunner.java │ │ │ │ ├── client │ │ │ │ └── DorisClient.java │ │ │ │ ├── common │ │ │ │ ├── CommandLineOptions.java │ │ │ │ ├── Constants.java │ │ │ │ ├── LoadInfo.java │ │ │ │ ├── ResponseEntity.java │ │ │ │ ├── enums │ │ │ │ │ ├── JobStatus.java │ │ │ │ │ ├── LoadMode.java │ │ │ │ │ ├── StorageType.java │ │ │ │ │ └── TaskType.java │ │ │ │ └── meta │ │ │ │ │ ├── LoadInfoResponse.java │ │ │ │ │ ├── LoadMeta.java │ │ │ │ │ └── TableMeta.java │ │ │ │ ├── config │ │ │ │ └── JobConfig.java │ │ │ │ ├── exception │ │ │ │ └── SparkLoadException.java │ │ │ │ ├── load │ │ │ │ ├── LoaderFactory.java │ │ │ │ └── job │ │ │ │ │ ├── Loader.java │ │ │ │ │ ├── PullLoader.java │ │ │ │ │ └── Recoverable.java │ │ │ │ └── util │ │ │ │ ├── DateUtils.java │ │ │ │ ├── FileSystemUtils.java │ │ │ │ └── HttpUtils.java │ │ └── resources │ │ │ └── log4j.properties │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── doris │ │ │ ├── SparkLoadRunnerTest.java │ │ │ ├── client │ │ │ └── DorisClientTest.java │ │ │ ├── common │ │ │ └── meta │ │ │ │ └── LoadMetaTest.java │ │ │ ├── config │ │ │ └── JobConfigTest.java │ │ │ ├── load │ │ │ ├── LoaderFactoryTest.java │ │ │ └── job │ │ │ │ └── PullLoaderTest.java │ │ │ └── util │ │ │ └── DateUtilsTest.java │ │ └── resources │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ └── yarn-site.xml ├── spark-load-dist │ ├── pom.xml │ └── src │ │ └── main │ │ ├── assembly │ │ └── assembly.xml │ │ └── bin │ │ └── spark-load.sh └── spark-load-dpp │ ├── pom.xml │ └── src │ ├── main │ └── java │ │ └── org │ │ └── apache │ │ └── doris │ │ ├── common │ │ └── SparkDppException.java │ │ └── load │ │ └── loadv2 │ │ ├── dpp │ │ ├── ColumnParser.java │ 
│ ├── DorisKryoRegistrator.java │ │ ├── DorisRangePartitioner.java │ │ ├── DppColumns.java │ │ ├── DppUtils.java │ │ ├── GlobalDictBuilder.java │ │ ├── MinimumCoverageRollupTreeBuilder.java │ │ ├── RollupTreeBuilder.java │ │ ├── RollupTreeNode.java │ │ ├── SparkDpp.java │ │ ├── SparkRDDAggregator.java │ │ └── StringAccumulator.java │ │ └── etl │ │ └── SparkEtlJob.java │ └── test │ └── java │ └── org │ └── apache │ └── doris │ └── load │ └── loadv2 │ ├── dpp │ ├── ColumnParserTest.java │ ├── DorisRangePartitionerTest.java │ ├── DppUtilsTest.java │ ├── MinimumCoverageRollupTreeBuilderTest.java │ └── SparkDppTest.java │ └── etl │ └── SparkEtlJobTest.java └── tools └── releasing ├── create_release_branch.sh ├── create_source_release.sh ├── deploy_staging_jars.sh └── update_branch_version.sh /.asf.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | github: 19 | description: Spark Connector for Apache Doris 20 | homepage: https://doris.apache.org/ 21 | labels: 22 | - data-warehousing 23 | - mpp 24 | - olap 25 | - dbms 26 | - apache 27 | - doris 28 | - spark 29 | - connector 30 | enabled_merge_buttons: 31 | squash: true 32 | merge: false 33 | rebase: false 34 | protected_branches: 35 | master: 36 | required_pull_request_reviews: 37 | dismiss_stale_reviews: true 38 | required_approving_review_count: 1 39 | features: 40 | issues: true 41 | projects: true 42 | notifications: 43 | issues: commits@doris.apache.org 44 | commits: commits@doris.apache.org 45 | pullrequests: commits@doris.apache.org 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | blank_issues_enabled: true 19 | contact_links: 20 | - name: Ask a question or get support 21 | url: https://github.com/apache/incubator-doris/discussions 22 | about: Ask a question or request support for using Apache Doris 23 | 24 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Proposed changes 2 | 3 | Issue Number: close #xxx 4 | 5 | ## Problem Summary: 6 | 7 | Describe the overview of changes. 8 | 9 | ## Checklist(Required) 10 | 11 | 1. Does it affect the original behavior: (Yes/No/I don't know) 12 | 2. Have unit tests been added: (Yes/No/No Need) 13 | 3. Has documentation been added or modified: (Yes/No/No Need) 14 | 4. Does it need to update dependencies: (Yes/No) 15 | 5. Are there any changes that cannot be rolled back: (Yes/No) 16 | 17 | ## Further comments 18 | 19 | If this is a relatively large or complex change, kick off the discussion at [dev@doris.apache.org](mailto:dev@doris.apache.org) by explaining why you chose the solution you did and what alternatives you considered, etc... 20 | -------------------------------------------------------------------------------- /.github/workflows/approve-label-trigger.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | --- 19 | name: Label when reviewed 20 | on: pull_request_review 21 | jobs: 22 | 23 | label-when-reviewed: 24 | name: "Label PRs when reviewed" 25 | runs-on: ubuntu-latest 26 | steps: 27 | - name: "Do nothing. Only trigger corresponding workflow_run event" 28 | run: echo 29 | -------------------------------------------------------------------------------- /.github/workflows/license-eyes.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | # 18 | --- 19 | name: License Check 20 | on: 21 | pull_request: 22 | push: 23 | branches: 24 | - master 25 | jobs: 26 | license-check: 27 | name: "License Check" 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" 31 | uses: actions/checkout@v3 32 | - name: Check License 33 | uses: apache/skywalking-eyes@v0.2.0 34 | env: 35 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .idea/** 3 | .vscode/** 4 | 5 | custom_env.sh 6 | spark-doris-connector/dependency-reduced-pom.xml 7 | spark-doris-connector/output/ 8 | spark-doris-connector/target/ 9 | spark-doris-connector/.idea/ 10 | 11 | dependency-reduced-pom.xml 12 | target 13 | .flattened-pom.xml 14 | 15 | spark-load/.idea/ 16 | spark-load/target 17 | spark-load/spark-load-core/dependency-reduced-pom.xml 18 | spark-load/spark-load-core/output/ 19 | spark-load/spark-load-core/target/ 20 | spark-load/spark-load-core/.idea/ 21 | spark-load/spark-load-dist/dependency-reduced-pom.xml 22 | spark-load/spark-load-dist/target/ 23 | spark-load/spark-load-dpp/dependency-reduced-pom.xml 24 | spark-load/spark-load-dpp/.flattened-pom.xml 25 | spark-load/spark-load-dpp/target/ 26 | spark-load/spark-load-common/dependency-reduced-pom.xml 27 | spark-load/spark-load-common/target/ 28 | 29 | 30 | ### Java template 31 | # Compiled class file 32 | *.class 33 | 34 | # Log file 35 | *.log 36 | 37 | # BlueJ files 38 | *.ctxt 39 | 40 | # Mobile Tools for Java (J2ME) 41 | .mtj.tmp/ 42 | 43 | # Package Files # 44 | *.jar 45 | *.war 46 | *.nar 47 | *.ear 48 | *.zip 49 | *.tar.gz 50 | *.rar 51 | 52 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 53 | hs_err_pid* 54 | replay_pid* 55 | 56 | *.iml 57 | 
-------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | header: 2 | license: 3 | spdx-id: Apache-2.0 4 | copyright-owner: Apache Software Foundation 5 | 6 | paths-ignore: 7 | - 'dist' 8 | - 'LICENSE.txt' 9 | - 'NOTICE.txt' 10 | - 'NOTICE' 11 | - '.gitignore' 12 | - '.github/PULL_REQUEST_TEMPLATE.md' 13 | - '.licenserc.yaml' 14 | - 'custom_env.sh.tpl' 15 | - '**/*.csv' 16 | - '**/jmockit/**' 17 | - 'spark-doris-connector/spark-doris-connector-it/src/test/resources/container/' 18 | 19 | comment: on-failure 20 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.4/apache-maven-3.8.4-bin.zip 19 | wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.1/maven-wrapper-3.1.1.jar 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Contribute to Doris and its surrounding projects 21 | 22 | Thank you very much for your interest in Doris and its surrounding projects. We welcome your suggestions, opinions (including criticisms), comments, and contributions to Doris. 23 | 24 | For more information on how to contribute to Doris, please go to the Doris main repository [CONTRIBUTING](https://github.com/apache/doris/blob/master/CONTRIBUTING.md). 25 | -------------------------------------------------------------------------------- /CONTRIBUTING_CN.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | # 为 Doris 及其周边项目做贡献 22 | 23 | 非常感谢您对 Doris 及其周边项目感兴趣,我们非常欢迎您对 Doris 的各种建议、意见(包括批评)、评论和贡献。 24 | 25 | 关于如何为 Doris 及周边项目做贡献,请前往 Doris 主库查阅 [CONTRIBUTING_CN](https://github.com/apache/doris/blob/master/CONTRIBUTING_CN.md) 26 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Spark Connector for Apache Doris 2 | Copyright 2018-2025 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 
6 | -------------------------------------------------------------------------------- /custom_env.sh.tpl: -------------------------------------------------------------------------------- 1 | #export MVN_BIN= 2 | #export JAVA_HOME= 3 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/client/entity/CopyIntoResponse.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.spark.client.entity; 19 | 20 | /** Plain value holder for the three parts of a response: a status code, a message and a content string. Presumably models the reply to a Doris copy-into request (inferred from the class name; confirm against callers). */ public class CopyIntoResponse { 21 | 22 | private Integer code; 23 | private String msg; 24 | private String content; 25 | 26 | /** Stores the given code, message and content as-is; no validation is performed. */ public CopyIntoResponse(Integer code, String msg, String content) { 27 | this.code = code; 28 | this.msg = msg; 29 | this.content = content; 30 | } 31 | 32 | /** @return the code passed to the constructor (a boxed Integer, so it may be null) */ public Integer getCode() { 33 | return code; 34 | } 35 | 36 | /** @return the message passed to the constructor */ public String getMsg() { 37 | return msg; 38 | } 39 | 40 | /** @return the content string passed to the constructor */ public String getContent() { 41 | return content; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/client/entity/Frontend.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.spark.client.entity; 19 | 20 | import java.io.Serializable; 21 | 22 | /** Serializable description of a frontend node: a host name plus its HTTP, query and Flight SQL ports. A port that was not supplied to the constructor is stored as -1. */ public class Frontend implements Serializable { 23 | 24 | private String host; 25 | private int httpPort; 26 | private int queryPort; 27 | private int flightSqlPort; 28 | 29 | /** Creates a frontend with only host and HTTP port known; query and Flight SQL ports are set to -1. */ public Frontend(String host, int httpPort) { 30 | this(host, httpPort, -1, -1); 31 | } 32 | 33 | /** Creates a frontend with host, HTTP port and query port; the Flight SQL port is set to -1. */ public Frontend(String host, int httpPort, int queryPort) { 34 | this(host, httpPort, queryPort, -1); 35 | } 36 | 37 | /** Creates a frontend with every field given explicitly; values are stored without validation. */ public Frontend(String host, int httpPort, int queryPort, int flightSqlPort) { 38 | this.host = host; 39 | this.httpPort = httpPort; 40 | this.queryPort = queryPort; 41 | this.flightSqlPort = flightSqlPort; 42 | } 43 | 44 | // Getters 45 | public String getHost() { 46 | return host; 47 | } 48 | 49 | public int getHttpPort() { 50 | return httpPort; 51 | } 52 | 53 | public int getQueryPort() { 54 | return queryPort; 55 | } 56 | 57 | public int getFlightSqlPort() { 58 | return flightSqlPort; 59 | } 60 | 61 | /** @return host and HTTP port joined as "host:httpPort" */ public String hostHttpPortString() { 62 | return host + ":" + httpPort; 63 | } 64 | 65 | /** @return host and query port joined as "host:queryPort"; yields "host:-1" when the query port was not supplied */ public String hostQueryPortString() { 66 | return host + ":" + queryPort; 67 | } 68 | 69 | } -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/client/read/DorisReader.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.client.read; 19 | 20 | import org.apache.doris.spark.client.entity.DorisReaderPartition; 21 | import org.apache.doris.spark.config.DorisConfig; 22 | import org.apache.doris.spark.exception.DorisException; 23 | 24 | /** Abstract base for readers bound to a single DorisReaderPartition. Subclasses supply the iteration (hasNext/next) and cleanup (close) logic. */ public abstract class DorisReader { 25 | 26 | protected DorisReaderPartition partition; 27 | protected DorisConfig config; 28 | /** Not assigned in this class; presumably populated by subclasses while reading (confirm in implementations). */ protected RowBatch rowBatch; 29 | 30 | /** Keeps the partition and takes the configuration from partition.getConfig(). */ public DorisReader(DorisReaderPartition partition) { 31 | this.partition = partition; 32 | this.config = partition.getConfig(); 33 | } 34 | 35 | /** @return whether another element can be obtained from next(); @throws DorisException as declared by implementations */ public abstract boolean hasNext() throws DorisException; 36 | 37 | /** @return the next element as an untyped Object; the concrete type is defined by the implementation; @throws DorisException as declared by implementations */ public abstract Object next() throws DorisException; 38 | 39 | /** Closes the reader; implementations define which resources are released. */ public abstract void close(); 40 | } -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/client/read/DorisThriftReader.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.client.read; 19 | 20 | import org.apache.doris.spark.client.entity.DorisReaderPartition; 21 | import org.apache.doris.spark.rest.models.Schema; 22 | import org.apache.doris.spark.util.SchemaConvertors; 23 | import scala.collection.JavaConverters; 24 | 25 | /** Concrete AbstractThriftReader whose only addition is how the Doris schema is obtained: it is built from the selected columns of the scan-open result. */ public class DorisThriftReader extends AbstractThriftReader { 26 | 27 | /** Delegates entirely to the superclass constructor; any exception it throws is propagated. */ public DorisThriftReader(DorisReaderPartition partition) throws Exception { 28 | super(partition); 29 | } 30 | 31 | /** Converts scanOpenResult.getSelectedColumns() from a Java list to a Scala Seq and passes it to SchemaConvertors.convertToSchema. scanOpenResult is not declared in this class; presumably inherited from AbstractThriftReader (confirm). */ @Override 32 | protected Schema getDorisSchema() { 33 | return SchemaConvertors.convertToSchema( 34 | JavaConverters.asScalaBufferConverter(scanOpenResult.getSelectedColumns()).asScala().toSeq()); 35 | } 36 | 37 | } -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/client/write/CopyIntoProcessor.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.client.write; 19 | 20 | import org.apache.doris.spark.config.DorisConfig; 21 | import org.apache.doris.spark.util.RowConvertors; 22 | import org.apache.spark.sql.catalyst.InternalRow; 23 | import org.apache.spark.sql.types.StructField; 24 | import org.apache.spark.sql.types.StructType; 25 | 26 | public class CopyIntoProcessor extends AbstractCopyIntoProcessor { 27 | 28 | private StructType schema; 29 | 30 | public CopyIntoProcessor(DorisConfig config) throws Exception { 31 | super(config); 32 | this.schema = new StructType(new StructField[0]); 33 | } 34 | 35 | public CopyIntoProcessor(DorisConfig config, StructType schema) throws Exception { 36 | super(config); 37 | this.schema = schema; 38 | } 39 | 40 | @Override 41 | protected String toFormat(InternalRow row, String format) { 42 | switch (format) { 43 | case "csv": 44 | return RowConvertors.convertToCsv(row, schema, columnSeparator); 45 | case "json": 46 | return RowConvertors.convertToJson(row, schema); 47 | default: 48 | return null; 49 | } 50 | } 51 | 52 | public void setSchema(StructType schema) { 53 | this.schema = schema; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/client/write/DorisCommitter.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor 
/**
 * Serializable contract for finishing a write: a message is either committed
 * or aborted.
 */
public interface DorisCommitter extends Serializable {

    /**
     * Commits the work identified by {@code message}.
     *
     * @param message value handed to the implementation
     * @throws Exception declared for implementations
     */
    void commit(String message) throws Exception;

    /**
     * Aborts the work identified by {@code message}.
     *
     * @param message value handed to the implementation
     * @throws Exception declared for implementations
     */
    void abort(String message) throws Exception;
}
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.client.write; 19 | 20 | import org.apache.doris.spark.config.DorisOptions; 21 | 22 | import java.io.IOException; 23 | import java.io.Serializable; 24 | 25 | public abstract class DorisWriter implements Serializable { 26 | 27 | protected int batchSize; 28 | 29 | protected int currentBatchCount = 0; 30 | 31 | public DorisWriter(int batchSize) { 32 | if (batchSize <= 0) { 33 | throw new IllegalArgumentException(DorisOptions.DORIS_SINK_BATCH_SIZE.getName() + " must be greater than 0"); 34 | } 35 | this.batchSize = batchSize; 36 | } 37 | 38 | public abstract void load(R row) throws Exception; 39 | 40 | public abstract String stop() throws Exception; 41 | 42 | public abstract void close() throws IOException; 43 | 44 | public boolean endOfBatch() { 45 | return currentBatchCount >= batchSize; 46 | } 47 | 48 | public int getBatchCount() { 49 | return currentBatchCount; 50 | } 51 | 52 | public void resetBatchCount() { 53 | currentBatchCount = 0; 54 | } 55 | 56 | } -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/config/ConfigOption.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
/**
 * Describes one configuration option: its key, value type, default value and
 * an optional description.
 *
 * @param <T> value type of the option; the fields and accessors are declared
 *            in terms of {@code T}, so the class must declare it
 */
public class ConfigOption<T> {

    private final String name;
    private final Class<T> tClass;
    private final T defaultValue;
    /** Optional description; {@code null} until {@link #withDescription} is called. */
    private String description;

    /**
     * @param name         option key
     * @param tClass       runtime class of the option value
     * @param defaultValue value reported by {@link #getDefaultValue()}
     */
    public ConfigOption(String name, Class<T> tClass, T defaultValue) {
        this.name = name;
        this.tClass = tClass;
        this.defaultValue = defaultValue;
    }

    /**
     * Sets the description on this instance (mutates, does not copy).
     *
     * @param desc description text
     * @return this option, for chaining
     */
    public ConfigOption<T> withDescription(String desc) {
        this.description = desc;
        return this;
    }

    /** @return the option key */
    public String getName() {
        return name;
    }

    /** @return the default value given at construction */
    public T getDefaultValue() {
        return defaultValue;
    }

    /** @return the description, or {@code null} if none was set */
    public String getDescription() {
        return description;
    }

    /** @return the runtime class of the option value */
    public Class<T> getTypeClass() {
        return tClass;
    }
}
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.exception; 19 | 20 | public class ConnectedFailedException extends DorisException { 21 | public ConnectedFailedException(String server, Throwable cause) { 22 | super("Connect to " + server + "failed.", cause); 23 | } 24 | 25 | public ConnectedFailedException(String server, int statusCode, Throwable cause) { 26 | super("Connect to " + server + "failed, status code is " + statusCode + ".", cause); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/exception/CopyIntoException.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
/**
 * Checked exception for copy-into failures. Every constructor forwards
 * directly to the matching {@link Exception} constructor.
 */
public class CopyIntoException extends Exception {

    /** Creates an exception with neither message nor cause. */
    public CopyIntoException() {
        super();
    }

    /** @param message detail message */
    public CopyIntoException(String message) {
        super(message);
    }

    /**
     * @param message detail message
     * @param cause   underlying failure
     */
    public CopyIntoException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param cause underlying failure */
    public CopyIntoException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message            detail message
     * @param cause              underlying failure
     * @param enableSuppression  whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    protected CopyIntoException(
            String message,
            Throwable cause,
            boolean enableSuppression,
            boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}
/**
 * Checked base exception of the connector. Every constructor forwards
 * directly to the matching {@link Exception} constructor.
 */
public class DorisException extends Exception {

    /** Creates an exception with neither message nor cause. */
    public DorisException() {
        super();
    }

    /** @param message detail message */
    public DorisException(String message) {
        super(message);
    }

    /**
     * @param message detail message
     * @param cause   underlying failure
     */
    public DorisException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param cause underlying failure */
    public DorisException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message            detail message
     * @param cause              underlying failure
     * @param enableSuppression  whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    protected DorisException(
            String message,
            Throwable cause,
            boolean enableSuppression,
            boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.exception; 19 | 20 | import org.apache.doris.sdk.thrift.TStatusCode; 21 | 22 | import java.util.List; 23 | 24 | public class DorisInternalException extends DorisException { 25 | public DorisInternalException(String server, TStatusCode statusCode, List errorMsgs) { 26 | super("Doris server " + server + " internal failed, status code [" + statusCode + "] error message is " + errorMsgs); 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/exception/DorisRuntimeException.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
/**
 * Unchecked exception of the connector. Every constructor forwards directly
 * to the matching {@link RuntimeException} constructor.
 */
public class DorisRuntimeException extends RuntimeException {

    /** Creates an exception with neither message nor cause. */
    public DorisRuntimeException() {
        super();
    }

    /** @param message detail message */
    public DorisRuntimeException(String message) {
        super(message);
    }

    /**
     * @param message detail message
     * @param cause   underlying failure
     */
    public DorisRuntimeException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param cause underlying failure */
    public DorisRuntimeException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message            detail message
     * @param cause              underlying failure
     * @param enableSuppression  whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    protected DorisRuntimeException(String message, Throwable cause,
                                    boolean enableSuppression, boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.exception; 19 | 20 | public class IllegalArgumentException extends DorisException { 21 | public IllegalArgumentException(String msg, Throwable cause) { 22 | super(msg, cause); 23 | } 24 | 25 | public IllegalArgumentException(String arg, String value) { 26 | super("argument '" + arg + "' is illegal, value is '" + value + "'."); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/exception/ShouldNeverHappenException.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.spark.exception; 19 | 20 | public class ShouldNeverHappenException extends DorisException { } 21 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/exception/StreamLoadException.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
/**
 * Checked exception for stream-load failures. Every constructor forwards
 * directly to the matching {@link Exception} constructor.
 */
public class StreamLoadException extends Exception {

    /** Creates an exception with neither message nor cause. */
    public StreamLoadException() {
        super();
    }

    /** @param message detail message */
    public StreamLoadException(String message) {
        super(message);
    }

    /**
     * @param message detail message
     * @param cause   underlying failure
     */
    public StreamLoadException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param cause underlying failure */
    public StreamLoadException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message            detail message
     * @param cause              underlying failure
     * @param enableSuppression  whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    protected StreamLoadException(
            String message,
            Throwable cause,
            boolean enableSuppression,
            boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}
17 | 18 | package org.apache.doris.spark.rest.models; 19 | 20 | public enum DataFormat { 21 | CSV, 22 | JSON, 23 | ARROW 24 | } 25 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/rest/models/DataModel.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.rest.models; 19 | 20 | public enum DataModel { 21 | DUPLICATE, 22 | UNIQUE, 23 | UNIQUE_MOR, 24 | AGGREGATE 25 | } 26 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/rest/models/QueryPlan.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.rest.models; 19 | 20 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties; 21 | 22 | import java.util.Map; 23 | import java.util.Objects; 24 | 25 | @JsonIgnoreProperties(ignoreUnknown = true) 26 | public class QueryPlan { 27 | private int status; 28 | private String opaqued_query_plan; 29 | private Map partitions; 30 | 31 | public int getStatus() { 32 | return status; 33 | } 34 | 35 | public void setStatus(int status) { 36 | this.status = status; 37 | } 38 | 39 | public String getOpaqued_query_plan() { 40 | return opaqued_query_plan; 41 | } 42 | 43 | public void setOpaqued_query_plan(String opaqued_query_plan) { 44 | this.opaqued_query_plan = opaqued_query_plan; 45 | } 46 | 47 | public Map getPartitions() { 48 | return partitions; 49 | } 50 | 51 | public void setPartitions(Map partitions) { 52 | this.partitions = partitions; 53 | } 54 | 55 | @Override 56 | public boolean equals(Object o) { 57 | if (this == o) { 58 | return true; 59 | } 60 | if (o == null || getClass() != o.getClass()) { 61 | return false; 62 | } 63 | QueryPlan queryPlan = (QueryPlan) o; 64 | return status == queryPlan.status && 65 | Objects.equals(opaqued_query_plan, queryPlan.opaqued_query_plan) && 66 | Objects.equals(partitions, queryPlan.partitions); 67 | } 68 | 69 | @Override 70 | public int hashCode() { 71 | return 
Objects.hash(status, opaqued_query_plan, partitions); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/util/ErrorMessages.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.util; 19 | 20 | public abstract class ErrorMessages { 21 | public static final String PARSE_NUMBER_FAILED_MESSAGE = "Parse '{}' to number failed. Original string is '{}'."; 22 | public static final String PARSE_BOOL_FAILED_MESSAGE = "Parse '{}' to boolean failed. 
/**
 * Handler for escape sequences in properties: decodes {@code \xHH} (two
 * hexadecimal digits) into the character with that code.
 */
public class EscapeHandler {
    /** Literal prefix that marks an escape sequence: a backslash followed by {@code x}. */
    public static final String ESCAPE_DELIMITERS_FLAGS = "\\x";

    /**
     * Matches {@code \x} followed by exactly two hexadecimal digits. The character
     * class deliberately contains no {@code |}: inside brackets it is a literal, and
     * accepting it would let input such as {@code \x||} reach {@code Integer.parseInt}.
     */
    public static final Pattern ESCAPE_PATTERN = Pattern.compile("\\\\x([0-9a-fA-F]{2})");

    /**
     * Replaces every {@code \xHH} sequence in {@code source} with the character
     * whose code is {@code HH}; all other text is copied unchanged.
     *
     * @param source text that may contain escape sequences; must not be null
     * @return the decoded text, or {@code source} itself when it contains no {@code \x}
     */
    public static String escapeString(String source) {
        if (!source.contains(ESCAPE_DELIMITERS_FLAGS)) {
            return source;
        }
        Matcher m = ESCAPE_PATTERN.matcher(source);
        StringBuffer buf = new StringBuffer();
        while (m.find()) {
            String decoded = String.valueOf((char) Integer.parseInt(m.group(1), 16));
            // appendReplacement treats '$' and '\' in the replacement as group
            // references / escapes; quote so decoded \x24 and \x5c are emitted literally.
            m.appendReplacement(buf, Matcher.quoteReplacement(decoded));
        }
        m.appendTail(buf);
        return buf.toString();
    }
}
17 | 18 | package org.apache.doris.spark.util; 19 | 20 | import com.google.common.base.Preconditions; 21 | import org.apache.commons.codec.binary.Base64; 22 | import org.apache.http.HttpEntity; 23 | import org.apache.http.HttpHeaders; 24 | import org.apache.http.client.methods.HttpPost; 25 | 26 | import java.nio.charset.StandardCharsets; 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | public class HttpPostBuilder { 31 | String url; 32 | Map header; 33 | HttpEntity httpEntity; 34 | 35 | public HttpPostBuilder() { 36 | header = new HashMap<>(); 37 | } 38 | 39 | public HttpPostBuilder setUrl(String url) { 40 | this.url = url; 41 | return this; 42 | } 43 | 44 | public HttpPostBuilder addCommonHeader() { 45 | header.put(HttpHeaders.EXPECT, "100-continue"); 46 | return this; 47 | } 48 | 49 | public HttpPostBuilder baseAuth(String encoded) { 50 | header.put(HttpHeaders.AUTHORIZATION, "Basic " + encoded); 51 | return this; 52 | } 53 | 54 | public HttpPostBuilder setEntity(HttpEntity httpEntity) { 55 | this.httpEntity = httpEntity; 56 | return this; 57 | } 58 | 59 | public HttpPost build() { 60 | Preconditions.checkNotNull(url); 61 | Preconditions.checkNotNull(httpEntity); 62 | HttpPost put = new HttpPost(url); 63 | header.forEach(put::setHeader); 64 | put.setEntity(httpEntity); 65 | return put; 66 | } 67 | } -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/java/org/apache/doris/spark/util/ResponseUtil.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.util; 19 | 20 | import java.util.regex.Pattern; 21 | 22 | public class ResponseUtil { 23 | public static final Pattern LABEL_EXIST_PATTERN = 24 | Pattern.compile("errCode = 2, detailMessage = Label \\[(.*)\\] " + 25 | "has already been used, relate to txn \\[(\\d+)\\]"); 26 | public static final Pattern COMMITTED_PATTERN = 27 | Pattern.compile("errCode = 2, detailMessage = transaction \\[(\\d+)\\] " + 28 | "is already \\b(COMMITTED|committed|VISIBLE|visible)\\b, not pre-committed."); 29 | 30 | public static boolean isCommitted(String msg) { 31 | return COMMITTED_PATTERN.matcher(msg).matches(); 32 | } 33 | } -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/scala/org/apache/doris/spark/exception/OptionRequiredException.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
/**
 * Signals that a mandatory option was not supplied.
 *
 * @param name name of the missing option; embedded in the exception message
 */
class OptionRequiredException(name: String)
  extends Exception("option [" + name + "] is required")
package object spark {

  /** Implicit view that adds `dorisRDD` to a `SparkContext`. */
  implicit def sparkContextFunctions(sc: SparkContext): SparkContextFunctions = {
    new SparkContextFunctions(sc)
  }

  /** Wrapper around a `SparkContext` exposing Doris read helpers. */
  class SparkContextFunctions(sc: SparkContext) extends Serializable {

    /**
     * Delegates to `DorisSpark.dorisRDD` using the wrapped context.
     *
     * @param tableIdentifier optional table identifier
     * @param query           optional filter query
     * @param cfg             optional extra configuration entries
     */
    def dorisRDD(
        tableIdentifier: Option[String] = None,
        query: Option[String] = None,
        cfg: Option[Map[String, String]] = None): RDD[AnyRef] = {
      val rdd = DorisSpark.dorisRDD(sc, tableIdentifier, query, cfg)
      rdd
    }
  }
}
/**
 * RDD whose partitions are read through [[ScalaDorisRDDIterator]].
 *
 * @param sc     Spark context the RDD belongs to
 * @param params connector options forwarded to the parent RDD
 */
private[spark] class DorisRDD[T: ClassTag](
    sc: SparkContext,
    params: Map[String, String] = Map.empty)
  extends AbstractDorisRDD[T](sc, params) {

  /** Creates the iterator for one partition; `split` must be a `DorisPartition`. */
  override def compute(split: Partition, context: TaskContext): ScalaDorisRDDIterator[T] = {
    new ScalaDorisRDDIterator(context, split.asInstanceOf[DorisPartition].dorisPartition)
  }
}

/** Iterator over one Doris reader partition that yields the reader's values cast to `T`. */
private[spark] class ScalaDorisRDDIterator[T](
    context: TaskContext,
    partition: DorisReaderPartition)
  extends AbstractDorisRDDIterator[T](context, partition) {

  /**
   * Selects the reader class according to the configured read mode.
   *
   * @throws IllegalArgumentException if the read mode is neither "thrift" nor "arrow"
   */
  override def initReader(config: DorisConfig): Unit = {
    // Locale.ROOT keeps the comparison stable under locales with special casing rules
    // (e.g. Turkish, where "THRIFT".toLowerCase is not "thrift").
    config.getValue(DorisOptions.READ_MODE).toLowerCase(java.util.Locale.ROOT) match {
      case "thrift" => config.setProperty(DorisOptions.DORIS_VALUE_READER_CLASS, classOf[DorisThriftReader].getName)
      case "arrow" => config.setProperty(DorisOptions.DORIS_VALUE_READER_CLASS, classOf[DorisFlightSqlReader].getName)
      case rm: String => throw new IllegalArgumentException("Unknown read mode: " + rm)
    }
  }

  /** Unchecked cast of the reader's value to the element type. */
  override def createValue(value: Object): T = {
    value.asInstanceOf[T]
  }
}
object DorisSpark {

  /**
   * Builds a [[DorisRDD]] from the given options.
   *
   * The entries of `cfg` form the base configuration; `query` and
   * `tableIdentifier`, when defined, are stored under the names of
   * `DORIS_FILTER_QUERY` and `DORIS_TABLE_IDENTIFIER` and override any
   * entry of the same name in `cfg`.
   */
  def dorisRDD(
      sc: SparkContext,
      tableIdentifier: Option[String] = None,
      query: Option[String] = None,
      cfg: Option[Map[String, String]] = None): RDD[AnyRef] = {
    val base: Map[String, String] = cfg.getOrElse(Map.empty)
    val withQuery: Map[String, String] =
      query.fold(base)(q => base + (DorisOptions.DORIS_FILTER_QUERY.getName -> q))
    val merged: Map[String, String] =
      tableIdentifier.fold(withQuery)(t => withQuery + (DorisOptions.DORIS_TABLE_IDENTIFIER.getName -> t))
    new DorisRDD[AnyRef](sc, merged)
  }
}
/**
 * Mutable `Row` backed by an `ArrayBuffer` with one slot per entry of `rowOrder`.
 *
 * @param rowOrder column names; only its size is used here, to size the value buffer
 */
private[spark] class DorisRow(rowOrder: Seq[String]) extends Row {
  // Lazily created so that the no-arg constructor (rowOrder == null) does not fail eagerly.
  lazy val values: ArrayBuffer[Any] = ArrayBuffer.fill(rowOrder.size)(null)

  /** No-arg constructor for Kryo serialization. */
  def this() = this(null)

  def iterator: Iterator[Any] = values.iterator

  override def length: Int = values.length

  override def apply(i: Int): Any = values(i)

  override def get(i: Int): Any = values(i)

  override def isNullAt(i: Int): Boolean = values(i) == null

  override def getInt(i: Int): Int = getAs[Int](i)

  override def getLong(i: Int): Long = getAs[Long](i)

  override def getDouble(i: Int): Double = getAs[Double](i)

  override def getFloat(i: Int): Float = getAs[Float](i)

  override def getBoolean(i: Int): Boolean = getAs[Boolean](i)

  override def getShort(i: Int): Short = getAs[Short](i)

  override def getByte(i: Int): Byte = getAs[Byte](i)

  /** String form of the cell, or `null` when the cell is null (instead of throwing). */
  override def getString(i: Int): String = {
    val cell = get(i)
    if (cell == null) null else cell.toString
  }

  // NOTE(review): returns this same mutable instance rather than a snapshot.
  override def copy(): Row = this

  override def toSeq: Seq[Any] = values
}
-------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-base/src/main/scala/org/apache/doris/spark/sql/DorisRowFlightSqlReader.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
/** Flight SQL reader that wraps every record of the row batch in a [[DorisRow]]. */
class DorisRowFlightSqlReader(partition: DorisReaderPartition) extends DorisFlightSqlReader(partition) {

  // Names of the fields being read, taken from the comma-separated read-fields option.
  private val rowOrder: Seq[String] = config.getValue(DorisOptions.DORIS_READ_FIELDS).split(",")

  /**
   * Returns the next record as a [[DorisRow]].
   *
   * Values beyond the row's width are ignored; `java.util.HashMap` values are
   * exposed as Scala maps, everything else is stored unchanged.
   *
   * @throws ShouldNeverHappenException if called when no record is left
   */
  override def next(): AnyRef = {
    if (!hasNext) throw new ShouldNeverHappenException

    val row: DorisRow = new DorisRow(rowOrder)
    val width = row.values.size
    for ((cell, idx) <- rowBatch.next.asScala.zipWithIndex if idx < width) {
      val converted: Any = cell match {
        case m: java.util.HashMap[_, _] => m.asInstanceOf[java.util.HashMap[String, String]].asScala
        case other => other
      }
      row.values.update(idx, converted)
    }
    row
  }
}
/** Thrift reader that wraps every record of the row batch in a [[DorisRow]]. */
class DorisRowThriftReader(partition: DorisReaderPartition) extends DorisThriftReader(partition) {

  // Names of the fields being read, taken from the comma-separated read-fields option.
  private val rowOrder: Seq[String] = config.getValue(DorisOptions.DORIS_READ_FIELDS).split(",")

  /**
   * Returns the next record as a [[DorisRow]].
   *
   * Values beyond the row's width are ignored; `java.util.HashMap` values are
   * exposed as Scala maps, everything else is stored unchanged.
   *
   * @throws ShouldNeverHappenException if called when no record is left
   */
  override def next(): AnyRef = {
    if (!hasNext) throw new ShouldNeverHappenException

    val row: DorisRow = new DorisRow(rowOrder)
    val width = row.values.size
    for ((cell, idx) <- rowBatch.next.asScala.zipWithIndex if idx < width) {
      val converted: Any = cell match {
        case m: java.util.HashMap[_, _] => m.asInstanceOf[java.util.HashMap[String, String]].asScala
        case other => other
      }
      row.values.update(idx, converted)
    }
    row
  }
}
/**
 * RDD of Spark `Row`s whose partitions are read through [[ScalaDorisRowRDDIterator]].
 *
 * @param sc     Spark context the RDD belongs to
 * @param params connector options forwarded to the parent RDD
 * @param schema schema handed to each partition iterator
 */
private[spark] class DorisRowRDD(sc: SparkContext, params: Map[String, String] = Map.empty,
                                 schema: StructType)
  extends AbstractDorisRDD[Row](sc, params) {

  /** Creates the iterator for one partition; `split` must be a `DorisPartition`. */
  override def compute(split: Partition, context: TaskContext): ScalaDorisRowRDDIterator = {
    new ScalaDorisRowRDDIterator(context, split.asInstanceOf[DorisPartition].dorisPartition, schema)
  }
}

/** Iterator over one Doris reader partition that yields [[DorisRow]] instances. */
private[spark] class ScalaDorisRowRDDIterator(context: TaskContext,
                                              partition: DorisReaderPartition, schema: StructType)
  extends AbstractDorisRDDIterator[Row](context, partition) {

  /**
   * Selects the row-producing reader class according to the configured read mode.
   *
   * @throws IllegalArgumentException if the read mode is neither "thrift" nor "arrow"
   */
  override def initReader(config: DorisConfig): Unit = {
    // Locale.ROOT keeps the comparison stable under locales with special casing rules
    // (e.g. Turkish, where "THRIFT".toLowerCase is not "thrift").
    config.getValue(DorisOptions.READ_MODE).toLowerCase(java.util.Locale.ROOT) match {
      case "thrift" => config.setProperty(DorisOptions.DORIS_VALUE_READER_CLASS, classOf[DorisRowThriftReader].getName)
      case "arrow" => config.setProperty(DorisOptions.DORIS_VALUE_READER_CLASS, classOf[DorisRowFlightSqlReader].getName)
      case rm: String => throw new IllegalArgumentException("Unknown read mode: " + rm)
    }
  }

  /** Casts the reader's value to [[DorisRow]]. */
  override def createValue(value: Object): Row = {
    value.asInstanceOf[DorisRow]
  }
}
/** Mix-in that fixes the data source short name to "doris". */
trait DorisSourceRegisterTrait extends DataSourceRegister {

  /** Short name under which the data source is registered. */
  override def shortName(): String = {
    "doris"
  }
}
object Retry {

  /**
   * Evaluates `f`, retrying while it fails with an exception of type `T`.
   *
   * After each retryable failure two warnings are logged, the thread is parked
   * for `interval`, `h` is evaluated, and `f` is attempted again with one fewer
   * retry remaining.
   *
   * @param retryTimes number of retries still allowed; asserted to be non-negative
   * @param interval   time to park between attempts
   * @param logger     logger receiving the retry warnings
   * @param f          computation to attempt
   * @param h          hook evaluated after the pause and before the next attempt
   * @return `Success` with the value of `f`, or the `Failure` of the last attempt
   *         (immediately, when the exception is not a `T` or no retries remain)
   */
  @tailrec
  def exec[R, T <: Throwable : ClassTag](retryTimes: Int, interval: Duration, logger: Logger)
                                        (f: => R)(h: => Unit): Try[R] = {
    assert(retryTimes >= 0)
    Try(f) match {
      case ok @ Success(_) =>
        ok
      case Failure(cause: T) if retryTimes > 0 =>
        logger.warn("Execution failed caused by: {}", cause.getMessage)
        logger.warn(s"$retryTimes times retry remaining, the next attempt will be in ${interval.toMillis} ms")
        LockSupport.parkNanos(interval.toNanos)
        h
        exec[R, T](retryTimes - 1, interval, logger)(f)(h)
      case failed =>
        failed
    }
  }

}
/**
 * Holder for a JUnit {@link ExpectedException} rule.
 * NOTE(review): presumably extended by tests that assert on thrown exceptions; confirm against subclasses.
 */
public class ExpectedExceptionTest {
    // Starts out expecting no exception; a test configures it before the failing call.
    @Rule
    public ExpectedException thrown = ExpectedException.none();
}
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.container.instance; 19 | 20 | import org.apache.doris.spark.exception.DorisRuntimeException; 21 | 22 | import java.sql.Connection; 23 | 24 | public interface ContainerService { 25 | void startContainer(); 26 | 27 | default void restartContainer() { 28 | throw new DorisRuntimeException("Only doris docker container can implemented."); 29 | }; 30 | 31 | boolean isRunning(); 32 | 33 | Connection getQueryConnection(); 34 | 35 | Connection getQueryConnection(String database); 36 | 37 | String getJdbcUrl(); 38 | 39 | String getInstanceHost(); 40 | 41 | Integer getMappedPort(int originalPort); 42 | 43 | String getUsername(); 44 | 45 | String getPassword(); 46 | 47 | String getFenodes(); 48 | 49 | String getBenodes(); 50 | 51 | void close(); 52 | 53 | int getQueryPort(); 54 | } 55 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-it/src/test/java/org/apache/doris/spark/example/DorisReadExample.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
/**
 * Example to read data from Doris using Spark Batch.
 */
object DorisReadExample {

  /** Loads `test.student` through the "doris" source, prints it, and stops the session. */
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().master("local[1]").getOrCreate()
    try {
      val dorisSparkDF = session.read.format("doris")
        .option("doris.table.identifier", "test.student")
        .option("doris.fenodes", "127.0.0.1:8030")
        .option("user", "root")
        .option("password", "")
        .load()

      dorisSparkDF.show()
    } finally {
      // Release the local Spark resources even when the read fails.
      session.stop()
    }
  }
}
/**
 * Example to write data to Doris using Spark Batch.
 */
object DorisWriteBatchExample {

  /** Builds a three-row DataFrame, prints it, and appends it to `test.student`. */
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().master("local[1]").getOrCreate()

    val rows = Seq(
      ("1", "doris", "18"),
      ("2", "apache", "28"),
      ("3", "spark", "78")
    )
    val mockDataDF = session.createDataFrame(rows).toDF("id", "name", "age")
    mockDataDF.show()

    val writer = mockDataDF.write.format("doris")
      .option("doris.table.identifier", "test.student")
      .option("doris.fenodes", "127.0.0.1:8030")
      .option("user", "root")
      .option("password", "")
      .mode("append")
    writer.save()

    session.stop()
  }
}
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.example 19 | 20 | import org.apache.spark.sql.SparkSession 21 | 22 | /** 23 | * Example to write data to Doris using Spark Structured Streaming. 24 | */ 25 | object DorisWriteStreamExample { 26 | 27 | def main(args: Array[String]): Unit = { 28 | 29 | val spark = SparkSession.builder() 30 | .appName("RateSourceExample") 31 | .master("local[1]") 32 | .getOrCreate() 33 | 34 | val rateStream = spark.readStream 35 | .format("rate") 36 | .option("rowsPerSecond", 10) 37 | .load() 38 | 39 | /** 40 | * root 41 | * |-- timestamp: timestamp (nullable = true) 42 | * |-- value: long (nullable = true) 43 | */ 44 | rateStream.printSchema(); 45 | 46 | rateStream.writeStream 47 | .format("doris") 48 | .option("checkpointLocation", "/tmp/checkpoint") 49 | .option("doris.table.identifier", "test_doris_streaming.tbl_write_tbl_stream") 50 | .option("doris.fenodes", "127.0.0.1:8030") 51 | .option("user", "root") 52 | .option("password", "") 53 | .start() 54 | .awaitTermination() 55 | 56 | spark.stop(); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-it/src/test/resources/container/ddl/read_all_type.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS tbl_read_tbl_all_types; 2 | 3 | CREATE TABLE tbl_read_tbl_all_types ( 4 | `id` int, 5 | `c1` boolean, 6 | `c2` tinyint, 7 | `c3` smallint, 8 | `c4` int, 9 | `c5` bigint, 10 | `c6` largeint, 11 
-- Fixture rows for tbl_read_tbl_all_types.
-- Column order: id, c1 .. c19 (20 values per row).

-- Row 1: upper bounds of the integer columns.
INSERT INTO tbl_read_tbl_all_types VALUES (
    1, true,                                                  -- id, c1
    127, 32767, 2147483647, 9223372036854775807,              -- c2 .. c5
    170141183460469231731687303715884105727,                  -- c6
    3.14, 2.71828, 12345.6789,                                -- c7 .. c9
    '2025-03-11', '2025-03-11 12:34:56',                      -- c10, c11
    'A', 'Hello, Doris!', 'This is a string',                 -- c12 .. c14
    ['Alice', 'Bob'],                                         -- c15
    {'key1': 'value1', 'key2': 'value2'},                     -- c16
    STRUCT('Tom', 30),                                        -- c17
    '{"key": "value"}',                                       -- c18
    '{"type": "variant", "data": 123}'                        -- c19
);

-- Row 2: lower bounds of the integer columns, negative decimals.
INSERT INTO tbl_read_tbl_all_types VALUES (
    2, false,                                                 -- id, c1
    -128, -32768, -2147483648, -9223372036854775808,          -- c2 .. c5
    -170141183460469231731687303715884105728,                 -- c6
    -1.23, 0.0001, -9999.9999,                                -- c7 .. c9
    '2024-12-25', '2024-12-25 23:59:59',                      -- c10, c11
    'B', 'Doris Test', 'Another string!',                     -- c12 .. c14
    ['Charlie', 'David'],                                     -- c15
    {'k1': 'v1', 'k2': 'v2'},                                 -- c16
    STRUCT('Jerry', 25),                                      -- c17
    '{"status": "ok"}',                                       -- c18
    '{"data": [1, 2, 3]}'                                     -- c19
);

-- Row 3: zero for every numeric column.
INSERT INTO tbl_read_tbl_all_types VALUES (
    3, true,                                                  -- id, c1
    0, 0, 0, 0,                                               -- c2 .. c5
    0,                                                        -- c6
    0.0, 0.0, 0.0000,                                         -- c7 .. c9
    '2023-06-15', '2023-06-15 08:00:00',                      -- c10, c11
    'C', 'Test Doris', 'Sample text',                         -- c12 .. c14
    ['Eve', 'Frank'],                                         -- c15
    {'alpha': 'beta'},                                        -- c16
    STRUCT('Alice', 40),                                      -- c17
    '{"nested": {"key": "value"}}',                           -- c18
    '{"variant": "test"}'                                     -- c19
);

-- Row 4: NULL in every column except id.
INSERT INTO tbl_read_tbl_all_types VALUES (
    4, NULL,                                                  -- id, c1
    NULL, NULL, NULL, NULL,                                   -- c2 .. c5
    NULL,                                                     -- c6
    NULL, NULL, NULL,                                         -- c7 .. c9
    NULL, NULL,                                               -- c10, c11
    NULL, NULL, NULL,                                         -- c12 .. c14
    NULL,                                                     -- c15
    NULL,                                                     -- c16
    NULL,                                                     -- c17
    NULL,                                                     -- c18
    NULL                                                      -- c19
);
-- Recreates tbl_write_tbl_all_types from scratch: any existing table is dropped,
-- then an empty table is created. No rows are inserted by this script.
DROP TABLE IF EXISTS tbl_write_tbl_all_types;

CREATE TABLE tbl_write_tbl_all_types (
    `id`  int,
    `c1`  boolean,
    `c2`  tinyint,
    `c3`  smallint,
    `c4`  int,
    `c5`  bigint,
    `c6`  largeint,
    `c7`  float,
    `c8`  double,
    `c9`  decimal(12,4),
    `c10` date,
    `c11` datetime,
    `c12` char(1),
    `c13` varchar(256),
    `c14` string,
    -- NOTE(review): c15/c16/c17 carry no element/field type parameters here;
    -- Doris expects ARRAY<T>, MAP<K,V> and STRUCT<...> -- confirm the intended types.
    `c15` Array,
    `c16` Map,
    `c17` Struct,
    `c18` JSON,
    `c19` VARIANT
)
-- duplicate-key model keyed and hash-distributed on id
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 2
PROPERTIES (
    "replication_num" = "1",
    "light_schema_change" = "true"
);
-------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-it/src/test/resources/log4j2-test.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | rootLogger.level = info 20 | rootLogger.appenderRef.stdout.ref = console 21 | 22 | appender.console.type = Console 23 | appender.console.name = console 24 | appender.console.target = SYSTEM_ERR 25 | appender.console.layout.type = PatternLayout 26 | appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p [%t] %c{1}: %m%n%ex 27 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-2/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 4.0.0 24 | 25 | org.apache.doris 26 | spark-doris-connector 27 | ${revision} 28 | ../pom.xml 29 | 30 | jar 31 | 32 | spark-doris-connector-spark-2 33 | 34 | 35 | 8 36 | 8 37 | UTF-8 38 | 39 | 40 | 41 | 42 | org.apache.doris 43 | spark-doris-connector-base 44 | 45 | 46 | org.apache.spark 47 | spark-core_${scala.major.version} 48 | provided 49 | 50 | 51 | org.apache.spark 52 | spark-sql_${scala.major.version} 53 | provided 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-2/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | org.apache.doris.spark.sql.DorisSourceProvider 19 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-2/src/main/scala/org/apache/doris/spark/load/CommitMessage.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
/**
 * Serializable holder for a single commit message.
 *
 * @param value the wrapped message value; deliberately untyped (`Any`)
 */
case class CommitMessage(value: Any)
  extends Serializable
/**
 * Structured Streaming [[Sink]] that hands every new micro-batch to a [[DorisWriter]].
 *
 * Construction creates a collection accumulator of [[CommitMessage]]s, a writer bound
 * to that accumulator, and registers a [[DorisTxnStreamingQueryListener]] on the
 * session's streaming query manager.
 *
 * @param sqlContext context used to create the accumulator and register the listener
 * @param config     Doris configuration handed to the writer
 */
private[sql] class DorisStreamLoadSink(sqlContext: SQLContext, config: DorisConfig) extends Sink with Serializable {

  private val logger: Logger = LoggerFactory.getLogger(classOf[DorisStreamLoadSink].getName)

  // id of the most recent batch that was written; -1 until the first write
  @volatile private var latestBatchId = -1L

  // accumulator shared between the writer and the streaming query listener
  private val acc = sqlContext.sparkContext.collectionAccumulator[CommitMessage]("StreamTxnAcc")
  private val writer = new DorisWriter(config, acc, true)

  // the listener receives the same accumulator plus the writer's transaction handler
  sqlContext.streams.addListener(new DorisTxnStreamingQueryListener(acc, writer.getTransactionHandler))

  /**
   * Writes `data` when `batchId` is greater than the last written batch id;
   * otherwise only logs that the batch is skipped.
   *
   * @param batchId id of the micro-batch
   * @param data    content of the micro-batch
   */
  override def addBatch(batchId: Long, data: DataFrame): Unit = {
    val isNewBatch = batchId > latestBatchId
    if (isNewBatch) {
      writer.write(data)
      latestBatchId = batchId
    } else {
      logger.info(s"Skipping already committed batch $batchId")
    }
  }

  override def toString: String = "DorisStreamLoadSink"
}
provided 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3-base/src/main/scala/org/apache/doris/spark/read/DorisPartitionReaderFactory.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
/**
 * [[PartitionReaderFactory]] that builds one [[DorisPartitionReader]] per input partition.
 *
 * @param schema schema handed to every reader
 * @param mode   scan mode handed to every reader
 * @param config Doris configuration handed to every reader
 */
class DorisPartitionReaderFactory(schema: StructType, mode: ScanMode, config: DorisConfig) extends PartitionReaderFactory {

  /** Creates a reader for `inputPartition` using this factory's schema, mode and config. */
  override def createReader(inputPartition: InputPartition): PartitionReader[InternalRow] =
    new DorisPartitionReader(inputPartition, schema, mode, config)

}
/**
 * Scan whose pushed-down predicates are given as Spark [[Filter]]s.
 *
 * @param config  Doris configuration
 * @param schema  schema passed to [[AbstractDorisScan]]
 * @param filters filters to compile through [[DorisDialects]]
 */
class DorisScan(config: DorisConfig, schema: StructType, filters: Array[Filter]) extends AbstractDorisScan(config, schema) with Logging {

  /**
   * Compiles every filter with [[DorisDialects.compileFilter]] and keeps only the
   * ones that produced a result, in their original order.
   */
  override protected def compiledFilters(): Array[String] = {
    val inValueLengthLimit = config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT)
    filters
      .map(filter => DorisDialects.compileFilter(filter, inValueLengthLimit))
      .collect { case Some(compiled) => compiled }
  }
}
/**
 * Base [[ScanBuilder]] with column pruning.
 *
 * The read schema starts as the full `schema`, narrowed to the columns listed in
 * `DorisOptions.DORIS_READ_FIELDS` when that option is present, and is narrowed
 * again on every `pruneColumns` call.
 *
 * @param config Doris configuration
 * @param schema full table schema
 */
protected[spark] abstract class DorisScanBuilderBase(config: DorisConfig, schema: StructType) extends ScanBuilder
  with SupportsPushDownRequiredColumns {

  // Configured field names are comma separated; each is trimmed and stripped of backticks.
  protected var readSchema: StructType =
    if (!config.contains(DorisOptions.DORIS_READ_FIELDS)) {
      schema
    } else {
      val configuredCols = config.getValue(DorisOptions.DORIS_READ_FIELDS)
        .split(",")
        .map(name => name.trim.replaceAll("`", ""))
      doPruneColumns(schema, configuredCols)
    }

  /** Restricts the current read schema to the field names of `requiredSchema`. */
  override def pruneColumns(requiredSchema: StructType): Unit = {
    val wanted = requiredSchema.fieldNames
    readSchema = doPruneColumns(readSchema, wanted)
  }

  /**
   * Keeps the fields of `originSchema` whose name occurs in `requiredCols`,
   * preserving the order of `originSchema`.
   *
   * @return `originSchema` unchanged when `requiredCols` is empty
   * @throws IllegalArgumentException when `requiredCols` is non-empty and matches no field
   */
  private def doPruneColumns(originSchema: StructType, requiredCols: Array[String]): StructType = {
    if (requiredCols.isEmpty) {
      originSchema
    } else {
      val kept = originSchema.fields.filter(f => requiredCols.contains(f.name))
      if (kept.isEmpty) {
        throw new IllegalArgumentException("No required columns found")
      }
      StructType(kept)
    }
  }

}
/**
 * Enumerates the supported scan modes.
 */
public enum ScanMode {
    /** Thrift scan mode. */
    THRIFT,
    /** Arrow scan mode. */
    ARROW
}
/**
 * [[RelationProvider]] mix-in that answers every relation request with a [[DorisRelation]].
 */
trait DorisSourceProvider extends RelationProvider {

  /**
   * Builds a [[DorisRelation]] from the given context and options.
   *
   * @param sqlContext context handed to the relation
   * @param parameters options handed to the relation
   */
  override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation =
    new DorisRelation(sqlContext, parameters)

}
/**
 * Writer factory used for both batch and streaming writes; every writer it creates
 * is a [[DorisDataWriter]] built from the same config and schema.
 *
 * @param config Doris configuration handed to every writer
 * @param schema schema handed to every writer
 */
class DorisDataWriterFactory(config: DorisConfig, schema: StructType) extends DataWriterFactory with StreamingDataWriterFactory {

  /** Batch write: creates a writer for the given partition and task. */
  override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] =
    new DorisDataWriter(config, schema, partitionId, taskId)

  /** Streaming write: same as the batch variant, additionally passing the epoch id. */
  override def createWriter(partitionId: Int, taskId: Long, epochId: Long): DataWriter[InternalRow] =
    new DorisDataWriter(config, schema, partitionId, taskId, epochId)

}
/**
 * [[WriteBuilder]] for Doris that supports truncate-before-write for batch writes.
 *
 * @param config Doris configuration handed to the created [[DorisWrite]]
 * @param schema schema handed to the created [[DorisWrite]]
 */
class DorisWriteBuilder(config: DorisConfig, schema: StructType) extends WriteBuilder with SupportsTruncate {

  // set by truncate(); only consulted by buildForBatch()
  private var isTruncate = false

  /**
   * Builds the batch write. When truncation was requested, the target table is
   * truncated first.
   *
   * @throws IllegalArgumentException when truncation was requested and the configured
   *                                  table identifier is not of the form `database.table`
   */
  override def buildForBatch(): BatchWrite = {
    if (isTruncate) {
      truncateTargetTable()
    }
    new DorisWrite(config, schema)
  }

  /** Builds the streaming write; the truncate flag is not consulted here. */
  override def buildForStreaming(): StreamingWrite = {
    new DorisWrite(config, schema)
  }

  /** Records that the table must be truncated before a batch write and returns this builder. */
  override def truncate(): WriteBuilder = {
    isTruncate = true
    this
  }

  /** Splits the configured identifier on '.' and truncates database(0).table(1). */
  private def truncateTargetTable(): Unit = {
    val identifier = config.getValue(DorisOptions.DORIS_TABLE_IDENTIFIER)
    val tableDb = identifier.split("\\.")
    // Validate before indexing: a value without both parts would otherwise fail
    // with a bare ArrayIndexOutOfBoundsException.
    if (tableDb.length < 2) {
      throw new IllegalArgumentException(
        s"Option 'doris.table.identifier' must be in the form '<database>.<table>', but was '$identifier'")
    }
    val client = new DorisFrontendClient(config)
    client.truncateTable(tableDb(0), tableDb(1))
  }
}
/spark-doris-connector/spark-doris-connector-spark-3.1/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | org.apache.doris.spark.sql.sources.DorisDataSource 19 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.1/src/main/scala/org/apache/doris/spark/catalog/DorisTable.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
/**
 * Table implementation for this Spark version: supplies the concrete scan and
 * write builders to [[DorisTableBase]].
 *
 * @param identifier table identifier passed to the base class
 * @param config     Doris configuration passed to the base class
 * @param schema     optional schema passed to the base class
 */
class DorisTable(identifier: Identifier, config: DorisConfig, schema: Option[StructType])
  extends DorisTableBase(identifier, config, schema) {

  /** Returns a new [[DorisScanBuilder]] for the given config and schema. */
  override def createScanBuilder(config: DorisConfig, schema: StructType): ScanBuilder = {
    new DorisScanBuilder(config, schema)
  }

  /** Returns a new [[DorisWriteBuilder]] for the given config and schema. */
  override protected def createWriteBuilder(config: DorisConfig, schema: StructType): WriteBuilder = {
    new DorisWriteBuilder(config, schema)
  }
}
/**
 * Table catalog for this Spark version, built on [[DorisTableCatalogBase]].
 */
class DorisTableCatalog extends DorisTableCatalogBase {

  /** Dropping a namespace is not supported; always throws. */
  override def dropNamespace(strings: Array[String]): Boolean = {
    throw new UnsupportedOperationException()
  }

  /** Creates the [[DorisTable]] for the given identifier, config and optional schema. */
  override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = {
    new DorisTable(identifier, config, schema)
  }
}
/**
 * Scan builder that adds filter push-down to [[DorisScanBuilderBase]].
 *
 * @param config Doris configuration
 * @param schema table schema passed to the base class
 */
class DorisScanBuilder(config: DorisConfig, schema: StructType) extends DorisScanBuilderBase(config, schema) with SupportsPushDownFilters {

  // filters accepted by the last pushFilters call; empty until then
  private var pushDownPredicates: Array[Filter] = Array.empty[Filter]

  private val inValueLengthLimit = config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT)

  /** Builds a [[DorisScan]] over the current read schema and the accepted filters. */
  override def build(): Scan = {
    new DorisScan(config, readSchema, pushDownPredicates)
  }

  /**
   * Accepts the filters that [[DorisDialects.compileFilter]] can compile.
   *
   * @return the filters that could not be compiled
   */
  override def pushFilters(filters: Array[Filter]): Array[Filter] = {
    val split = filters.partition(f => DorisDialects.compileFilter(f, inValueLengthLimit).isDefined)
    pushDownPredicates = split._1
    split._2
  }

  /** Returns the filters accepted by the last `pushFilters` call. */
  override def pushedFilters(): Array[Filter] = {
    pushDownPredicates
  }

}
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.sql.sources 19 | 20 | import org.apache.doris.spark.catalog.{DorisTable, DorisTableProviderBase} 21 | import org.apache.doris.spark.config.DorisConfig 22 | import org.apache.spark.sql.connector.catalog.{Identifier, Table} 23 | import org.apache.spark.sql.types.StructType 24 | 25 | /** Data source class combining DorisTableProviderBase, DorisSourceRegisterTrait and DorisSourceProvider; the tables it creates are DorisTable instances. */ class DorisDataSource extends DorisTableProviderBase with DorisSourceRegisterTrait with DorisSourceProvider with Serializable { 26 | 27 | /* Wraps the identifier, config and optional schema in a new DorisTable. */ override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = 28 | new DorisTable(identifier, config, schema) 29 | } 30 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.2/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 4.0.0 24 | 25 | org.apache.doris 26 | spark-doris-connector 27 | ${revision} 28 | 29 | 30 | spark-doris-connector-spark-3.2 31 | 32 | 33 | 8 34 | 8 35 | UTF-8 36 | 3.2.0 37 | 3.2 38 | 2.12.18 39 | 2.12 40 | 41 | 42 | 43 | 44 | org.apache.doris 45 | spark-doris-connector-spark-3-base 46 | 47 | 48 | org.apache.spark 49 | spark-sql_${scala.major.version} 50 | 51 | 52 | 
53 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.2/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | org.apache.doris.spark.sql.sources.DorisDataSource 19 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.2/src/main/scala/org/apache/doris/spark/catalog/DorisTable.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.catalog 19 | 20 | import org.apache.doris.spark.config.DorisConfig 21 | import org.apache.doris.spark.read.DorisScanBuilder 22 | import org.apache.doris.spark.write.DorisWriteBuilder 23 | import org.apache.spark.sql.connector.catalog.Identifier 24 | import org.apache.spark.sql.connector.read.ScanBuilder 25 | import org.apache.spark.sql.connector.write.WriteBuilder 26 | import org.apache.spark.sql.types.StructType 27 | 28 | /** Concrete DorisTableBase whose builder factories return DorisScanBuilder and DorisWriteBuilder. */ class DorisTable(identifier: Identifier, config: DorisConfig, schema: Option[StructType]) 29 | extends DorisTableBase(identifier, config, schema) { 30 | 31 | /* Creates the scan builder for the given config and schema. */ override def createScanBuilder(config: DorisConfig, schema: StructType): ScanBuilder = new DorisScanBuilder(config, schema) 32 | /* Creates the write builder for the given config and schema. */ override protected def createWriteBuilder(config: DorisConfig, schema: StructType): WriteBuilder = new DorisWriteBuilder(config, schema) 33 | } 34 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.2/src/main/scala/org/apache/doris/spark/catalog/DorisTableCatalog.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership.
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.catalog 19 | 20 | import org.apache.doris.spark.config.DorisConfig 21 | import org.apache.spark.sql.connector.catalog.{Identifier, Table} 22 | import org.apache.spark.sql.types.StructType 23 | 24 | /** Concrete DorisTableCatalogBase: rejects namespace drops and builds DorisTable instances. */ class DorisTableCatalog extends DorisTableCatalogBase { 25 | /* Dropping a namespace is not supported; every call throws UnsupportedOperationException. */ override def dropNamespace(strings: Array[String]): Boolean = throw new UnsupportedOperationException() 26 | /* Wraps the identifier, config and optional schema in a new DorisTable. */ override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = 27 | new DorisTable(identifier, config, schema) 28 | } 29 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.2/src/main/scala/org/apache/doris/spark/read/DorisScanBuilder.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License.
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.read 19 | 20 | import org.apache.doris.spark.config.{DorisConfig, DorisOptions} 21 | import org.apache.doris.spark.util.DorisDialects 22 | import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters} 23 | import org.apache.spark.sql.sources.Filter 24 | import org.apache.spark.sql.types.StructType 25 | 26 | /** 27 | * Scan builder that keeps every source filter DorisDialects can compile and 28 | * returns the remaining filters to the caller. 29 | */ 30 | class DorisScanBuilder(config: DorisConfig, schema: StructType) 31 | extends DorisScanBuilderBase(config, schema) 32 | with SupportsPushDownFilters { 33 | 34 | // Value of DORIS_FILTER_QUERY_IN_MAX_COUNT, forwarded to the filter compiler. 35 | private val inListMaxCount = config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT) 36 | 37 | // Filters accepted by the latest pushFilters call; empty until then. 38 | private var acceptedFilters: Array[Filter] = Array.empty[Filter] 39 | 40 | override def build(): Scan = new DorisScan(config, readSchema, acceptedFilters) 41 | 42 | override def pushFilters(filters: Array[Filter]): Array[Filter] = { 43 | def compilable(candidate: Filter): Boolean = 44 | DorisDialects.compileFilter(candidate, inListMaxCount).isDefined 45 | 46 | val (accepted, rejected) = filters.partition(compilable) 47 | acceptedFilters = accepted 48 | rejected 49 | } 50 | 51 | override def pushedFilters(): Array[Filter] = acceptedFilters 52 | 53 | } 54 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.2/src/main/scala/org/apache/doris/spark/sql/sources/DorisDataSource.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements.
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.sql.sources 19 | 20 | import org.apache.doris.spark.catalog.{DorisTable, DorisTableProviderBase} 21 | import org.apache.doris.spark.config.DorisConfig 22 | import org.apache.spark.sql.connector.catalog.{Identifier, Table} 23 | import org.apache.spark.sql.types.StructType 24 | 25 | /** Data source class combining DorisTableProviderBase, DorisSourceRegisterTrait and DorisSourceProvider; the tables it creates are DorisTable instances. */ class DorisDataSource extends DorisTableProviderBase with DorisSourceRegisterTrait with DorisSourceProvider with Serializable { 26 | 27 | /* Wraps the identifier, config and optional schema in a new DorisTable. */ override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = 28 | new DorisTable(identifier, config, schema) 29 | } 30 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.3/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 4.0.0 24 | 25 | org.apache.doris 26 | spark-doris-connector 27 | ${revision} 28 | 29 | 30 | spark-doris-connector-spark-3.3 31 | 32 | 33 | 8 34 | 8 35 | UTF-8 36 | 3.3.0 37 | 3.3 38 | 2.12.18 39 | 2.12 40 | 41 | 42 | 43 | 44 | org.apache.doris 45 | spark-doris-connector-spark-3-base 46 | 47 | 48 | org.apache.spark 49 | spark-sql_${scala.major.version} 50 | 51 | 52 | 
org.junit.jupiter 53 | junit-jupiter-api 54 | test 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.3/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | org.apache.doris.spark.sql.sources.DorisDataSource 19 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.3/src/main/scala/org/apache/doris/spark/catalog/DorisTable.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.catalog 19 | 20 | import org.apache.doris.spark.config.DorisConfig 21 | import org.apache.doris.spark.read.DorisScanBuilder 22 | import org.apache.doris.spark.write.DorisWriteBuilder 23 | import org.apache.spark.sql.connector.catalog.Identifier 24 | import org.apache.spark.sql.connector.read.ScanBuilder 25 | import org.apache.spark.sql.connector.write.WriteBuilder 26 | import org.apache.spark.sql.types.StructType 27 | 28 | /** Concrete DorisTableBase whose builder factories return DorisScanBuilder and DorisWriteBuilder. */ class DorisTable(identifier: Identifier, config: DorisConfig, schema: Option[StructType]) 29 | extends DorisTableBase(identifier, config, schema) { 30 | 31 | /* Creates the scan builder for the given config and schema. */ override def createScanBuilder(config: DorisConfig, schema: StructType): ScanBuilder = new DorisScanBuilder(config, schema) 32 | /* Creates the write builder for the given config and schema. */ override protected def createWriteBuilder(config: DorisConfig, schema: StructType): WriteBuilder = new DorisWriteBuilder(config, schema) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.3/src/main/scala/org/apache/doris/spark/catalog/DorisTableCatalog.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership.
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.catalog 19 | 20 | import org.apache.doris.spark.config.DorisConfig 21 | import org.apache.spark.sql.connector.catalog.{Identifier, Table} 22 | import org.apache.spark.sql.types.StructType 23 | 24 | /** Concrete DorisTableCatalogBase: rejects namespace drops and builds DorisTable instances. */ class DorisTableCatalog extends DorisTableCatalogBase { 25 | /* Dropping a namespace is not supported; every call throws UnsupportedOperationException regardless of the boolean argument. */ override def dropNamespace(strings: Array[String], b: Boolean): Boolean = throw new UnsupportedOperationException() 26 | /* Wraps the identifier, config and optional schema in a new DorisTable. */ override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = 27 | new DorisTable(identifier, config, schema) 28 | } 29 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.3/src/main/scala/org/apache/doris/spark/read/DorisScanBuilder.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License.
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.read 19 | 20 | import org.apache.doris.spark.config.{DorisConfig, DorisOptions} 21 | import org.apache.doris.spark.read.expression.V2ExpressionBuilder 22 | import org.apache.spark.sql.connector.expressions.filter.Predicate 23 | import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownLimit, SupportsPushDownV2Filters} 24 | import org.apache.spark.sql.types.StructType 25 | 26 | /** 27 | * Scan builder that pushes V2 predicates and a row limit down to Doris. 28 | * 29 | * A predicate is pushed when V2ExpressionBuilder produces a non-null result for it; 30 | * all other predicates are handed back to the caller. 31 | */ 32 | class DorisScanBuilder(config: DorisConfig, schema: StructType) extends DorisScanBuilderBase(config, schema) 33 | with SupportsPushDownV2Filters 34 | with SupportsPushDownLimit { 35 | 36 | // Predicates accepted by the latest pushPredicates call; empty until then. 37 | private var pushDownPredicates: Array[Predicate] = Array[Predicate]() 38 | 39 | private val expressionBuilder = new V2ExpressionBuilder(config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT)) 40 | 41 | // Stays at -1 until pushLimit is called. 42 | private var limitSize: Int = -1 43 | 44 | // Build the scan from readSchema (inherited from DorisScanBuilderBase), as the Spark 3.1/3.2 45 | // builders do, rather than from the constructor's full schema. 46 | // NOTE(review): readSchema is presumably the column-pruned schema; confirm in the base class. 47 | override def build(): Scan = new DorisScanV2(config, readSchema, pushDownPredicates, limitSize) 48 | 49 | override def pushPredicates(predicates: Array[Predicate]): Array[Predicate] = { 50 | // A null result from the builder means no expression was produced for the predicate. 51 | val (pushed, unsupported) = predicates.partition(predicate => expressionBuilder.build(predicate) != null) 52 | this.pushDownPredicates = pushed 53 | unsupported 54 | } 55 | 56 | override def pushedPredicates(): Array[Predicate] = pushDownPredicates 57 | 58 | // Records the limit for the scan and reports it as pushed. 59 | override def pushLimit(i: Int): Boolean = { 60 | limitSize = i 61 | true 62 | } 63 | 64 | } 65 | --------------------------------------------------------------------------------
/spark-doris-connector/spark-doris-connector-spark-3.3/src/main/scala/org/apache/doris/spark/read/DorisScanV2.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.spark.read 19 | 20 | import org.apache.doris.spark.config.{DorisConfig, DorisOptions} 21 | import org.apache.doris.spark.read.expression.V2ExpressionBuilder 22 | import org.apache.spark.internal.Logging 23 | import org.apache.spark.sql.connector.expressions.filter.Predicate 24 | import org.apache.spark.sql.types.StructType 25 | 26 | /** 27 | * Scan that renders the supplied V2 predicates as filter strings and exposes the given limit. 28 | */ 29 | class DorisScanV2(config: DorisConfig, schema: StructType, filters: Array[Predicate], limit: Int) 30 | extends AbstractDorisScan(config, schema) with Logging { 31 | 32 | override protected def getLimit: Int = limit 33 | 34 | override protected def compiledFilters(): Array[String] = { 35 | val builder = new V2ExpressionBuilder(config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT)) 36 | // Keep only the predicates for which the builder returns a non-null string. 37 | filters.flatMap(predicate => Option[String](builder.build(predicate))) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.3/src/main/scala/org/apache/doris/spark/sql/sources/DorisDataSource.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied.
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.sql.sources 19 | 20 | import org.apache.doris.spark.catalog.{DorisTable, DorisTableProviderBase} 21 | import org.apache.doris.spark.config.DorisConfig 22 | import org.apache.spark.sql.connector.catalog.{Identifier, Table} 23 | import org.apache.spark.sql.types.StructType 24 | 25 | /** Data source class combining DorisTableProviderBase, DorisSourceRegisterTrait and DorisSourceProvider; the tables it creates are DorisTable instances. */ class DorisDataSource extends DorisTableProviderBase with DorisSourceRegisterTrait with DorisSourceProvider with Serializable { 26 | 27 | /* Wraps the identifier, config and optional schema in a new DorisTable. */ override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = 28 | new DorisTable(identifier, config, schema) 29 | 30 | } 31 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.4/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 4.0.0 24 | 25 | org.apache.doris 26 | spark-doris-connector 27 | ${revision} 28 | 29 | 30 | spark-doris-connector-spark-3.4 31 | 32 | 33 | 8 34 | 8 35 | UTF-8 36 | 3.4.0 37 | 3.4 38 | 2.12.18 39 | 2.12 40 | 41 | 42 | 43 | 44 | org.apache.doris 45 | spark-doris-connector-spark-3-base 46 | 47 | 48 | org.apache.spark 49 | spark-sql_${scala.major.version} 50 | 51 | 52 | org.junit.jupiter 53 | junit-jupiter-api 54 | test 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.4/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership.
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | org.apache.doris.spark.sql.sources.DorisDataSource 19 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.4/src/main/scala/org/apache/doris/spark/catalog/DorisTable.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.spark.catalog 19 | 20 | import org.apache.doris.spark.config.DorisConfig 21 | import org.apache.doris.spark.read.DorisScanBuilder 22 | import org.apache.doris.spark.write.DorisWriteBuilder 23 | import org.apache.spark.sql.connector.catalog.Identifier 24 | import org.apache.spark.sql.connector.read.ScanBuilder 25 | import org.apache.spark.sql.connector.write.WriteBuilder 26 | import org.apache.spark.sql.types.StructType 27 | 28 | /** Concrete DorisTableBase whose builder factories return DorisScanBuilder and DorisWriteBuilder. */ class DorisTable(identifier: Identifier, config: DorisConfig, schema: Option[StructType]) 29 | extends DorisTableBase(identifier, config, schema) { 30 | 31 | /* Creates the scan builder for the given config and schema. */ override def createScanBuilder(config: DorisConfig, schema: StructType): ScanBuilder = new DorisScanBuilder(config, schema) 32 | /* Creates the write builder for the given config and schema. */ override protected def createWriteBuilder(config: DorisConfig, schema: StructType): WriteBuilder = new DorisWriteBuilder(config, schema) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.4/src/main/scala/org/apache/doris/spark/catalog/DorisTableCatalog.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied.
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.spark.catalog 19 | 20 | import org.apache.doris.spark.config.DorisConfig 21 | import org.apache.spark.sql.connector.catalog.{Identifier, Table} 22 | import org.apache.spark.sql.types.StructType 23 | 24 | /** Concrete DorisTableCatalogBase: rejects namespace drops and builds DorisTable instances. */ class DorisTableCatalog extends DorisTableCatalogBase { 25 | /* Dropping a namespace is not supported; every call throws UnsupportedOperationException regardless of the boolean argument. */ override def dropNamespace(strings: Array[String], b: Boolean): Boolean = throw new UnsupportedOperationException() 26 | /* Wraps the identifier, config and optional schema in a new DorisTable. */ override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = 27 | new DorisTable(identifier, config, schema) 28 | } 29 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.4/src/main/scala/org/apache/doris/spark/read/DorisScanBuilder.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.
17 | 18 | package org.apache.doris.spark.read 19 | 20 | import org.apache.doris.spark.config.{DorisConfig, DorisOptions} 21 | import org.apache.doris.spark.read.expression.V2ExpressionBuilder 22 | import org.apache.spark.sql.connector.expressions.filter.Predicate 23 | import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownLimit, SupportsPushDownV2Filters} 24 | import org.apache.spark.sql.types.StructType 25 | 26 | /** 27 | * Scan builder that pushes V2 predicates and a row limit down to Doris. 28 | * 29 | * A predicate is pushed when V2ExpressionBuilder produces a non-null result for it; 30 | * all other predicates are handed back to the caller. 31 | */ 32 | class DorisScanBuilder(config: DorisConfig, schema: StructType) extends DorisScanBuilderBase(config, schema) 33 | with SupportsPushDownV2Filters 34 | with SupportsPushDownLimit { 35 | 36 | // Predicates accepted by the latest pushPredicates call; empty until then. 37 | private var pushDownPredicates: Array[Predicate] = Array[Predicate]() 38 | 39 | private val expressionBuilder = new V2ExpressionBuilder(config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT)) 40 | 41 | // Stays at -1 until pushLimit is called. 42 | private var limitSize: Int = -1 43 | 44 | // Build the scan from readSchema (inherited from DorisScanBuilderBase), as the Spark 3.1/3.2 45 | // builders do, rather than from the constructor's full schema. 46 | // NOTE(review): readSchema is presumably the column-pruned schema; confirm in the base class. 47 | override def build(): Scan = new DorisScanV2(config, readSchema, pushDownPredicates, limitSize) 48 | 49 | override def pushPredicates(predicates: Array[Predicate]): Array[Predicate] = { 50 | // A null result from the builder means no expression was produced for the predicate. 51 | val (pushed, unsupported) = predicates.partition(predicate => expressionBuilder.build(predicate) != null) 52 | this.pushDownPredicates = pushed 53 | unsupported 54 | } 55 | 56 | override def pushedPredicates(): Array[Predicate] = pushDownPredicates 57 | 58 | // Records the limit for the scan and reports it as pushed. 59 | override def pushLimit(i: Int): Boolean = { 60 | limitSize = i 61 | true 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.4/src/main/scala/org/apache/doris/spark/read/DorisScanV2.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership.
/**
 * Scan backed by V2 predicates: translates the pushed predicates into filter strings
 * and exposes the pushed limit to the abstract scan.
 */
class DorisScanV2(config: DorisConfig, schema: StructType, filters: Array[Predicate], limit: Int) extends AbstractDorisScan(config, schema) with Logging {

  /**
   * Translates every pushed predicate with a fresh V2ExpressionBuilder; predicates whose
   * translation is null are dropped from the result.
   */
  override protected def compiledFilters(): Array[String] = {
    val maxInCount = config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT)
    val translator = new V2ExpressionBuilder(maxInCount)
    // Option(null) is None, so flatMap keeps only the non-null translations.
    filters.flatMap(predicate => Option[String](translator.build(predicate)))
  }

  /** Limit handed over by the scan builder. */
  override protected def getLimit: Int = limit
}
/**
 * Data source entry point of this module; plugs the module's [[DorisTable]] into the
 * shared table-provider base.
 */
class DorisDataSource
  extends DorisTableProviderBase
  with DorisSourceRegisterTrait
  with DorisSourceProvider
  with Serializable {

  /** Creates the table implementation of this module for the given identifier, config and optional schema. */
  override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = {
    new DorisTable(identifier, config, schema)
  }

}
52 | org.junit.jupiter 53 | junit-jupiter-api 54 | test 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.5/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | org.apache.doris.spark.sql.sources.DorisDataSource 19 | -------------------------------------------------------------------------------- /spark-doris-connector/spark-doris-connector-spark-3.5/src/main/scala/org/apache/doris/spark/catalog/DorisTable.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
/**
 * Table implementation of the spark-3.5 module: wires the module's scan and write
 * builders into the shared table base.
 */
class DorisTable(identifier: Identifier, config: DorisConfig, schema: Option[StructType])
  extends DorisTableBase(identifier, config, schema) {

  /** Returns a new [[DorisScanBuilder]] for the given config and schema. */
  override def createScanBuilder(config: DorisConfig, schema: StructType): ScanBuilder = {
    new DorisScanBuilder(config, schema)
  }

  /** Returns a new DorisWriteBuilder for the given config and schema. */
  override protected def createWriteBuilder(config: DorisConfig, schema: StructType): WriteBuilder = {
    new DorisWriteBuilder(config, schema)
  }

}
/**
 * Table catalog of the spark-3.5 module: supplies the concrete [[DorisTable]] to the
 * shared catalog base and rejects namespace removal.
 */
class DorisTableCatalog extends DorisTableCatalogBase {

  /** Dropping a namespace is not supported; every call throws `UnsupportedOperationException`. */
  override def dropNamespace(strings: Array[String], b: Boolean): Boolean = {
    throw new UnsupportedOperationException()
  }

  /** Creates the table implementation of this module for the given identifier, config and optional schema. */
  override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = {
    new DorisTable(identifier, config, schema)
  }

}
/**
 * Scan builder that records the V2 predicates and the row limit Spark offers for push-down
 * and hands them to [[DorisScanV2]] when the scan is built.
 */
class DorisScanBuilder(config: DorisConfig, schema: StructType) extends DorisScanBuilderBase(config, schema)
  with SupportsPushDownV2Filters
  with SupportsPushDownLimit {

  // Predicates accepted for push-down; empty until pushPredicates is called.
  private var acceptedPredicates: Array[Predicate] = Array[Predicate]()

  // Translator used only to test whether a predicate can be expressed for Doris.
  private val predicateTranslator = new V2ExpressionBuilder(config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT))

  // -1 means no limit has been pushed.
  private var pushedLimit: Int = -1

  /** Creates the scan from the config, schema, accepted predicates and pushed limit. */
  override def build(): Scan = {
    new DorisScanV2(config, schema, acceptedPredicates, pushedLimit)
  }

  /**
   * Splits the offered predicates into those the translator can build (kept for push-down)
   * and those it cannot (returned to the caller).
   */
  override def pushPredicates(predicates: Array[Predicate]): Array[Predicate] = {
    // A null translation marks the predicate as not translatable.
    val (translatable, rejected) = predicates.partition(p => predicateTranslator.build(p) != null)
    acceptedPredicates = translatable
    rejected
  }

  /** Returns the predicates accepted by the last pushPredicates call. */
  override def pushedPredicates(): Array[Predicate] = acceptedPredicates

  /**
   * Remembers the limit and always reports it as pushed.
   *
   * NOTE(review): isPartiallyPushed is not overridden here, so Spark presumably treats the
   * limit as fully handled by the source - confirm the scan enforces it globally rather
   * than once per partition.
   */
  override def pushLimit(i: Int): Boolean = {
    pushedLimit = i
    true
  }

}
/spark-doris-connector/spark-doris-connector-spark-3.5/src/main/scala/org/apache/doris/spark/read/DorisScanV2.scala: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
/**
 * Scan backed by V2 predicates: translates the pushed predicates into filter strings
 * and exposes the pushed limit to the abstract scan.
 */
class DorisScanV2(config: DorisConfig, schema: StructType, filters: Array[Predicate], limit: Int) extends AbstractDorisScan(config, schema) with Logging {

  /**
   * Translates every pushed predicate with a fresh V2ExpressionBuilder; predicates whose
   * translation is null are dropped from the result.
   */
  override protected def compiledFilters(): Array[String] = {
    val maxInCount = config.getValue(DorisOptions.DORIS_FILTER_QUERY_IN_MAX_COUNT)
    val translator = new V2ExpressionBuilder(maxInCount)
    // Option(null) is None, so flatMap keeps only the non-null translations.
    filters.flatMap(predicate => Option[String](translator.build(predicate)))
  }

  /** Limit handed over by the scan builder. */
  override protected def getLimit: Int = limit
}
/**
 * Data source entry point of the spark-3.5 module; plugs the module's [[DorisTable]]
 * into the shared table-provider base.
 */
class DorisDataSource
  extends DorisTableProviderBase
  with DorisSourceRegisterTrait
  with DorisSourceProvider
  with Serializable {

  /** Creates the table implementation of this module for the given identifier, config and optional schema. */
  override def newTableInstance(identifier: Identifier, config: DorisConfig, schema: Option[StructType]): Table = {
    new DorisTable(identifier, config, schema)
  }

}
17 | 18 | package org.apache.doris.common.io; 19 | 20 | import java.io.DataInput; 21 | import java.io.DataOutput; 22 | import java.io.IOException; 23 | 24 | /** 25 | * Copied from Apache Doris 26 | */ 27 | public class Codec { 28 | 29 | // not support encode negative value now 30 | public static void encodeVarint64(long source, DataOutput out) throws IOException { 31 | assert source >= 0; 32 | short B = 128; // CHECKSTYLE IGNORE THIS LINE 33 | 34 | while (source >= B) { 35 | out.write((int) (source & (B - 1) | B)); 36 | source = source >> 7; 37 | } 38 | out.write((int) (source & (B - 1))); 39 | } 40 | 41 | // not support decode negative value now 42 | public static long decodeVarint64(DataInput in) throws IOException { 43 | long result = 0; 44 | int shift = 0; 45 | short B = 128; // CHECKSTYLE IGNORE THIS LINE 46 | 47 | while (true) { 48 | int oneByte = in.readUnsignedByte(); 49 | boolean isEnd = (oneByte & B) == 0; 50 | result = result | ((long) (oneByte & B - 1) << (shift * 7)); 51 | if (isEnd) { 52 | break; 53 | } 54 | shift++; 55 | } 56 | 57 | return result; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/AutoType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2006 JMockit developers 3 | * This file is subject to the terms of the MIT license (see LICENSE.txt). 
/**
 * Helper class to convert between Java's wrapper types and primitive types.
 * There are 8 wrapper/primitive type pairs in Java:
 * <pre>
 * |Wrapper Type  |Primitive Type
 * --------------------------------------
 * |Boolean       |boolean
 * |Character     |char
 * |Byte          |byte
 * |Short         |short
 * |Integer       |int
 * |Float         |float
 * |Long          |long
 * |Double        |double
 * </pre>
 * <p>
 * Copied from Apache Doris
 */
public class AutoType {
    private static final Map<Class<?>, Class<?>> PRIMITIVE_TO_WRAPPER = new HashMap<>();
    private static final Map<Class<?>, Class<?>> WRAPPER_TO_PRIMITIVE = new HashMap<>();

    static {
        register(Boolean.class, Boolean.TYPE);
        register(Character.class, Character.TYPE);
        register(Byte.class, Byte.TYPE);
        register(Short.class, Short.TYPE);
        register(Integer.class, Integer.TYPE);
        register(Float.class, Float.TYPE);
        register(Long.class, Long.TYPE);
        register(Double.class, Double.TYPE);
    }

    /** Records one wrapper/primitive pair in both lookup directions. */
    private static void register(Class<?> wrapperType, Class<?> primitiveType) {
        WRAPPER_TO_PRIMITIVE.put(wrapperType, primitiveType);
        PRIMITIVE_TO_WRAPPER.put(primitiveType, wrapperType);
    }

    /**
     * @param type class to test
     * @return true if {@code type} is one of the eight wrapper classes
     */
    public static boolean isWrapperOfPrimitiveType(Class<?> type) {
        return WRAPPER_TO_PRIMITIVE.containsKey(type);
    }

    /**
     * @param wrapperType a wrapper class such as {@code Integer.class}
     * @return the matching primitive type, or null if {@code wrapperType} is not a wrapper class
     */
    public static Class<?> getPrimitiveType(Class<?> wrapperType) {
        return WRAPPER_TO_PRIMITIVE.get(wrapperType);
    }

    /**
     * @param primitiveType a primitive type such as {@code int.class}
     * @return the matching wrapper class, or null if {@code primitiveType} is not a primitive type
     */
    public static Class<?> getWrapperType(Class<?> primitiveType) {
        return PRIMITIVE_TO_WRAPPER.get(primitiveType);
    }
}
4 | */ 5 | 6 | package org.apache.doris.common.jmockit; 7 | 8 | import java.lang.reflect.Proxy; 9 | 10 | /** 11 | * Modify from mockit.internal.util.GeneratedClasses JMockit v1.13 12 | * Helper class to return type of mocked-object 13 | *

14 | * Copied from Apache Doris 15 | */ 16 | public final class GeneratedClasses { 17 | private static final String IMPLCLASS_PREFIX = "$Impl_"; 18 | private static final String SUBCLASS_PREFIX = "$Subclass_"; 19 | 20 | private GeneratedClasses() { 21 | } 22 | 23 | static boolean isGeneratedImplementationClass(Class mockedType) { 24 | return isGeneratedImplementationClass(mockedType.getName()); 25 | } 26 | 27 | static boolean isGeneratedImplementationClass(String className) { 28 | return className.contains(IMPLCLASS_PREFIX); 29 | } 30 | 31 | static boolean isGeneratedSubclass(String className) { 32 | return className.contains(SUBCLASS_PREFIX); 33 | } 34 | 35 | static boolean isGeneratedClass(String className) { 36 | return isGeneratedSubclass(className) || isGeneratedImplementationClass(className); 37 | } 38 | 39 | static Class getMockedClassOrInterfaceType(Class aClass) { 40 | if (!Proxy.isProxyClass(aClass) && !isGeneratedImplementationClass(aClass)) { 41 | return isGeneratedSubclass(aClass.getName()) ? aClass.getSuperclass() : aClass; 42 | } else { 43 | return aClass.getInterfaces()[0]; 44 | } 45 | } 46 | 47 | static Class getMockedClass(Object mock) { 48 | return getMockedClassOrInterfaceType(mock.getClass()); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ThrowOfCheckedException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2006 JMockit developers 3 | * This file is subject to the terms of the MIT license (see LICENSE.txt). 
/**
 * Modify from mockit.internal.reflection.ThrowOfCheckedException JMockit v1.13
 * <p>
 * Lets a caller raise an arbitrary {@link Exception} from a method that does not declare it:
 * the exception is parked in a static field and thrown from this class's constructor, which
 * is invoked reflectively.
 */
public final class ThrowOfCheckedException {
    // Exception the next constructor call will throw; written only inside doThrow.
    private static Exception exceptionToThrow;

    // Throws whatever exception is currently parked in the static field.
    ThrowOfCheckedException() throws Exception {
        throw exceptionToThrow;
    }

    /**
     * Stores {@code checkedException} and triggers the constructor through
     * {@code ConstructorReflection}.
     * Synchronized so the static field is not overwritten between the store and the
     * constructor call.
     * NOTE(review): presumably the reflective helper lets the constructor's exception
     * propagate unchanged - confirm against ConstructorReflection. The field is not cleared
     * afterwards, so the last exception stays referenced.
     *
     * @param checkedException exception to raise
     */
    public static synchronized void doThrow(Exception checkedException) {
        exceptionToThrow = checkedException;
        ConstructorReflection.newInstanceUsingDefaultConstructor(ThrowOfCheckedException.class);
    }
}
/**
 * Immutable holder for the two command-line settings shown here; accessors are generated
 * by Lombok's {@code @Getter}.
 */
@Getter
public class CommandLineOptions {

    // Config path as given on the command line.
    private final String configPath;

    // Recovery flag; boxed, so it can be null if the caller passes null.
    private final Boolean recovery;

    /**
     * @param configPath value stored as the config path
     * @param recovery   value stored as the recovery flag
     */
    public CommandLineOptions(String configPath, Boolean recovery) {
        this.configPath = configPath;
        this.recovery = recovery;
    }
}
/**
 * Shared string constants: property keys and fixed values for Hive, Spark, Hadoop
 * security and S3.
 */
public interface Constants {

    // Hive / Spark / Hadoop security keys and values.
    String HIVE_METASTORE_URIS = "hive.metastore.uris";
    String SPARK_STANDALONE_SCHEME = "spark";
    String HADOOP_AUTH_KERBEROS = "kerberos";
    String HADOOP_SECURITY_AUTHENTICATION = "hadoop.security.authentication";
    String HADOOP_KERBEROS_PRINCIPAL = "hadoop.kerberos.principal";
    String HADOOP_KERBEROS_KEYTAB = "hadoop.kerberos.keytab";

    // Name of the default catalog.
    String DEFAULT_CATALOG = "internal";

    // S3 property keys.
    String S3_ENDPOINT = "s3.endpoint";
    String S3_REGION = "s3.region";
    String S3_ACCESS_KEY = "s3.access_key";
    String S3_SECRET_KEY = "s3.secret_key";
    String S3_TOKEN = "s3.session_token";

}
/**
 * Plain data holder describing one load job; getters, setters, equals/hashCode and
 * toString are generated by Lombok's {@code @Data}.
 */
@Data
public class LoadInfo {

    private String dbName;
    // NOTE(review): element type is not visible here - presumably table names as strings; confirm.
    private List tblNames;
    private String label;
    private String clusterName;
    private String state;
    private String failMsg;
    private String trackingUrl;

}
/**
 * Generic response envelope: a code, a message, a JSON payload and a count.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class ResponseEntity {

    private Integer code;
    private String msg;
    // Payload kept as a raw JSON tree; callers decide how to bind it.
    private JsonNode data;
    private Integer count;

}
/**
 * Load modes known to the loader. The meaning of each mode is defined where the enum is
 * consumed, which is not visible from this file.
 */
public enum LoadMode {
    PUSH, PULL;
}
/**
 * Storage back-ends known to the loader: {@link #HDFS} and {@link #S3}.
 *
 * <p>The declaration order is kept unchanged because it fixes each constant's ordinal.
 */
public enum StorageType {
    HDFS,
    S3
}
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.common.meta; 19 | 20 | import org.apache.doris.common.LoadInfo; 21 | 22 | import lombok.Data; 23 | 24 | @Data 25 | public class LoadInfoResponse { 26 | 27 | private String status; 28 | private String msg; 29 | private LoadInfo jobInfo; 30 | 31 | } 32 | -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/main/java/org/apache/doris/exception/SparkLoadException.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
/**
 * Checked exception type of the spark-load module.
 */
public class SparkLoadException extends Exception {

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message detail message
     * @param cause   underlying failure, retrievable through {@link #getCause()}
     */
    public SparkLoadException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception without a cause.
     *
     * @param message detail message
     */
    public SparkLoadException(String message) {
        super(message);
    }

}
17 | 18 | package org.apache.doris.load; 19 | 20 | import org.apache.doris.config.JobConfig; 21 | import org.apache.doris.load.job.Loader; 22 | import org.apache.doris.load.job.PullLoader; 23 | 24 | public class LoaderFactory { 25 | 26 | public static Loader createLoader(JobConfig jobConfig, Boolean isRecoveryMode) { 27 | switch (jobConfig.getLoadMode()) { 28 | case PULL: 29 | return new PullLoader(jobConfig, isRecoveryMode); 30 | case PUSH: 31 | default: 32 | throw new UnsupportedOperationException(); 33 | } 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/Recoverable.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.load.job; 19 | 20 | import org.apache.doris.exception.SparkLoadException; 21 | 22 | public interface Recoverable { 23 | 24 | boolean canBeRecovered() throws SparkLoadException; 25 | 26 | void prepareRecover() throws SparkLoadException; 27 | 28 | } 29 | -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/main/java/org/apache/doris/util/DateUtils.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
/**
 * Date formatting helpers bound to the JVM's default time zone.
 */
public class DateUtils {

    /** Formatter producing {@code yyyy-MM-dd HH:mm:ss} in the system default zone. */
    public static final DateTimeFormatter NORMAL_FORMATER = zoned("yyyy-MM-dd HH:mm:ss");

    /** Formatter producing the digits-only form {@code yyyyMMddHHmmss} in the system default zone. */
    public static final DateTimeFormatter NUMBER_FORMATER = zoned("yyyyMMddHHmmss");

    /**
     * Formats the current local date-time (system default zone) with the given formatter.
     *
     * @param formatter formatter to apply
     * @return the formatted current date-time
     */
    public static String getFormattedNow(DateTimeFormatter formatter) {
        LocalDateTime now = LocalDateTime.now(ZoneId.systemDefault());
        return formatter.format(now);
    }

    /** Builds a formatter for {@code pattern} pinned to the system default zone. */
    private static DateTimeFormatter zoned(String pattern) {
        return DateTimeFormatter.ofPattern(pattern).withZone(ZoneId.systemDefault());
    }

}
17 | 18 | package org.apache.doris.util; 19 | 20 | import org.apache.http.HttpEntity; 21 | import org.apache.http.client.config.RequestConfig; 22 | import org.apache.http.impl.client.CloseableHttpClient; 23 | import org.apache.http.impl.client.HttpClients; 24 | 25 | import java.io.BufferedReader; 26 | import java.io.IOException; 27 | import java.io.InputStream; 28 | import java.io.InputStreamReader; 29 | 30 | public class HttpUtils { 31 | 32 | public static final int DEFAULT_CONN_TIMEOUT = 60 * 1000; 33 | public static final int DEFAULT_SO_TIMEOUT = 60 * 1000; 34 | 35 | public static CloseableHttpClient getClient() { 36 | return getClient(DEFAULT_CONN_TIMEOUT, DEFAULT_SO_TIMEOUT); 37 | } 38 | 39 | public static CloseableHttpClient getClient(int connectionTimeout, int socketTimeout) { 40 | RequestConfig requestConfig = RequestConfig.custom() 41 | .setConnectTimeout(connectionTimeout) 42 | .setSocketTimeout(socketTimeout) 43 | .build(); 44 | return HttpClients.custom().setDefaultRequestConfig(requestConfig).build(); 45 | } 46 | 47 | public static String getEntityContent(HttpEntity entity) throws IOException { 48 | StringBuilder sb = new StringBuilder(); 49 | try (InputStream is = entity.getContent(); 50 | BufferedReader reader = new BufferedReader(new InputStreamReader(is))) { 51 | String line; 52 | while ((line = reader.readLine()) != null) { 53 | sb.append(line); 54 | } 55 | } 56 | return sb.toString(); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
# Root logger: INFO level, routed to the "console" appender defined below.
log4j.rootLogger=INFO,console
# Additivity setting for the org.apache logger.
log4j.additivity.org.apache=true
# Console appender: INFO threshold, flushed after every event, written to stdout.
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.Threshold=INFO
log4j.appender.console.ImmediateFlush=true
log4j.appender.console.Target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Pattern: timestamp, level, thread, class.method():line, message.
# NOTE(review): "%tid" looks like the log4j2 thread-id converter; with a log4j 1.x
# PatternLayout it would presumably print the thread name followed by the literal "id".
# Confirm which logging backend is on the classpath.
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %p (%t|%tid) [%C{1}.%M():%L] %m%n
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris; 19 | 20 | import org.apache.doris.config.JobConfig; 21 | 22 | import org.junit.jupiter.api.Assertions; 23 | import static org.junit.jupiter.api.Assertions.*; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | 29 | class SparkLoadRunnerTest { 30 | 31 | @Test 32 | void loadHadoopConfig() { 33 | 34 | JobConfig jobConfig = new JobConfig(); 35 | Map envMap = new HashMap<>(); 36 | envMap.put("HADOOP_CONF_DIR", this.getClass().getResource("/").getPath()); 37 | jobConfig.setEnv(envMap); 38 | SparkLoadRunner.loadHadoopConfig(jobConfig); 39 | Assertions.assertEquals("60000", jobConfig.getHadoopProperties().get("hadoop.http.idle_timeout.ms")); 40 | Assertions.assertEquals("1", jobConfig.getHadoopProperties().get("dfs.replication")); 41 | Assertions.assertEquals("my.hadoop.com", jobConfig.getHadoopProperties().get("yarn.resourcemanager.address")); 42 | 43 | } 44 | } -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/test/java/org/apache/doris/load/LoaderFactoryTest.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.load; 19 | 20 | import org.apache.doris.common.enums.LoadMode; 21 | import org.apache.doris.config.JobConfig; 22 | import org.apache.doris.load.job.Loader; 23 | import org.apache.doris.load.job.PullLoader; 24 | 25 | import org.junit.jupiter.api.Assertions; 26 | import org.junit.jupiter.api.Test; 27 | 28 | class LoaderFactoryTest { 29 | 30 | @Test 31 | void createLoader() { 32 | 33 | JobConfig jobConfig = new JobConfig(); 34 | jobConfig.setLoadMode(null); 35 | Assertions.assertThrows(NullPointerException.class, () -> LoaderFactory.createLoader(jobConfig, false)); 36 | 37 | jobConfig.setLoadMode(LoadMode.PUSH); 38 | Assertions.assertThrows(UnsupportedOperationException.class, () -> LoaderFactory.createLoader(jobConfig, false)); 39 | 40 | jobConfig.setLoadMode(LoadMode.PULL); 41 | Assertions.assertDoesNotThrow(() -> LoaderFactory.createLoader(jobConfig, false)); 42 | Loader loader = LoaderFactory.createLoader(jobConfig, false);; 43 | Assertions.assertInstanceOf(PullLoader.class, loader); 44 | 45 | } 46 | } -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/test/java/org/apache/doris/util/DateUtilsTest.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 
2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.util; 19 | 20 | import mockit.Mock; 21 | import mockit.MockUp; 22 | import org.junit.jupiter.api.Assertions; 23 | import org.junit.jupiter.api.Test; 24 | 25 | import java.time.LocalDateTime; 26 | import java.time.ZoneId; 27 | 28 | class DateUtilsTest { 29 | 30 | @Test 31 | void getFormattedNow() { 32 | new MockUp() { 33 | @Mock 34 | public LocalDateTime now(ZoneId zoneId) { 35 | return LocalDateTime.of(2024,8,1,12,34,56); 36 | } 37 | }; 38 | Assertions.assertEquals("2024-08-01 12:34:56", DateUtils.getFormattedNow(DateUtils.NORMAL_FORMATER)); 39 | Assertions.assertEquals("20240801123456", DateUtils.getFormattedNow(DateUtils.NUMBER_FORMATER)); 40 | } 41 | } -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/test/resources/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | hadoop.http.idle_timeout.ms 23 | 60000 24 | 25 | -------------------------------------------------------------------------------- /spark-load/spark-load-core/src/test/resources/hdfs-site.xml: 
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Launcher for org.apache.doris.SparkLoadRunner.
# Resolves SPARK_LOAD_HOME and a java executable, builds CLASSPATH from the jars
# in ${SPARK_LOAD_HOME}/lib, then runs the main class with all script arguments.

# Default SPARK_LOAD_HOME to the parent of this script's directory unless the caller set it.
if [[ -z "${SPARK_LOAD_HOME:-}" ]]; then
    cur_dir="$(dirname "$0")/../"
    SPARK_LOAD_HOME="$(readlink -f "${cur_dir}")"
fi

export SPARK_LOAD_HOME

# Prefer ${JAVA_HOME}/bin/java; otherwise fall back to whatever `java` is on PATH.
if [[ -z "${JAVA_HOME:-}" ]]; then
    if ! command -v java &>/dev/null; then
        JAVA=""
    else
        JAVA="$(command -v java)"
    fi
else
    JAVA="${JAVA_HOME}/bin/java"
fi

if [[ ! -x "${JAVA}" ]]; then
    echo "The JAVA_HOME environment variable is not set correctly"
    echo "This environment variable is required to run this program"
    echo "Note: JAVA_HOME should point to a JDK and not a JRE"
    exit 1
fi

# Collect every jar under lib/. The spark-load-core jar is held back so it can be
# placed in front of the other jars afterwards.
SPARK_LOAD_CORE_JAR=
for f in "${SPARK_LOAD_HOME}/lib"/*.jar; do
    if [[ $(basename "${f}") == "spark-load-core"*".jar" ]]; then
        SPARK_LOAD_CORE_JAR="${f}"
        continue
    fi
    CLASSPATH="${f}:${CLASSPATH}"
done
CLASSPATH="${SPARK_LOAD_CORE_JAR}:${CLASSPATH}"
# conf/ and lib/ live under the installation root, not under the core jar file.
export CLASSPATH="${SPARK_LOAD_HOME}/conf:${CLASSPATH}:${SPARK_LOAD_HOME}/lib"

# Quoted so a JAVA_HOME containing spaces still works.
"${JAVA}" org.apache.doris.SparkLoadRunner "$@"
17 | 18 | package org.apache.doris.common; 19 | 20 | import com.google.common.base.Strings; 21 | 22 | // Exception for Spark DPP process 23 | public class SparkDppException extends Exception { 24 | public SparkDppException(String msg, Throwable cause) { 25 | super(Strings.nullToEmpty(msg), cause); 26 | } 27 | 28 | public SparkDppException(Throwable cause) { 29 | super(cause); 30 | } 31 | 32 | public SparkDppException(String msg, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { 33 | super(Strings.nullToEmpty(msg), cause, enableSuppression, writableStackTrace); 34 | } 35 | 36 | public SparkDppException(String msg) { 37 | super(Strings.nullToEmpty(msg)); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DorisKryoRegistrator.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.load.loadv2.dpp; 19 | 20 | import org.apache.doris.common.io.BitmapValue; 21 | import org.apache.doris.common.io.Roaring64Map; 22 | 23 | import com.esotericsoftware.kryo.Kryo; 24 | import org.apache.spark.serializer.KryoRegistrator; 25 | 26 | /** 27 | * register etl classes with Kryo when using Kryo serialization. 28 | */ 29 | public class DorisKryoRegistrator implements KryoRegistrator { 30 | 31 | @Override 32 | public void registerClasses(Kryo kryo) { 33 | kryo.register(Roaring64Map.class); 34 | kryo.register(BitmapValue.class); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeBuilder.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.load.loadv2.dpp; 19 | 20 | import org.apache.doris.config.EtlJobConfig; 21 | 22 | // RollupTreeBuilder is used to get the RollupTree from the TableMeta 23 | public abstract interface RollupTreeBuilder { 24 | public RollupTreeNode build(EtlJobConfig.EtlTable tableMeta); 25 | } 26 | -------------------------------------------------------------------------------- /spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeNode.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package org.apache.doris.load.loadv2.dpp; 19 | 20 | import org.apache.doris.config.EtlJobConfig; 21 | 22 | import java.util.List; 23 | 24 | // Base and rollup indexes are managed by as a RollupTree in order to 25 | // produce the rollup index data from the best-fit index to get better performance. 
26 | // The calculation will be done through preorder traversal 27 | public class RollupTreeNode { 28 | public RollupTreeNode parent; 29 | public List children; 30 | public long indexId; 31 | public List keyColumnNames; 32 | public List valueColumnNames; 33 | public int level; 34 | public EtlJobConfig.EtlIndex indexMeta; 35 | 36 | public String toString() { 37 | StringBuilder builder = new StringBuilder(); 38 | for (int i = 0; i < level; ++i) { 39 | builder.append("-"); 40 | } 41 | builder.append("indexid: " + indexId + "\n"); 42 | if (children != null && !children.isEmpty()) { 43 | for (int i = 0; i < level; ++i) { 44 | builder.append("-"); 45 | } 46 | builder.append("children:\n"); 47 | for (RollupTreeNode child : children) { 48 | builder.append(child.toString()); 49 | } 50 | } 51 | return builder.toString(); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/StringAccumulator.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package org.apache.doris.load.loadv2.dpp; 19 | 20 | import org.apache.spark.util.AccumulatorV2; 21 | 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | 25 | // This class is a accumulator of string based on AccumulatorV2 26 | // (https://spark.apache.org/docs/latest/api/java/org/apache/spark/util/AccumulatorV2.html). 27 | // Spark does not provide string accumulator. 28 | // 29 | // This class is used to collect the invalid rows when doing etl. 30 | public class StringAccumulator extends AccumulatorV2 { 31 | private List strs = new ArrayList<>(); 32 | 33 | @Override 34 | public boolean isZero() { 35 | return strs.isEmpty(); 36 | } 37 | 38 | @Override 39 | public AccumulatorV2 copy() { 40 | StringAccumulator newAccumulator = new StringAccumulator(); 41 | newAccumulator.strs.addAll(this.strs); 42 | return newAccumulator; 43 | } 44 | 45 | @Override 46 | public void reset() { 47 | strs.clear(); 48 | } 49 | 50 | @Override 51 | public void add(String v) { 52 | strs.add(v); 53 | } 54 | 55 | @Override 56 | public void merge(AccumulatorV2 other) { 57 | StringAccumulator o = (StringAccumulator) other; 58 | strs.addAll(o.strs); 59 | } 60 | 61 | @Override 62 | public String value() { 63 | return strs.toString(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /tools/releasing/create_release_branch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script creates the release branch.
#
# Input (environment): RELEASE_VERSION - version used to name
# the new branch "release-${RELEASE_VERSION}".
##############################################################

# Read before "nounset" is enabled so that a missing variable produces the
# friendly message below instead of an "unbound variable" abort.
RELEASE_VERSION=${RELEASE_VERSION:-}

if [ -z "${RELEASE_VERSION}" ]; then
    echo "RELEASE_VERSION was not set"
    exit 1
fi

# fail immediately
set -o errexit
set -o nounset

CURR_DIR="$(pwd)"
BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
PROJECT_ROOT="${BASE_DIR}/../../"

# Sanity check to ensure that resolved paths are valid; a LICENSE file should always exist in project root
# (expansions are quoted so that a checkout path containing spaces still works)
if [ ! -f "${PROJECT_ROOT}/LICENSE.txt" ]; then
    echo "Project root path ${PROJECT_ROOT} is not valid; script may be in the wrong directory."
    exit 1
fi

TARGET_BRANCH="release-${RELEASE_VERSION}"

cd "${PROJECT_ROOT}"
git checkout -b "${TARGET_BRANCH}"

RELEASE_COMMIT_HASH="$(git rev-parse HEAD)"
echo "Created a new release branch ${TARGET_BRANCH} with commit hash ${RELEASE_COMMIT_HASH}."

# Return to the directory the script was started from.
cd "${CURR_DIR}"
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script updates the branch version in pom.xml.
#
# Input (environment): RELEASE_VERSION - version written into
# the spark-load and spark-doris-connector poms and committed.
##############################################################

# Read before "nounset" is enabled so that a missing variable produces the
# friendly message below instead of an "unbound variable" abort.
RELEASE_VERSION=${RELEASE_VERSION:-}

if [ -z "${RELEASE_VERSION}" ]; then
    echo "RELEASE_VERSION was not set."
    exit 1
fi

# fail immediately
set -o errexit
set -o nounset

CURR_DIR="$(pwd)"
BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
PROJECT_ROOT="${BASE_DIR}/../../"

# Sanity check to ensure that resolved paths are valid; a LICENSE file should always exist in project root
# (expansions are quoted so that a checkout path containing spaces still works)
if [ ! -f "${PROJECT_ROOT}/LICENSE.txt" ]; then
    echo "Project root path ${PROJECT_ROOT} is not valid; script may be in the wrong directory."
    exit 1
fi

# Set both the project version and the "revision" property in each module tree.
cd "${PROJECT_ROOT}/spark-load"
mvn versions:set -DgenerateBackupPoms=false "-DnewVersion=${RELEASE_VERSION}"
mvn versions:set-property -DgenerateBackupPoms=false -Dproperty=revision "-DnewVersion=${RELEASE_VERSION}"

cd "${PROJECT_ROOT}/spark-doris-connector"
mvn versions:set -DgenerateBackupPoms=false "-DnewVersion=${RELEASE_VERSION}"
mvn versions:set-property -DgenerateBackupPoms=false -Dproperty=revision "-DnewVersion=${RELEASE_VERSION}"

# "-a" stages every modified tracked file, so the pom changes made under both directories are committed.
git commit -am "[release] Update version to ${RELEASE_VERSION}"

RELEASE_VERSION_COMMIT_HASH="$(git rev-parse HEAD)"

echo "Done. Created a new commit for the new version ${RELEASE_VERSION}, with hash ${RELEASE_VERSION_COMMIT_HASH}"
echo "If this is a new version to be released (or a candidate to be voted on), don't forget to create a signed release tag on GitHub and push the changes."

# Return to the directory the script was started from.
cd "${CURR_DIR}"