├── .asf.yaml ├── .editorconfig ├── .github ├── boring-cyborg.yml └── workflows │ ├── push_pr.yml │ └── weekly.yml ├── .gitignore ├── .gitmodules ├── .idea └── vcs.xml ├── LICENSE ├── NOTICE ├── README.md ├── docs ├── content.zh │ └── docs │ │ └── connectors │ │ ├── datastream │ │ └── mongodb.md │ │ └── table │ │ └── mongodb.md ├── content │ └── docs │ │ └── connectors │ │ ├── datastream │ │ └── mongodb.md │ │ └── table │ │ └── mongodb.md └── data │ └── mongodb.yml ├── flink-connector-mongodb-e2e-tests ├── pom.xml └── src │ └── test │ ├── java │ └── org │ │ └── apache │ │ └── flink │ │ └── tests │ │ └── util │ │ └── mongodb │ │ └── MongoE2ECase.java │ └── resources │ ├── META-INF │ └── services │ │ └── org.junit.jupiter.api.extension.Extension │ ├── e2e_append_only.sql │ ├── e2e_upsert.sql │ └── log4j2-test.properties ├── flink-connector-mongodb ├── archunit-violations │ ├── 0339fa9f-980e-4b71-a187-dde3c2e4d22d │ ├── 07a10766-610a-4f6d-9b7f-82d523e68bb6 │ ├── 54355260-ce16-4e0e-9768-00cb07fadf7e │ ├── ae2f06aa-8fb6-4585-a50c-ffe88f59697d │ ├── bf400375-9b08-442c-80c1-051333988494 │ ├── d5033bdc-beb6-4505-9fd1-ab50e6c4be20 │ ├── ed03507c-2a0e-4dc2-b0c6-b90a8469bb7c │ ├── f871efde-12fe-46a5-9b10-11382284683f │ └── stored.rules ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── flink │ │ │ └── connector │ │ │ └── mongodb │ │ │ ├── common │ │ │ ├── config │ │ │ │ └── MongoConnectionOptions.java │ │ │ └── utils │ │ │ │ ├── MongoConstants.java │ │ │ │ ├── MongoSerdeUtils.java │ │ │ │ ├── MongoUtils.java │ │ │ │ └── MongoValidationUtils.java │ │ │ ├── sink │ │ │ ├── MongoSink.java │ │ │ ├── MongoSinkBuilder.java │ │ │ ├── config │ │ │ │ └── MongoWriteOptions.java │ │ │ └── writer │ │ │ │ ├── MongoWriter.java │ │ │ │ ├── context │ │ │ │ ├── DefaultMongoSinkContext.java │ │ │ │ └── MongoSinkContext.java │ │ │ │ └── serializer │ │ │ │ └── MongoSerializationSchema.java │ │ │ ├── source │ │ │ ├── MongoSource.java │ │ │ ├── MongoSourceBuilder.java │ │ │ ├── config │ │ │ │ └── MongoReadOptions.java │ │ │ ├── enumerator │ │ │ │ ├── MongoSourceEnumState.java │ │ │ │ ├── MongoSourceEnumStateSerializer.java │ │ │ │ ├── MongoSourceEnumerator.java │ │ │ │ ├── assigner │ │ │ │ │ ├── MongoScanSplitAssigner.java │ │ │ │ │ └── MongoSplitAssigner.java │ │ │ │ └── splitter │ │ │ │ │ ├── MongoPaginationSplitter.java │ │ │ │ │ ├── MongoSampleSplitter.java │ │ │ │ │ ├── MongoShardedSplitter.java │ │ │ │ │ ├── MongoSingleSplitter.java │ │ │ │ │ ├── MongoSplitContext.java │ │ │ │ │ ├── MongoSplitVectorSplitter.java │ │ │ │ │ ├── MongoSplitters.java │ │ │ │ │ └── PartitionStrategy.java │ │ │ ├── reader │ │ │ │ ├── MongoSourceReader.java │ │ │ │ ├── MongoSourceReaderContext.java │ │ │ │ ├── deserializer │ │ │ │ │ └── MongoDeserializationSchema.java │ │ │ │ ├── emitter │ │ │ │ │ └── MongoRecordEmitter.java │ │ │ │ └── split │ │ │ │ │ ├── MongoScanSourceSplitReader.java │ │ │ │ │ └── MongoSourceSplitReader.java │ │ │ └── split │ │ │ │ ├── MongoScanSourceSplit.java │ │ │ │ ├── MongoScanSourceSplitState.java │ │ │ │ ├── MongoSourceSplit.java │ │ │ │ ├── MongoSourceSplitSerializer.java │ │ │ │ └── MongoSourceSplitState.java │ │ │ └── table │ │ │ ├── FilterHandlingPolicy.java │ │ │ ├── MongoConnectorOptions.java │ │ │ ├── MongoDynamicTableFactory.java │ │ │ ├── MongoDynamicTableSink.java │ │ │ ├── MongoDynamicTableSource.java │ │ │ ├── MongoFilterPushDownVisitor.java │ │ │ ├── MongoPrimaryKeyExtractor.java │ │ │ ├── MongoRowDataLookupFunction.java │ │ │ ├── MongoShardKeysExtractor.java │ │ │ ├── 
config │ │ │ └── MongoConfiguration.java │ │ │ ├── converter │ │ │ ├── BsonToRowDataConverters.java │ │ │ └── RowDataToBsonConverters.java │ │ │ └── serialization │ │ │ ├── MongoRowDataDeserializationSchema.java │ │ │ └── MongoRowDataSerializationSchema.java │ └── resources │ │ └── META-INF │ │ └── services │ │ └── org.apache.flink.table.factories.Factory │ └── test │ ├── java │ └── org │ │ └── apache │ │ └── flink │ │ ├── architecture │ │ ├── ProductionCodeArchitectureTest.java │ │ └── TestCodeArchitectureTest.java │ │ └── connector │ │ └── mongodb │ │ ├── common │ │ └── utils │ │ │ └── MongoSerdeUtilsTest.java │ │ ├── sink │ │ ├── MongoSinkITCase.java │ │ └── writer │ │ │ └── MongoWriterITCase.java │ │ ├── source │ │ ├── MongoSourceITCase.java │ │ └── enumerator │ │ │ ├── MongoSourceEnumStateSerializerTest.java │ │ │ └── splitter │ │ │ ├── MongoPaginationSplitterTest.java │ │ │ └── MongoSampleSplitterTest.java │ │ ├── table │ │ ├── MongoDynamicTableFactoryTest.java │ │ ├── MongoDynamicTableSinkITCase.java │ │ ├── MongoDynamicTableSourceITCase.java │ │ ├── MongoFilterPushDownVisitorTest.java │ │ ├── MongoPartitionedTableSinkITCase.java │ │ ├── MongoPrimaryKeyExtractorTest.java │ │ ├── MongoShardKeysExtractorTest.java │ │ ├── MongoTablePlanTest.java │ │ └── converter │ │ │ └── MongoConvertersTest.java │ │ └── testutils │ │ ├── MongoShardedContainers.java │ │ └── MongoTestUtil.java │ └── resources │ ├── archunit.properties │ ├── log4j2-test.properties │ └── org │ └── apache │ └── flink │ └── connector │ └── mongodb │ └── table │ └── MongoTablePlanTest.xml ├── flink-sql-connector-mongodb ├── pom.xml └── src │ └── main │ └── resources │ └── META-INF │ └── NOTICE ├── pom.xml └── tools ├── ci └── log4j.properties └── maven ├── checkstyle.xml └── suppressions.xml /.asf.yaml: -------------------------------------------------------------------------------- 1 | github: 2 | enabled_merge_buttons: 3 | squash: true 4 | merge: false 5 | rebase: true 6 | labels: 7 | - flink 8 | - mongodb 9 | - connector 10 | autolink_jira: FLINK 11 | collaborators: 12 | - flinkbot 13 | notifications: 14 | commits: commits@flink.apache.org 15 | issues: issues@flink.apache.org 16 | pullrequests: issues@flink.apache.org 17 | jobs: builds@flink.apache.org 18 | jira_options: link label 19 | -------------------------------------------------------------------------------- /.github/boring-cyborg.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | labelPRBasedOnFilePath: 20 | component=BuildSystem: 21 | - .github/**/* 22 | - tools/maven/* 23 | 24 | component=Documentation: 25 | - docs/**/* 26 | 27 | component=Connectors/MongoDB: 28 | - flink-connector-mongodb*/**/* 29 | - flink-sql-connector-mongodb*/**/* 30 | 31 | ###### IssueLink Adder ################################################################################################# 32 | # Insert Issue (Jira/Github etc) link in PR description based on the Issue ID in PR title. 33 | insertIssueLinkInPrDescription: 34 | # specify the placeholder for the issue link that should be present in the description 35 | descriptionIssuePlaceholderRegexp: "^Issue link: (.*)$" 36 | matchers: 37 | # you can have several matches - for different types of issues 38 | # only the first matching entry is replaced 39 | jiraIssueMatch: 40 | # specify the regexp of issue id that you can find in the title of the PR 41 | # the match groups can be used to build the issue id (${1}, ${2}, etc.). 42 | titleIssueIdRegexp: \[(FLINK-[0-9]+)\] 43 | # the issue link to be added. ${1}, ${2} ... are replaced with the match groups from the 44 | # title match (remember to use quotes) 45 | descriptionIssueLink: "[${1}](https://issues.apache.org/jira/browse/${1}/)" 46 | docOnlyIssueMatch: 47 | titleIssueIdRegexp: \[hotfix\] 48 | descriptionIssueLink: "`Documentation only change, no JIRA issue`" 49 | 50 | ###### Title Validator ################################################################################################# 51 | # Verifies if commit/PR titles match the regexp specified 52 | verifyTitles: 53 | # Regular expression that should be matched by titles of commits or PR 54 | titleRegexp: ^\[FLINK-[0-9]+\].*$|^\[FLINK-XXXXX\].*$|^\[hotfix].*$ 55 | # If set to true, it will always check the PR title (as opposed to the individual commits). 56 | alwaysUsePrTitle: false 57 | # If set to true, it will only check the commit in case there is a single commit. 58 | # In case of multiple commits it will check PR title. 59 | # This reflects the standard behaviour of Github that for `Squash & Merge` GitHub 60 | # uses the PR title rather than commit messages for the squashed commit ¯\_(ツ)_/¯ 61 | # For single-commit PRs it takes the squashed commit message from the commit as expected. 62 | # 63 | # If set to false it will check all commit messages. This is useful when you do not squash commits at merge. 64 | validateEitherPrOrSingleCommitTitle: true 65 | # The title the GitHub status should appear from. 66 | statusTitle: "Title Validator" 67 | # A custom message to be displayed when the title passes validation. 68 | successMessage: "Validation successful!" 69 | # A custom message to be displayed when the title fails validation. 70 | # Allows insertion of ${type} (commit/PR), ${title} (the title validated) and ${regex} (the titleRegexp above). 71 | failureMessage: "Wrong ${type} title: ${title}" 72 | 73 | # Various Flags to control behaviour of the "Labeler" 74 | labelerFlags: 75 | # If this flag is changed to 'false', labels would only be added when the PR is first created 76 | # and not when existing PR is updated. 77 | # The default is 'true' which means the labels would be added when PR is updated even if they 78 | # were removed by the user 79 | labelOnPRUpdates: true 80 | 81 | # Comment to be posted to welcome users when they open their first PR 82 | firstPRWelcomeComment: > 83 | Thanks for opening this pull request! 
Please check out our contributing guidelines. (https://flink.apache.org/contributing/how-to-contribute.html) 84 | 85 | # Comment to be posted to congratulate user on their first merged PR 86 | firstPRMergeComment: > 87 | Awesome work, congrats on your first merged pull request! 88 | -------------------------------------------------------------------------------- /.github/workflows/push_pr.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # We need to specify repo related information here since Apache INFRA doesn't differentiate 20 | # between several workflows with the same names while preparing a report for GHA usage 21 | # https://infra-reports.apache.org/#ghactions 22 | name: Flink Connector MongoDB CI 23 | on: [push, pull_request] 24 | concurrency: 25 | group: ${{ github.workflow }}-${{ github.ref }} 26 | cancel-in-progress: true 27 | jobs: 28 | compile_and_test: 29 | strategy: 30 | matrix: 31 | mongodb: [ mongodb4, mongodb5, mongodb6, mongodb7 ] 32 | flink: [ 1.19-SNAPSHOT, 1.20-SNAPSHOT ] 33 | jdk: [ '8, 11, 17, 21' ] 34 | 35 | uses: apache/flink-connector-shared-utils/.github/workflows/ci.yml@ci_utils 36 | with: 37 | flink_version: ${{ matrix.flink }} 38 | jdk_version: ${{ matrix.jdk }} 39 | optional_maven_profiles: ${{ matrix.mongodb }} 40 | -------------------------------------------------------------------------------- /.github/workflows/weekly.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | # We need to specify repo related information here since Apache INFRA doesn't differentiate 20 | # between several workflows with the same names while preparing a report for GHA usage 21 | # https://infra-reports.apache.org/#ghactions 22 | name: Weekly Flink Connector MongoDB 23 | on: 24 | schedule: 25 | - cron: "0 0 * * 0" 26 | workflow_dispatch: 27 | jobs: 28 | compile_and_test: 29 | if: github.repository_owner == 'apache' 30 | strategy: 31 | matrix: 32 | flink_branches: [ { 33 | flink: 1.19-SNAPSHOT, 34 | jdk: '8, 11, 17, 21', 35 | branch: main 36 | }, { 37 | flink: 1.20-SNAPSHOT, 38 | jdk: '8, 11, 17, 21', 39 | branch: main 40 | },{ 41 | flink: 1.19.2, 42 | jdk: '8, 11, 17, 21', 43 | branch: v2.0 44 | }, { 45 | flink: 1.20.1, 46 | jdk: '8, 11, 17, 21', 47 | branch: main 48 | }] 49 | uses: apache/flink-connector-shared-utils/.github/workflows/ci.yml@ci_utils 50 | with: 51 | flink_version: ${{ matrix.flink_branches.flink }} 52 | connector_branch: ${{ matrix.flink_branches.branch }} 53 | jdk_version: ${{ matrix.flink_branches.jdk || '8, 11' }} 54 | run_dependency_convergence: false 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eslintcache 2 | .cache 3 | scalastyle-output.xml 4 | .classpath 5 | .idea/* 6 | !.idea/vcs.xml 7 | .metadata 8 | .settings 9 | .project 10 | .version.properties 11 | filter.properties 12 | logs.zip 13 | .mvn/wrapper/*.jar 14 | target 15 | tmp 16 | *.class 17 | *.iml 18 | *.swp 19 | *.jar 20 | *.zip 21 | *.log 22 | *.pyc 23 | .DS_Store 24 | build-target 25 | atlassian-ide-plugin.xml 26 | out/ 27 | /docs/api 28 | /docs/.bundle 29 | /docs/.rubydeps 30 | /docs/ruby2/.bundle 31 | /docs/ruby2/.rubydeps 32 | /docs/.jekyll-metadata 33 | *.ipr 34 | *.iws 35 | tools/flink 36 | tools/flink-* 37 | tools/releasing/release 38 | tools/japicmp-output 39 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tools/releasing/shared"] 2 | path = tools/releasing/shared 3 | url = git@github.com:apache/flink-connector-shared-utils.git 4 | branch = release_utils 5 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache Flink MongoDB Connector 2 | Copyright 2014-2025 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby 8 | granted, provided that this permission notice appear in all copies. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING 11 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, 12 | DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 13 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE 14 | USE OR PERFORMANCE OF THIS SOFTWARE. 15 | 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Flink MongoDB Connector 2 | 3 | This repository contains the official Apache Flink MongoDB connector. 4 | 5 | ## Apache Flink 6 | 7 | Apache Flink is an open source stream processing framework with powerful stream- and batch-processing capabilities. 8 | 9 | Learn more about Flink at [https://flink.apache.org/](https://flink.apache.org/) 10 | 11 | ## Building the Apache Flink MongoDB Connector from Source 12 | 13 | Prerequisites: 14 | 15 | * Unix-like environment (we use Linux, Mac OS X) 16 | * Git 17 | * Maven (we recommend version 3.8.6) 18 | * Java 11 19 | 20 | ``` 21 | git clone https://github.com/apache/flink-connector-mongodb.git 22 | cd flink-connector-mongodb 23 | mvn clean package -DskipTests 24 | ``` 25 | 26 | The resulting jars can be found in the `target` directory of the respective module. 27 | 28 | ## Developing Flink 29 | 30 | The Flink committers use IntelliJ IDEA to develop the Flink codebase. 31 | We recommend IntelliJ IDEA for developing projects that involve Scala code. 32 | 33 | Minimal requirements for an IDE are: 34 | * Support for Java and Scala (also mixed projects) 35 | * Support for Maven with Java and Scala 36 | 37 | ### IntelliJ IDEA 38 | 39 | The IntelliJ IDE supports Maven out of the box and offers a plugin for Scala development. 40 | 41 | * IntelliJ download: [https://www.jetbrains.com/idea/](https://www.jetbrains.com/idea/) 42 | * IntelliJ Scala Plugin: [https://plugins.jetbrains.com/plugin/?id=1347](https://plugins.jetbrains.com/plugin/?id=1347) 43 | 44 | Check out our [Setting up IntelliJ](https://nightlies.apache.org/flink/flink-docs-master/flinkDev/ide_setup.html#intellij-idea) guide for details. 45 | 46 | ## Support 47 | 48 | Don’t hesitate to ask! 49 | 50 | Contact the developers and community on the [mailing lists](https://flink.apache.org/community.html#mailing-lists) if you need any help. 51 | 52 | [Open an issue](https://issues.apache.org/jira/browse/FLINK) if you found a bug in Flink. 53 | 54 | ## Documentation 55 | 56 | The documentation of Apache Flink is located on the website: [https://flink.apache.org](https://flink.apache.org) 57 | or in the `docs/` directory of the source code. 58 | 59 | ## Fork and Contribute 60 | 61 | This is an active open-source project. We are always open to people who want to use the system or contribute to it. 62 | Contact us if you are looking for implementation tasks that fit your skills. 63 | This article describes [how to contribute to Apache Flink](https://flink.apache.org/contributing/how-to-contribute.html). 64 | 65 | ## About 66 | 67 | Apache Flink is an open source project of The Apache Software Foundation (ASF). 68 | The Apache Flink project originated from the [Stratosphere](http://stratosphere.eu) research project. 
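
## Example Usage

As a quick orientation after building the connector from source (see the build steps above), the sketch below shows how a `MongoSink` created with this repository's builder API could be attached to a DataStream job. It is a minimal, illustrative sketch rather than official documentation: the URI, database, collection, batch size and JSON payloads are placeholder values, and the connector documentation under `docs/` remains the authoritative reference.

```
import org.apache.flink.connector.mongodb.sink.MongoSink;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import com.mongodb.client.model.InsertOneModel;
import org.bson.BsonDocument;

public class MongoSinkExample {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Build the sink via MongoSinkBuilder; connection values are placeholders.
        MongoSink<String> sink =
                MongoSink.<String>builder()
                        .setUri("mongodb://user:password@127.0.0.1:27017")
                        .setDatabase("my_db")
                        .setCollection("my_coll")
                        .setBatchSize(1000)
                        // Convert each JSON string into a MongoDB bulk write action.
                        .setSerializationSchema(
                                (element, context) ->
                                        new InsertOneModel<>(BsonDocument.parse(element)))
                        .build();

        env.fromElements("{\"f1\": \"d1\"}", "{\"f1\": \"d2\"}")
                .sinkTo(sink)
                .name("MongoDB Sink");

        env.execute("MongoDB sink example");
    }
}
```

The sink buffers incoming elements and flushes them as bulk writes; by default it provides at-least-once delivery, which can be adjusted through `MongoSinkBuilder#setDeliveryGuarantee`.
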
69 | -------------------------------------------------------------------------------- /docs/data/mongodb.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | version: 2.1-SNAPSHOT 20 | variants: 21 | - maven: flink-connector-mongodb 22 | sql_url: https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-mongodb/$full_version/flink-sql-connector-mongodb-$full_version.jar 23 | -------------------------------------------------------------------------------- /flink-connector-mongodb-e2e-tests/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 21 | 22 | 4.0.0 23 | 24 | 25 | flink-connector-mongodb-parent 26 | org.apache.flink 27 | 2.1-SNAPSHOT 28 | 29 | 30 | flink-connector-mongodb-e2e-tests 31 | Flink : E2E Tests : MongoDB 32 | 33 | 34 | 35 | org.apache.flink 36 | flink-connector-test-utils 37 | ${flink.version} 38 | test 39 | 40 | 41 | 42 | 43 | org.apache.flink 44 | flink-sql-connector-mongodb 45 | ${project.version} 46 | test 47 | 48 | 49 | 50 | org.apache.flink 51 | flink-connector-mongodb 52 | ${project.version} 53 | test-jar 54 | test 55 | 56 | 57 | 58 | org.mongodb 59 | mongodb-driver-sync 60 | test 61 | 62 | 63 | 64 | org.testcontainers 65 | mongodb 66 | test 67 | 68 | 69 | 70 | 71 | 72 | run-end-to-end-tests 73 | 74 | 75 | 76 | org.apache.maven.plugins 77 | maven-surefire-plugin 78 | 79 | 80 | end-to-end-tests 81 | integration-test 82 | 83 | test 84 | 85 | 86 | 87 | **/*.* 88 | 89 | 90 | ${project.basedir} 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | org.apache.maven.plugins 105 | maven-deploy-plugin 106 | 107 | true 108 | 109 | 110 | 111 | org.apache.maven.plugins 112 | maven-surefire-plugin 113 | 114 | 115 | default-test 116 | none 117 | 118 | 119 | integration-tests 120 | none 121 | 122 | 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-dependency-plugin 127 | 128 | 129 | copy 130 | pre-integration-test 131 | 132 | copy 133 | 134 | 135 | 136 | 137 | org.apache.flink 138 | flink-sql-connector-mongodb 139 | ${project.version} 140 | sql-mongodb.jar 141 | jar 142 | ${project.build.directory}/dependencies 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /flink-connector-mongodb-e2e-tests/src/test/resources/META-INF/services/org.junit.jupiter.api.extension.Extension: -------------------------------------------------------------------------------- 1 | # 
Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | org.apache.flink.util.TestLoggerExtension 17 | -------------------------------------------------------------------------------- /flink-connector-mongodb-e2e-tests/src/test/resources/e2e_append_only.sql: -------------------------------------------------------------------------------- 1 | --/* 2 | -- * Licensed to the Apache Software Foundation (ASF) under one 3 | -- * or more contributor license agreements. See the NOTICE file 4 | -- * distributed with this work for additional information 5 | -- * regarding copyright ownership. The ASF licenses this file 6 | -- * to you under the Apache License, Version 2.0 (the 7 | -- * "License"); you may not use this file except in compliance 8 | -- * with the License. You may obtain a copy of the License at 9 | -- * 10 | -- * http://www.apache.org/licenses/LICENSE-2.0 11 | -- * 12 | -- * Unless required by applicable law or agreed to in writing, software 13 | -- * distributed under the License is distributed on an "AS IS" BASIS, 14 | -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | -- * See the License for the specific language governing permissions and 16 | -- * limitations under the License. 17 | -- */ 18 | 19 | DROP TABLE IF EXISTS orders; 20 | DROP TABLE IF EXISTS orders_bak; 21 | 22 | CREATE TABLE orders ( 23 | `_id` STRING, 24 | `code` STRING, 25 | `quantity` BIGINT, 26 | PRIMARY KEY (_id) NOT ENFORCED 27 | ) WITH ( 28 | 'connector' = 'mongodb', 29 | 'uri' = 'mongodb://mongodb:27017', 30 | 'database' = 'test_append_only', 31 | 'collection' = 'orders' 32 | ); 33 | 34 | CREATE TABLE orders_bak ( 35 | `code` STRING, 36 | `quantity` BIGINT 37 | ) WITH ( 38 | 'connector' = 'mongodb', 39 | 'uri' = 'mongodb://mongodb:27017', 40 | 'database' = 'test_append_only', 41 | 'collection' = 'orders_bak' 42 | ); 43 | 44 | INSERT INTO orders_bak SELECT `code`, `quantity` FROM orders; 45 | -------------------------------------------------------------------------------- /flink-connector-mongodb-e2e-tests/src/test/resources/e2e_upsert.sql: -------------------------------------------------------------------------------- 1 | --/* 2 | -- * Licensed to the Apache Software Foundation (ASF) under one 3 | -- * or more contributor license agreements. See the NOTICE file 4 | -- * distributed with this work for additional information 5 | -- * regarding copyright ownership. The ASF licenses this file 6 | -- * to you under the Apache License, Version 2.0 (the 7 | -- * "License"); you may not use this file except in compliance 8 | -- * with the License. 
You may obtain a copy of the License at 9 | -- * 10 | -- * http://www.apache.org/licenses/LICENSE-2.0 11 | -- * 12 | -- * Unless required by applicable law or agreed to in writing, software 13 | -- * distributed under the License is distributed on an "AS IS" BASIS, 14 | -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | -- * See the License for the specific language governing permissions and 16 | -- * limitations under the License. 17 | -- */ 18 | 19 | DROP TABLE IF EXISTS orders; 20 | DROP TABLE IF EXISTS orders_bak; 21 | 22 | CREATE TABLE orders ( 23 | `_id` STRING, 24 | `code` STRING, 25 | `quantity` BIGINT, 26 | PRIMARY KEY (_id) NOT ENFORCED 27 | ) WITH ( 28 | 'connector' = 'mongodb', 29 | 'uri' = 'mongodb://mongodb:27017', 30 | 'database' = 'test_upsert', 31 | 'collection' = 'orders' 32 | ); 33 | 34 | CREATE TABLE orders_bak ( 35 | `_id` STRING, 36 | `code` STRING, 37 | `quantity` BIGINT, 38 | PRIMARY KEY (_id) NOT ENFORCED 39 | ) WITH ( 40 | 'connector' = 'mongodb', 41 | 'uri' = 'mongodb://mongodb:27017', 42 | 'database' = 'test_upsert', 43 | 'collection' = 'orders_bak' 44 | ); 45 | 46 | INSERT OVERWRITE orders_bak SELECT * FROM orders; 47 | -------------------------------------------------------------------------------- /flink-connector-mongodb-e2e-tests/src/test/resources/log4j2-test.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | # Set root logger level to OFF to not flood build logs 20 | # set manually to INFO for debugging purposes 21 | rootLogger.level = OFF 22 | rootLogger.appenderRef.test.ref = TestLogger 23 | 24 | appender.testlogger.name = TestLogger 25 | appender.testlogger.type = CONSOLE 26 | appender.testlogger.target = SYSTEM_ERR 27 | appender.testlogger.layout.type = PatternLayout 28 | appender.testlogger.layout.pattern = %-4r [%t] %-5p %c %x - %m%n 29 | -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/0339fa9f-980e-4b71-a187-dde3c2e4d22d: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/0339fa9f-980e-4b71-a187-dde3c2e4d22d -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/07a10766-610a-4f6d-9b7f-82d523e68bb6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/07a10766-610a-4f6d-9b7f-82d523e68bb6 -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/54355260-ce16-4e0e-9768-00cb07fadf7e: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/54355260-ce16-4e0e-9768-00cb07fadf7e -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/ae2f06aa-8fb6-4585-a50c-ffe88f59697d: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/ae2f06aa-8fb6-4585-a50c-ffe88f59697d -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/bf400375-9b08-442c-80c1-051333988494: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/bf400375-9b08-442c-80c1-051333988494 -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/d5033bdc-beb6-4505-9fd1-ab50e6c4be20: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/d5033bdc-beb6-4505-9fd1-ab50e6c4be20 -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/ed03507c-2a0e-4dc2-b0c6-b90a8469bb7c: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/ed03507c-2a0e-4dc2-b0c6-b90a8469bb7c -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/f871efde-12fe-46a5-9b10-11382284683f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-mongodb/34f14a6ce82ad350a84a9ddecea84317096f053a/flink-connector-mongodb/archunit-violations/f871efde-12fe-46a5-9b10-11382284683f -------------------------------------------------------------------------------- /flink-connector-mongodb/archunit-violations/stored.rules: -------------------------------------------------------------------------------- 1 | # 2 | #Wed Nov 02 20:18:11 CST 2022 3 | Production\ code\ must\ not\ call\ methods\ annotated\ with\ @VisibleForTesting=f871efde-12fe-46a5-9b10-11382284683f 4 | Options\ for\ connectors\ and\ formats\ should\ reside\ in\ a\ consistent\ package\ and\ be\ public\ API.=d5033bdc-beb6-4505-9fd1-ab50e6c4be20 5 | Tests\ inheriting\ from\ AbstractTestBase\ should\ have\ name\ ending\ with\ ITCase=07a10766-610a-4f6d-9b7f-82d523e68bb6 6 | Classes\ in\ API\ packages\ should\ have\ at\ least\ one\ API\ visibility\ annotation.=ae2f06aa-8fb6-4585-a50c-ffe88f59697d 7 | Return\ and\ argument\ types\ of\ methods\ annotated\ with\ @PublicEvolving\ must\ be\ annotated\ with\ @Public(Evolving).=bf400375-9b08-442c-80c1-051333988494 8 | Return\ and\ argument\ types\ of\ methods\ annotated\ with\ @Public\ must\ be\ annotated\ with\ @Public.=ed03507c-2a0e-4dc2-b0c6-b90a8469bb7c 9 | ITCASE\ tests\ should\ use\ a\ MiniCluster\ resource\ or\ extension=0339fa9f-980e-4b71-a187-dde3c2e4d22d 10 | Connector\ production\ code\ must\ not\ depend\ on\ non-public\ API\ outside\ of\ connector\ packages=54355260-ce16-4e0e-9768-00cb07fadf7e 11 | -------------------------------------------------------------------------------- /flink-connector-mongodb/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 21 | 22 | 4.0.0 23 | 24 | 25 | flink-connector-mongodb-parent 26 | org.apache.flink 27 | 2.1-SNAPSHOT 28 | 29 | 30 | flink-connector-mongodb 31 | Flink : Connectors : MongoDB 32 | 33 | jar 34 | 35 | 36 | 37 | 38 | --add-opens=java.base/java.util=ALL-UNNAMED 39 | 40 | --add-opens=java.base/java.lang=ALL-UNNAMED 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | org.apache.flink 49 | flink-connector-base 50 | ${flink.version} 51 | provided 52 | 53 | 54 | 55 | org.apache.flink 56 | flink-streaming-java 57 | ${flink.version} 58 | provided 59 | 60 | 61 | 62 | 63 | 64 | org.mongodb 65 | mongodb-driver-sync 66 | 67 | 68 | 69 | org.mongodb 70 | bson-record-codec 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.flink 80 | flink-table-api-java-bridge 81 | ${flink.version} 82 | provided 83 | true 84 | 85 | 86 | 87 | 88 | 89 | org.testcontainers 90 | mongodb 91 | test 92 | 93 | 94 | 95 | org.apache.flink 96 | flink-test-utils 97 | ${flink.version} 98 | test 99 | 100 | 101 | 102 | org.apache.flink 103 | flink-connector-test-utils 104 | ${flink.version} 105 | test 106 | 107 | 108 | 109 | org.apache.flink 110 | flink-runtime 111 | ${flink.version} 112 | test-jar 113 | test 114 | 115 | 116 | 117 | org.apache.flink 118 | flink-streaming-java 119 | ${flink.version} 120 | test-jar 121 | test 122 | 123 | 124 | 125 | 126 | org.apache.flink 127 | 
flink-table-planner_${scala.binary.version} 128 | ${flink.version} 129 | test 130 | 131 | 132 | 133 | org.apache.flink 134 | flink-table-planner_${scala.binary.version} 135 | ${flink.version} 136 | test-jar 137 | test 138 | 139 | 140 | 141 | org.apache.flink 142 | flink-table-runtime 143 | ${flink.version} 144 | test 145 | 146 | 147 | 148 | org.apache.flink 149 | flink-table-common 150 | ${flink.version} 151 | test-jar 152 | test 153 | 154 | 155 | 156 | org.apache.flink 157 | flink-connector-base 158 | ${flink.version} 159 | test-jar 160 | test 161 | 162 | 163 | 164 | 165 | 166 | org.apache.flink 167 | flink-architecture-tests-test 168 | ${flink.version} 169 | test 170 | 171 | 172 | 173 | 174 | 175 | 176 | org.apache.maven.plugins 177 | maven-jar-plugin 178 | 179 | 180 | 181 | test-jar 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/common/config/MongoConnectionOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.common.config; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | 22 | import com.mongodb.ConnectionString; 23 | 24 | import java.io.Serializable; 25 | import java.util.Objects; 26 | 27 | import static org.apache.flink.util.Preconditions.checkNotNull; 28 | 29 | /** The connection configuration class for MongoDB. 
*/ 30 | @PublicEvolving 31 | public class MongoConnectionOptions implements Serializable { 32 | 33 | private final String uri; 34 | private final String database; 35 | private final String collection; 36 | 37 | private MongoConnectionOptions(String uri, String database, String collection) { 38 | this.uri = checkNotNull(uri); 39 | this.database = checkNotNull(database); 40 | this.collection = checkNotNull(collection); 41 | } 42 | 43 | public String getUri() { 44 | return uri; 45 | } 46 | 47 | public String getDatabase() { 48 | return database; 49 | } 50 | 51 | public String getCollection() { 52 | return collection; 53 | } 54 | 55 | @Override 56 | public boolean equals(Object o) { 57 | if (this == o) { 58 | return true; 59 | } 60 | if (o == null || getClass() != o.getClass()) { 61 | return false; 62 | } 63 | MongoConnectionOptions that = (MongoConnectionOptions) o; 64 | return Objects.equals(uri, that.uri) 65 | && Objects.equals(database, that.database) 66 | && Objects.equals(collection, that.collection); 67 | } 68 | 69 | @Override 70 | public int hashCode() { 71 | return Objects.hash(uri, database, collection); 72 | } 73 | 74 | public static MongoConnectionOptionsBuilder builder() { 75 | return new MongoConnectionOptionsBuilder(); 76 | } 77 | 78 | /** Builder for {@link MongoConnectionOptions}. */ 79 | @PublicEvolving 80 | public static class MongoConnectionOptionsBuilder { 81 | private String uri; 82 | private String database; 83 | private String collection; 84 | 85 | private MongoConnectionOptionsBuilder() {} 86 | 87 | /** 88 | * Sets the connection string of MongoDB. 89 | * 90 | * @param uri connection string of MongoDB 91 | * @return this builder 92 | */ 93 | public MongoConnectionOptionsBuilder setUri(String uri) { 94 | this.uri = new ConnectionString(uri).getConnectionString(); 95 | return this; 96 | } 97 | 98 | /** 99 | * Sets the database of MongoDB. 100 | * 101 | * @param database the database to sink of MongoDB. 102 | * @return this builder 103 | */ 104 | public MongoConnectionOptionsBuilder setDatabase(String database) { 105 | this.database = checkNotNull(database, "The database of MongoDB must not be null"); 106 | return this; 107 | } 108 | 109 | /** 110 | * Sets the collection of MongoDB. 111 | * 112 | * @param collection the collection to sink of MongoDB. 113 | * @return this builder 114 | */ 115 | public MongoConnectionOptionsBuilder setCollection(String collection) { 116 | this.collection = 117 | checkNotNull(collection, "The collection of MongoDB must not be null"); 118 | return this; 119 | } 120 | 121 | /** 122 | * Build the {@link MongoConnectionOptions}. 123 | * 124 | * @return a MongoConnectionOptions with the settings made for this builder. 125 | */ 126 | public MongoConnectionOptions build() { 127 | return new MongoConnectionOptions(uri, database, collection); 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/common/utils/MongoConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.common.utils; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | 22 | import org.bson.BsonDocument; 23 | import org.bson.BsonInt32; 24 | import org.bson.BsonMaxKey; 25 | import org.bson.BsonMinKey; 26 | import org.bson.BsonValue; 27 | import org.bson.json.JsonMode; 28 | import org.bson.json.JsonWriterSettings; 29 | 30 | /** Constants for MongoDB. */ 31 | @Internal 32 | public class MongoConstants { 33 | 34 | public static final String ID_FIELD = "_id"; 35 | 36 | public static final String ENCODE_VALUE_FIELD = "_value"; 37 | 38 | public static final String NAMESPACE_FIELD = "ns"; 39 | 40 | public static final String KEY_FIELD = "key"; 41 | 42 | public static final String MAX_FIELD = "max"; 43 | 44 | public static final String MIN_FIELD = "min"; 45 | 46 | public static final String UUID_FIELD = "uuid"; 47 | 48 | public static final String SPLIT_KEYS_FIELD = "splitKeys"; 49 | 50 | public static final String SHARD_FIELD = "shard"; 51 | 52 | public static final String SHARDED_FIELD = "sharded"; 53 | 54 | public static final String COUNT_FIELD = "count"; 55 | 56 | public static final String SIZE_FIELD = "size"; 57 | 58 | public static final String AVG_OBJ_SIZE_FIELD = "avgObjSize"; 59 | 60 | public static final String DROPPED_FIELD = "dropped"; 61 | 62 | public static final BsonValue BSON_MIN_KEY = new BsonMinKey(); 63 | 64 | public static final BsonValue BSON_MAX_KEY = new BsonMaxKey(); 65 | 66 | public static final BsonDocument ID_HINT = new BsonDocument(ID_FIELD, new BsonInt32(1)); 67 | 68 | public static final BsonDocument BSON_MIN_BOUNDARY = new BsonDocument(ID_FIELD, BSON_MIN_KEY); 69 | public static final BsonDocument BSON_MAX_BOUNDARY = new BsonDocument(ID_FIELD, BSON_MAX_KEY); 70 | 71 | public static final JsonWriterSettings DEFAULT_JSON_WRITER_SETTINGS = 72 | JsonWriterSettings.builder().outputMode(JsonMode.EXTENDED).build(); 73 | 74 | private MongoConstants() {} 75 | } 76 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/common/utils/MongoSerdeUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.common.utils; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.util.function.BiConsumerWithException; 22 | import org.apache.flink.util.function.FunctionWithException; 23 | 24 | import java.io.DataInputStream; 25 | import java.io.DataOutputStream; 26 | import java.io.IOException; 27 | import java.util.ArrayList; 28 | import java.util.HashMap; 29 | import java.util.List; 30 | import java.util.Map; 31 | 32 | /** A util class with some helper method for serde in the MongoDB source. */ 33 | @Internal 34 | public class MongoSerdeUtils { 35 | 36 | /** Private constructor for util class. */ 37 | private MongoSerdeUtils() {} 38 | 39 | public static void serializeList( 40 | DataOutputStream out, 41 | List list, 42 | BiConsumerWithException serializer) 43 | throws IOException { 44 | out.writeInt(list.size()); 45 | for (T t : list) { 46 | serializer.accept(out, t); 47 | } 48 | } 49 | 50 | public static List deserializeList( 51 | DataInputStream in, FunctionWithException deserializer) 52 | throws IOException { 53 | int size = in.readInt(); 54 | List list = new ArrayList<>(size); 55 | for (int i = 0; i < size; i++) { 56 | T t = deserializer.apply(in); 57 | list.add(t); 58 | } 59 | 60 | return list; 61 | } 62 | 63 | public static void serializeMap( 64 | DataOutputStream out, 65 | Map map, 66 | BiConsumerWithException keySerializer, 67 | BiConsumerWithException valueSerializer) 68 | throws IOException { 69 | out.writeInt(map.size()); 70 | for (Map.Entry entry : map.entrySet()) { 71 | keySerializer.accept(out, entry.getKey()); 72 | valueSerializer.accept(out, entry.getValue()); 73 | } 74 | } 75 | 76 | public static Map deserializeMap( 77 | DataInputStream in, 78 | FunctionWithException keyDeserializer, 79 | FunctionWithException valueDeserializer) 80 | throws IOException { 81 | int size = in.readInt(); 82 | Map result = new HashMap<>(size); 83 | for (int i = 0; i < size; i++) { 84 | K key = keyDeserializer.apply(in); 85 | V value = valueDeserializer.apply(in); 86 | result.put(key, value); 87 | } 88 | return result; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/sink/MongoSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.sink; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.api.common.ExecutionConfig; 22 | import org.apache.flink.api.connector.sink2.Sink; 23 | import org.apache.flink.api.connector.sink2.SinkWriter; 24 | import org.apache.flink.api.java.ClosureCleaner; 25 | import org.apache.flink.connector.base.DeliveryGuarantee; 26 | import org.apache.flink.connector.mongodb.common.config.MongoConnectionOptions; 27 | import org.apache.flink.connector.mongodb.sink.config.MongoWriteOptions; 28 | import org.apache.flink.connector.mongodb.sink.writer.MongoWriter; 29 | import org.apache.flink.connector.mongodb.sink.writer.serializer.MongoSerializationSchema; 30 | 31 | import com.mongodb.client.model.WriteModel; 32 | 33 | import static org.apache.flink.util.Preconditions.checkNotNull; 34 | 35 | /** 36 | * Mongo sink converts each incoming element into MongoDB {@link WriteModel} (bulk write action) and 37 | * bulk writes to mongodb when the number of actions is greater than batchSize or the flush interval 38 | * is greater than batchIntervalMs. 39 | * 40 | *

<p>The following example shows how to create a MongoSink sending records of {@code Document} 41 | * type. 42 | * 43 | *

<pre>{@code
44 |  * MongoSink<Document> sink = MongoSink.<Document>builder()
45 |  *     .setUri("mongodb://user:password@127.0.0.1:27017")
46 |  *     .setDatabase("db")
47 |  *     .setCollection("coll")
48 |  *     .setBatchSize(5)
49 |  *     .setSerializationSchema(
50 |  *         (doc, context) -> new InsertOneModel<>(doc.toBsonDocument()))
51 |  *     .build();
52 |  * }</pre>
53 | * 54 | * @param Type of the elements handled by this sink 55 | */ 56 | @PublicEvolving 57 | public class MongoSink implements Sink { 58 | 59 | private static final long serialVersionUID = 1L; 60 | 61 | private final MongoConnectionOptions connectionOptions; 62 | private final MongoWriteOptions writeOptions; 63 | private final MongoSerializationSchema serializationSchema; 64 | 65 | MongoSink( 66 | MongoConnectionOptions connectionOptions, 67 | MongoWriteOptions writeOptions, 68 | MongoSerializationSchema serializationSchema) { 69 | this.connectionOptions = checkNotNull(connectionOptions); 70 | this.writeOptions = checkNotNull(writeOptions); 71 | this.serializationSchema = checkNotNull(serializationSchema); 72 | ClosureCleaner.clean( 73 | serializationSchema, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); 74 | } 75 | 76 | public static MongoSinkBuilder builder() { 77 | return new MongoSinkBuilder<>(); 78 | } 79 | 80 | @Override 81 | public SinkWriter createWriter(InitContext context) { 82 | return new MongoWriter<>( 83 | connectionOptions, 84 | writeOptions, 85 | writeOptions.getDeliveryGuarantee() == DeliveryGuarantee.AT_LEAST_ONCE, 86 | context, 87 | serializationSchema); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/sink/MongoSinkBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.sink; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.connector.base.DeliveryGuarantee; 22 | import org.apache.flink.connector.mongodb.common.config.MongoConnectionOptions; 23 | import org.apache.flink.connector.mongodb.sink.config.MongoWriteOptions; 24 | import org.apache.flink.connector.mongodb.sink.writer.serializer.MongoSerializationSchema; 25 | import org.apache.flink.util.InstantiationUtil; 26 | 27 | import com.mongodb.client.model.WriteModel; 28 | 29 | import static org.apache.flink.util.Preconditions.checkNotNull; 30 | import static org.apache.flink.util.Preconditions.checkState; 31 | 32 | /** 33 | * Base builder to construct a {@link MongoSink}. 
34 | * 35 | * @param type of the records converted to MongoDB bulk request 36 | */ 37 | @PublicEvolving 38 | public class MongoSinkBuilder { 39 | 40 | private final MongoConnectionOptions.MongoConnectionOptionsBuilder connectionOptionsBuilder; 41 | private final MongoWriteOptions.MongoWriteOptionsBuilder writeOptionsBuilder; 42 | 43 | private MongoSerializationSchema serializationSchema; 44 | 45 | MongoSinkBuilder() { 46 | this.connectionOptionsBuilder = MongoConnectionOptions.builder(); 47 | this.writeOptionsBuilder = MongoWriteOptions.builder(); 48 | } 49 | 50 | /** 51 | * Sets the connection string of MongoDB. 52 | * 53 | * @param uri connection string of MongoDB 54 | * @return this builder 55 | */ 56 | public MongoSinkBuilder setUri(String uri) { 57 | connectionOptionsBuilder.setUri(uri); 58 | return this; 59 | } 60 | 61 | /** 62 | * Sets the database to sink of MongoDB. 63 | * 64 | * @param database the database to sink of MongoDB. 65 | * @return this builder 66 | */ 67 | public MongoSinkBuilder setDatabase(String database) { 68 | connectionOptionsBuilder.setDatabase(database); 69 | return this; 70 | } 71 | 72 | /** 73 | * Sets the collection to sink of MongoDB. 74 | * 75 | * @param collection the collection to sink of MongoDB. 76 | * @return this builder 77 | */ 78 | public MongoSinkBuilder setCollection(String collection) { 79 | connectionOptionsBuilder.setCollection(collection); 80 | return this; 81 | } 82 | 83 | /** 84 | * Sets the maximum number of actions to buffer for each batch request. You can pass -1 to 85 | * disable batching. 86 | * 87 | * @param batchSize the maximum number of actions to buffer for each batch request. 88 | * @return this builder 89 | */ 90 | public MongoSinkBuilder setBatchSize(int batchSize) { 91 | writeOptionsBuilder.setBatchSize(batchSize); 92 | return this; 93 | } 94 | 95 | /** 96 | * Sets the batch flush interval, in milliseconds. You can pass -1 to disable it. 97 | * 98 | * @param batchIntervalMs the batch flush interval, in milliseconds. 99 | * @return this builder 100 | */ 101 | public MongoSinkBuilder setBatchIntervalMs(long batchIntervalMs) { 102 | writeOptionsBuilder.setBatchIntervalMs(batchIntervalMs); 103 | return this; 104 | } 105 | 106 | /** 107 | * Sets the max retry times if writing records failed. 108 | * 109 | * @param maxRetries the max retry times. 110 | * @return this builder 111 | */ 112 | public MongoSinkBuilder setMaxRetries(int maxRetries) { 113 | writeOptionsBuilder.setMaxRetries(maxRetries); 114 | return this; 115 | } 116 | 117 | /** 118 | * Sets the wanted {@link DeliveryGuarantee}. The default delivery guarantee is {@link 119 | * DeliveryGuarantee#AT_LEAST_ONCE} 120 | * 121 | * @param deliveryGuarantee which describes the record emission behaviour 122 | * @return this builder 123 | */ 124 | public MongoSinkBuilder setDeliveryGuarantee(DeliveryGuarantee deliveryGuarantee) { 125 | writeOptionsBuilder.setDeliveryGuarantee(deliveryGuarantee); 126 | return this; 127 | } 128 | 129 | /** 130 | * Sets the serialization schema which is invoked on every record to convert it to MongoDB bulk 131 | * request. 132 | * 133 | * @param serializationSchema to process records into MongoDB bulk {@link WriteModel}. 
134 | * @return this builder 135 | */ 136 | public MongoSinkBuilder setSerializationSchema( 137 | MongoSerializationSchema serializationSchema) { 138 | checkNotNull(serializationSchema); 139 | checkState( 140 | InstantiationUtil.isSerializable(serializationSchema), 141 | "The mongo serialization schema must be serializable."); 142 | this.serializationSchema = serializationSchema; 143 | return this; 144 | } 145 | 146 | /** 147 | * Constructs the {@link MongoSink} with the properties configured this builder. 148 | * 149 | * @return {@link MongoSink} 150 | */ 151 | public MongoSink build() { 152 | checkNotNull(serializationSchema, "The serialization schema must be supplied"); 153 | return new MongoSink<>( 154 | connectionOptionsBuilder.build(), writeOptionsBuilder.build(), serializationSchema); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/sink/writer/context/DefaultMongoSinkContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.sink.writer.context; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.api.connector.sink2.Sink; 22 | import org.apache.flink.connector.mongodb.sink.config.MongoWriteOptions; 23 | 24 | /** Default {@link MongoSinkContext} implementation. */ 25 | @Internal 26 | public class DefaultMongoSinkContext implements MongoSinkContext { 27 | 28 | private final Sink.InitContext initContext; 29 | private final MongoWriteOptions writeOptions; 30 | 31 | public DefaultMongoSinkContext(Sink.InitContext initContext, MongoWriteOptions writeOptions) { 32 | this.initContext = initContext; 33 | this.writeOptions = writeOptions; 34 | } 35 | 36 | @Override 37 | public Sink.InitContext getInitContext() { 38 | return initContext; 39 | } 40 | 41 | @Override 42 | public long processTime() { 43 | return initContext.getProcessingTimeService().getCurrentProcessingTime(); 44 | } 45 | 46 | @Override 47 | public MongoWriteOptions getWriteOptions() { 48 | return writeOptions; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/sink/writer/context/MongoSinkContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
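//
// Illustrative usage sketch (not part of the connector sources): wiring the MongoSink
// builder shown above into a DataStream job. The connection URI, database, collection,
// and the input stream are hypothetical placeholders; the lambda inserts each incoming
// JSON string as one BSON document.
//
// import org.apache.flink.connector.base.DeliveryGuarantee;
// import org.apache.flink.streaming.api.datastream.DataStream;
// import com.mongodb.client.model.InsertOneModel;
// import org.bson.BsonDocument;

MongoSink<String> sink =
        MongoSink.<String>builder()
                .setUri("mongodb://user:password@127.0.0.1:27017")
                .setDatabase("my_db")
                .setCollection("my_coll")
                .setBatchSize(1000)
                .setBatchIntervalMs(1000)
                .setMaxRetries(3)
                .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
                .setSerializationSchema(
                        (input, context) -> new InsertOneModel<>(BsonDocument.parse(input)))
                .build();

// DataStream<String> stream = ...;  (assumed to exist)
stream.sinkTo(sink);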
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.sink.writer.context; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.api.connector.sink2.Sink; 22 | import org.apache.flink.connector.mongodb.sink.config.MongoWriteOptions; 23 | import org.apache.flink.connector.mongodb.sink.writer.serializer.MongoSerializationSchema; 24 | 25 | /** This context provides information for {@link MongoSerializationSchema}. */ 26 | @PublicEvolving 27 | public interface MongoSinkContext { 28 | 29 | /** Returns the current sink's init context. */ 30 | Sink.InitContext getInitContext(); 31 | 32 | /** Returns the current process time in flink. */ 33 | long processTime(); 34 | 35 | /** Returns the write options of MongoSink. */ 36 | MongoWriteOptions getWriteOptions(); 37 | } 38 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/sink/writer/serializer/MongoSerializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.sink.writer.serializer; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.api.common.serialization.SerializationSchema; 22 | import org.apache.flink.connector.mongodb.sink.config.MongoWriteOptions; 23 | import org.apache.flink.connector.mongodb.sink.writer.context.MongoSinkContext; 24 | 25 | import com.mongodb.client.model.WriteModel; 26 | import org.bson.BsonDocument; 27 | 28 | import java.io.Serializable; 29 | 30 | /** 31 | * The serialization schema for how to serialize records into MongoDB. 32 | * 33 | * @param The message type send to MongoDB. 34 | */ 35 | @PublicEvolving 36 | public interface MongoSerializationSchema extends Serializable { 37 | 38 | /** 39 | * Initialization method for the schema. It is called before the actual working methods {@link 40 | * #serialize(Object, MongoSinkContext)} and thus suitable for one-time setup work. 41 | * 42 | *
<p>
The provided {@link SerializationSchema.InitializationContext} can be used to access 43 | * additional features such as registering user metrics. 44 | * 45 | * @param initializationContext Contextual information that can be used during initialization. 46 | * @param sinkContext Runtime information i.e. partitions, subtaskId. 47 | * @param sinkConfiguration All the configure options for the MongoDB sink. 48 | */ 49 | default void open( 50 | SerializationSchema.InitializationContext initializationContext, 51 | MongoSinkContext sinkContext, 52 | MongoWriteOptions sinkConfiguration) 53 | throws Exception { 54 | // Nothing to do by default. 55 | } 56 | 57 | /** 58 | * Serializes the given element into {@link WriteModel}. 59 | * 60 | * @param element Element to be serialized. 61 | * @param sinkContext Context to provide extra information. 62 | */ 63 | WriteModel serialize(IN element, MongoSinkContext sinkContext); 64 | } 65 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/MongoSourceEnumState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.connector.mongodb.source.enumerator.assigner.MongoSplitAssigner; 22 | import org.apache.flink.connector.mongodb.source.reader.split.MongoSourceSplitReader; 23 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 24 | 25 | import java.util.ArrayList; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | /** 31 | * The state class for MongoDB source enumerator, used for storing the split state. This class is 32 | * managed and controlled by {@link MongoSplitAssigner}. 33 | */ 34 | @PublicEvolving 35 | public class MongoSourceEnumState { 36 | 37 | /** The Mongo collections remaining. */ 38 | private final List remainingCollections; 39 | 40 | /** 41 | * The paths that are no longer in the enumerator checkpoint, but have been processed before. 42 | */ 43 | private final List alreadyProcessedCollections; 44 | 45 | /** The scan splits in the checkpoint. */ 46 | private final List remainingScanSplits; 47 | 48 | /** 49 | * The scan splits that the {@link MongoSourceEnumerator} has assigned to {@link 50 | * MongoSourceSplitReader}s. 51 | */ 52 | private final Map assignedScanSplits; 53 | 54 | /** The pipeline has been triggered and topic partitions have been assigned to readers. 
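//
// Illustrative sketch of implementing the MongoSerializationSchema interface shown above:
// every record becomes an idempotent upsert keyed on _id, which pairs well with the
// AT_LEAST_ONCE delivery guarantee. The Order POJO and its toJson()/getId() accessors are
// hypothetical.
//
// import com.mongodb.client.model.Filters;
// import com.mongodb.client.model.ReplaceOneModel;
// import com.mongodb.client.model.ReplaceOptions;
// import com.mongodb.client.model.WriteModel;
// import org.bson.BsonDocument;

public class OrderUpsertSerializationSchema implements MongoSerializationSchema<Order> {

    private static final long serialVersionUID = 1L;

    @Override
    public WriteModel<BsonDocument> serialize(Order order, MongoSinkContext sinkContext) {
        BsonDocument document = BsonDocument.parse(order.toJson());
        return new ReplaceOneModel<>(
                Filters.eq("_id", order.getId()),   // match on the primary key
                document,                           // full replacement document
                new ReplaceOptions().upsert(true)); // insert when no match exists
    }
}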
*/ 55 | private final boolean initialized; 56 | 57 | public MongoSourceEnumState( 58 | List remainingCollections, 59 | List alreadyProcessedCollections, 60 | List remainingScanSplits, 61 | Map assignedScanSplits, 62 | boolean initialized) { 63 | this.remainingCollections = remainingCollections; 64 | this.alreadyProcessedCollections = alreadyProcessedCollections; 65 | this.remainingScanSplits = remainingScanSplits; 66 | this.assignedScanSplits = assignedScanSplits; 67 | this.initialized = initialized; 68 | } 69 | 70 | public List getRemainingCollections() { 71 | return remainingCollections; 72 | } 73 | 74 | public List getAlreadyProcessedCollections() { 75 | return alreadyProcessedCollections; 76 | } 77 | 78 | public List getRemainingScanSplits() { 79 | return remainingScanSplits; 80 | } 81 | 82 | public Map getAssignedScanSplits() { 83 | return assignedScanSplits; 84 | } 85 | 86 | public boolean isInitialized() { 87 | return initialized; 88 | } 89 | 90 | /** The initial assignment state for Mongo. */ 91 | public static MongoSourceEnumState initialState() { 92 | return new MongoSourceEnumState( 93 | new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), new HashMap<>(), false); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/MongoSourceEnumStateSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 22 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplitSerializer; 23 | import org.apache.flink.core.io.SimpleVersionedSerializer; 24 | 25 | import java.io.ByteArrayInputStream; 26 | import java.io.ByteArrayOutputStream; 27 | import java.io.DataInput; 28 | import java.io.DataInputStream; 29 | import java.io.DataOutputStream; 30 | import java.io.IOException; 31 | import java.util.List; 32 | import java.util.Map; 33 | 34 | import static org.apache.flink.connector.mongodb.common.utils.MongoSerdeUtils.deserializeList; 35 | import static org.apache.flink.connector.mongodb.common.utils.MongoSerdeUtils.deserializeMap; 36 | import static org.apache.flink.connector.mongodb.common.utils.MongoSerdeUtils.serializeList; 37 | import static org.apache.flink.connector.mongodb.common.utils.MongoSerdeUtils.serializeMap; 38 | import static org.apache.flink.connector.mongodb.source.split.MongoSourceSplitSerializer.SCAN_SPLIT_FLAG; 39 | 40 | /** The {@link SimpleVersionedSerializer Serializer} for the enumerator state of Mongo source. */ 41 | @Internal 42 | public class MongoSourceEnumStateSerializer 43 | implements SimpleVersionedSerializer { 44 | 45 | public static final MongoSourceEnumStateSerializer INSTANCE = 46 | new MongoSourceEnumStateSerializer(); 47 | 48 | private MongoSourceEnumStateSerializer() { 49 | // Singleton instance. 50 | } 51 | 52 | @Override 53 | public int getVersion() { 54 | // We use MongoSourceSplitSerializer's version because we use reuse this class. 55 | return MongoSourceSplitSerializer.CURRENT_VERSION; 56 | } 57 | 58 | @Override 59 | public byte[] serialize(MongoSourceEnumState state) throws IOException { 60 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); 61 | DataOutputStream out = new DataOutputStream(baos)) { 62 | serializeList(out, state.getRemainingCollections(), DataOutputStream::writeUTF); 63 | 64 | serializeList(out, state.getAlreadyProcessedCollections(), DataOutputStream::writeUTF); 65 | 66 | serializeList( 67 | out, 68 | state.getRemainingScanSplits(), 69 | MongoSourceSplitSerializer.INSTANCE::serializeMongoSplit); 70 | 71 | serializeMap( 72 | out, 73 | state.getAssignedScanSplits(), 74 | DataOutputStream::writeUTF, 75 | MongoSourceSplitSerializer.INSTANCE::serializeMongoSplit); 76 | 77 | out.writeBoolean(state.isInitialized()); 78 | 79 | out.flush(); 80 | return baos.toByteArray(); 81 | } 82 | } 83 | 84 | @Override 85 | public MongoSourceEnumState deserialize(int version, byte[] serialized) throws IOException { 86 | try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 87 | DataInputStream in = new DataInputStream(bais)) { 88 | List remainingCollections = deserializeList(in, DataInput::readUTF); 89 | List alreadyProcessedCollections = deserializeList(in, DataInput::readUTF); 90 | List remainingScanSplits = 91 | deserializeList(in, i -> deserializeMongoScanSourceSplit(version, i)); 92 | 93 | Map assignedScanSplits = 94 | deserializeMap( 95 | in, 96 | DataInput::readUTF, 97 | i -> deserializeMongoScanSourceSplit(version, i)); 98 | 99 | boolean initialized = in.readBoolean(); 100 | 101 | return new MongoSourceEnumState( 102 | remainingCollections, 103 | alreadyProcessedCollections, 104 | remainingScanSplits, 105 | assignedScanSplits, 106 | initialized); 107 | } 108 | } 109 | 110 | private static 
MongoScanSourceSplit deserializeMongoScanSourceSplit( 111 | int version, DataInputStream in) throws IOException { 112 | int splitKind = in.readInt(); 113 | if (splitKind == SCAN_SPLIT_FLAG) { 114 | return MongoSourceSplitSerializer.INSTANCE.deserializeMongoScanSourceSplit(version, in); 115 | } 116 | throw new IOException("Split kind mismatch expect 1 but was " + splitKind); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/MongoSourceEnumerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.api.connector.source.Boundedness; 22 | import org.apache.flink.api.connector.source.SplitEnumerator; 23 | import org.apache.flink.api.connector.source.SplitEnumeratorContext; 24 | import org.apache.flink.connector.mongodb.source.MongoSource; 25 | import org.apache.flink.connector.mongodb.source.enumerator.assigner.MongoSplitAssigner; 26 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplit; 27 | 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import javax.annotation.Nullable; 32 | 33 | import java.io.IOException; 34 | import java.util.Iterator; 35 | import java.util.List; 36 | import java.util.Optional; 37 | import java.util.TreeSet; 38 | 39 | /** The enumerator class for {@link MongoSource}. */ 40 | @Internal 41 | public class MongoSourceEnumerator 42 | implements SplitEnumerator { 43 | 44 | private static final Logger LOG = LoggerFactory.getLogger(MongoSourceEnumerator.class); 45 | 46 | private final Boundedness boundedness; 47 | private final SplitEnumeratorContext context; 48 | private final MongoSplitAssigner splitAssigner; 49 | private final TreeSet readersAwaitingSplit; 50 | 51 | public MongoSourceEnumerator( 52 | Boundedness boundedness, 53 | SplitEnumeratorContext context, 54 | MongoSplitAssigner splitAssigner) { 55 | this.boundedness = boundedness; 56 | this.context = context; 57 | this.splitAssigner = splitAssigner; 58 | this.readersAwaitingSplit = new TreeSet<>(); 59 | } 60 | 61 | @Override 62 | public void start() { 63 | splitAssigner.open(); 64 | } 65 | 66 | @Override 67 | public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname) { 68 | if (!context.registeredReaders().containsKey(subtaskId)) { 69 | // reader failed between sending the request and now. skip this request. 
70 | return; 71 | } 72 | 73 | readersAwaitingSplit.add(subtaskId); 74 | assignSplits(); 75 | } 76 | 77 | @Override 78 | public void addSplitsBack(List splits, int subtaskId) { 79 | LOG.debug("Mongo Source Enumerator adds splits back: {}", splits); 80 | splitAssigner.addSplitsBack(splits); 81 | } 82 | 83 | @Override 84 | public void addReader(int subtaskId) { 85 | LOG.debug("Adding reader {} to MongoSourceEnumerator.", subtaskId); 86 | } 87 | 88 | private void assignSplits() { 89 | final Iterator awaitingReader = readersAwaitingSplit.iterator(); 90 | 91 | while (awaitingReader.hasNext()) { 92 | int nextAwaiting = awaitingReader.next(); 93 | // if the reader that requested another split has failed in the meantime, remove 94 | // it from the list of waiting readers 95 | if (!context.registeredReaders().containsKey(nextAwaiting)) { 96 | awaitingReader.remove(); 97 | continue; 98 | } 99 | 100 | // close idle readers 101 | if (splitAssigner.noMoreSplits() && boundedness == Boundedness.BOUNDED) { 102 | context.signalNoMoreSplits(nextAwaiting); 103 | awaitingReader.remove(); 104 | LOG.info( 105 | "All scan splits have been assigned, closing idle reader {}", nextAwaiting); 106 | continue; 107 | } 108 | 109 | Optional split = splitAssigner.getNext(); 110 | if (split.isPresent()) { 111 | final MongoSourceSplit mongoSplit = split.get(); 112 | context.assignSplit(mongoSplit, nextAwaiting); 113 | awaitingReader.remove(); 114 | LOG.info("Assign split {} to subtask {}", mongoSplit, nextAwaiting); 115 | break; 116 | } else { 117 | // there is no available splits by now, skip assigning 118 | break; 119 | } 120 | } 121 | } 122 | 123 | @Override 124 | public MongoSourceEnumState snapshotState(long checkpointId) { 125 | return splitAssigner.snapshotState(checkpointId); 126 | } 127 | 128 | @Override 129 | public void close() throws IOException { 130 | splitAssigner.close(); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/assigner/MongoScanSplitAssigner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
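//
// Minimal sketch (assumed calling context) of how the MongoSourceEnumState and its
// serializer shown earlier round-trip during a checkpoint/restore cycle; in a running job
// the state comes from MongoSourceEnumerator#snapshotState rather than initialState().
//
MongoSourceEnumState state = MongoSourceEnumState.initialState();

// Checkpoint: Flink stores the serialized bytes together with the serializer version.
int version = MongoSourceEnumStateSerializer.INSTANCE.getVersion();
byte[] bytes = MongoSourceEnumStateSerializer.INSTANCE.serialize(state); // throws IOException

// Restore: the same version and bytes are handed back for deserialization.
MongoSourceEnumState restored = MongoSourceEnumStateSerializer.INSTANCE.deserialize(version, bytes);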
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.assigner; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.common.config.MongoConnectionOptions; 22 | import org.apache.flink.connector.mongodb.source.config.MongoReadOptions; 23 | import org.apache.flink.connector.mongodb.source.enumerator.MongoSourceEnumState; 24 | import org.apache.flink.connector.mongodb.source.enumerator.splitter.MongoSplitters; 25 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 26 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplit; 27 | 28 | import com.mongodb.MongoNamespace; 29 | import com.mongodb.client.MongoClient; 30 | import com.mongodb.client.MongoClients; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import java.io.IOException; 35 | import java.util.Collection; 36 | import java.util.LinkedList; 37 | import java.util.List; 38 | import java.util.Map; 39 | import java.util.Optional; 40 | 41 | import static org.apache.flink.util.Preconditions.checkState; 42 | 43 | /** The split assigner for {@link MongoScanSourceSplit}. */ 44 | @Internal 45 | public class MongoScanSplitAssigner implements MongoSplitAssigner { 46 | 47 | private static final Logger LOG = LoggerFactory.getLogger(MongoScanSplitAssigner.class); 48 | 49 | private final MongoConnectionOptions connectionOptions; 50 | private final MongoReadOptions readOptions; 51 | 52 | private final LinkedList remainingCollections; 53 | private final List alreadyProcessedCollections; 54 | private final LinkedList remainingScanSplits; 55 | private final Map assignedScanSplits; 56 | private boolean initialized; 57 | 58 | private MongoClient mongoClient; 59 | 60 | public MongoScanSplitAssigner( 61 | MongoConnectionOptions connectionOptions, 62 | MongoReadOptions readOptions, 63 | MongoSourceEnumState sourceEnumState) { 64 | this.connectionOptions = connectionOptions; 65 | this.readOptions = readOptions; 66 | this.remainingCollections = new LinkedList<>(sourceEnumState.getRemainingCollections()); 67 | this.alreadyProcessedCollections = sourceEnumState.getAlreadyProcessedCollections(); 68 | this.remainingScanSplits = new LinkedList<>(sourceEnumState.getRemainingScanSplits()); 69 | this.assignedScanSplits = sourceEnumState.getAssignedScanSplits(); 70 | this.initialized = sourceEnumState.isInitialized(); 71 | } 72 | 73 | @Override 74 | public void open() { 75 | LOG.info("Mongo scan split assigner is opening."); 76 | if (!initialized) { 77 | String collectionId = 78 | String.format( 79 | "%s.%s", 80 | connectionOptions.getDatabase(), connectionOptions.getCollection()); 81 | remainingCollections.add(collectionId); 82 | mongoClient = MongoClients.create(connectionOptions.getUri()); 83 | initialized = true; 84 | } 85 | } 86 | 87 | @Override 88 | public Optional getNext() { 89 | if (!remainingScanSplits.isEmpty()) { 90 | // return remaining splits firstly 91 | MongoScanSourceSplit split = remainingScanSplits.poll(); 92 | assignedScanSplits.put(split.splitId(), split); 93 | return Optional.of(split); 94 | } else { 95 | // it's turn for next collection 96 | String nextCollection = remainingCollections.poll(); 97 | if (nextCollection != null) { 98 | // split the given collection into chunks (scan splits) 99 | Collection splits = 100 | MongoSplitters.split( 101 | mongoClient, readOptions, new MongoNamespace(nextCollection)); 102 | remainingScanSplits.addAll(splits); 103 | alreadyProcessedCollections.add(nextCollection); 
104 | return getNext(); 105 | } else { 106 | return Optional.empty(); 107 | } 108 | } 109 | } 110 | 111 | @Override 112 | public void addSplitsBack(Collection splits) { 113 | for (MongoSourceSplit split : splits) { 114 | if (split instanceof MongoScanSourceSplit) { 115 | remainingScanSplits.add((MongoScanSourceSplit) split); 116 | // we should remove the add-backed splits from the assigned list, 117 | // because they are failed 118 | assignedScanSplits.remove(split.splitId()); 119 | } 120 | } 121 | } 122 | 123 | @Override 124 | public MongoSourceEnumState snapshotState(long checkpointId) { 125 | return new MongoSourceEnumState( 126 | remainingCollections, 127 | alreadyProcessedCollections, 128 | remainingScanSplits, 129 | assignedScanSplits, 130 | initialized); 131 | } 132 | 133 | @Override 134 | public boolean noMoreSplits() { 135 | checkState(initialized, "The noMoreSplits method was called but not initialized."); 136 | return remainingCollections.isEmpty() && remainingScanSplits.isEmpty(); 137 | } 138 | 139 | @Override 140 | public void close() throws IOException { 141 | if (mongoClient != null) { 142 | mongoClient.close(); 143 | LOG.info("Mongo scan split assigner is closed."); 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/assigner/MongoSplitAssigner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.assigner; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.source.enumerator.MongoSourceEnumState; 22 | import org.apache.flink.connector.mongodb.source.enumerator.MongoSourceEnumerator; 23 | import org.apache.flink.connector.mongodb.source.reader.split.MongoSourceSplitReader; 24 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplit; 25 | 26 | import java.io.IOException; 27 | import java.util.Collection; 28 | import java.util.Optional; 29 | 30 | /** The split assigner for {@link MongoSourceSplit}. */ 31 | @Internal 32 | public interface MongoSplitAssigner { 33 | 34 | /** 35 | * Called to open the assigner to acquire any resources, like threads or network connections. 36 | */ 37 | void open(); 38 | 39 | /** 40 | * Called to close the assigner, in case it holds on to any resources, like threads or network 41 | * connections. 
42 | */ 43 | void close() throws IOException; 44 | 45 | /** 46 | * Gets the next split to assign to {@link MongoSourceSplitReader} when {@link 47 | * MongoSourceEnumerator} receives a split request, until there are {@link #noMoreSplits()}. 48 | */ 49 | Optional getNext(); 50 | 51 | /** 52 | * Adds a set of splits to this assigner. This happens for example when some split processing 53 | * failed and the splits need to be re-added. 54 | */ 55 | void addSplitsBack(Collection splits); 56 | 57 | /** Snapshot the current assign state into checkpoint. */ 58 | MongoSourceEnumState snapshotState(long checkpointId); 59 | 60 | /** Return whether there are no more splits. */ 61 | boolean noMoreSplits(); 62 | } 63 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/MongoPaginationSplitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.source.config.MongoReadOptions; 22 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 23 | 24 | import com.mongodb.MongoNamespace; 25 | import com.mongodb.client.model.Aggregates; 26 | import com.mongodb.client.model.Projections; 27 | import com.mongodb.client.model.Sorts; 28 | import org.bson.BsonDocument; 29 | import org.bson.conversions.Bson; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | import java.util.ArrayList; 34 | import java.util.Collection; 35 | import java.util.List; 36 | 37 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MAX_BOUNDARY; 38 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MIN_BOUNDARY; 39 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_FIELD; 40 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_HINT; 41 | 42 | /** Mongo Splitter that splits MongoDB collection evenly by record counts. 
*/ 43 | @Internal 44 | public class MongoPaginationSplitter { 45 | 46 | private static final Logger LOG = LoggerFactory.getLogger(MongoPaginationSplitter.class); 47 | 48 | public static Collection split(MongoSplitContext splitContext) { 49 | MongoReadOptions readOptions = splitContext.getReadOptions(); 50 | MongoNamespace namespace = splitContext.getMongoNamespace(); 51 | 52 | // If partition record size isn't present, we'll use the partition size option and average 53 | // object size to calculate number of records in each partitioned split. 54 | Integer partitionRecordSize = readOptions.getPartitionRecordSize(); 55 | if (partitionRecordSize == null) { 56 | long avgObjSizeInBytes = splitContext.getAvgObjSize(); 57 | if (avgObjSizeInBytes == 0) { 58 | LOG.info( 59 | "{} seems to be an empty collection, Returning a single partition.", 60 | namespace); 61 | return MongoSingleSplitter.split(splitContext); 62 | } 63 | 64 | partitionRecordSize = 65 | Math.toIntExact(readOptions.getPartitionSize().getBytes() / avgObjSizeInBytes); 66 | } 67 | 68 | long totalNumOfDocuments = splitContext.getCount(); 69 | 70 | if (partitionRecordSize >= totalNumOfDocuments) { 71 | LOG.info( 72 | "Fewer documents ({}) than the number of documents per partition ({}), Returning a single partition.", 73 | totalNumOfDocuments, 74 | partitionRecordSize); 75 | return MongoSingleSplitter.split(splitContext); 76 | } 77 | 78 | int numberOfPartitions = 79 | (int) (Math.ceil(totalNumOfDocuments / (double) partitionRecordSize)); 80 | 81 | BsonDocument lastUpperBound = null; 82 | List paginatedSplits = new ArrayList<>(); 83 | 84 | for (int splitNum = 0; splitNum < numberOfPartitions; splitNum++) { 85 | List pipeline = new ArrayList<>(); 86 | 87 | pipeline.add(Aggregates.project(Projections.include(ID_FIELD))); 88 | pipeline.add(Aggregates.project(Sorts.ascending(ID_FIELD))); 89 | 90 | // We don't have to set the upper bounds limit if we're generating the first split. 91 | if (lastUpperBound != null) { 92 | BsonDocument matchFilter = new BsonDocument(); 93 | if (lastUpperBound.containsKey(ID_FIELD)) { 94 | matchFilter.put( 95 | ID_FIELD, new BsonDocument("$gte", lastUpperBound.get(ID_FIELD))); 96 | } 97 | pipeline.add(Aggregates.match(matchFilter)); 98 | } 99 | pipeline.add(Aggregates.skip(partitionRecordSize)); 100 | pipeline.add(Aggregates.limit(1)); 101 | 102 | BsonDocument currentUpperBound = 103 | splitContext 104 | .getMongoCollection() 105 | .aggregate(pipeline) 106 | .allowDiskUse(true) 107 | .first(); 108 | 109 | paginatedSplits.add( 110 | new MongoScanSourceSplit( 111 | String.format("%s_%d", namespace, splitNum), 112 | namespace.getDatabaseName(), 113 | namespace.getCollectionName(), 114 | lastUpperBound != null ? lastUpperBound : BSON_MIN_BOUNDARY, 115 | currentUpperBound != null ? currentUpperBound : BSON_MAX_BOUNDARY, 116 | ID_HINT)); 117 | 118 | if (currentUpperBound == null) { 119 | break; 120 | } 121 | lastUpperBound = currentUpperBound; 122 | } 123 | 124 | return paginatedSplits; 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/MongoShardedSplitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
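//
// Worked example (assumed statistics) of the sizing arithmetic in MongoPaginationSplitter
// above: 1,000,000 documents of roughly 1 KiB each, a 64 MiB partition size, and no
// explicit partition record size configured.
//
long totalNumOfDocuments = 1_000_000L;     // collection document count (assumed)
long avgObjSizeInBytes = 1_024L;           // average document size (assumed)
long partitionSizeInBytes = 64L << 20;     // configured partition size of 64 MiB (assumed)

// Records per split derived from the partition size and the average object size.
int partitionRecordSize = Math.toIntExact(partitionSizeInBytes / avgObjSizeInBytes); // 65_536

// Number of paginated splits the splitter will emit: ceil(1_000_000 / 65_536) = 16.
int numberOfPartitions = (int) Math.ceil(totalNumOfDocuments / (double) partitionRecordSize);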
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 22 | import org.apache.flink.util.FlinkRuntimeException; 23 | 24 | import com.mongodb.MongoException; 25 | import com.mongodb.MongoNamespace; 26 | import com.mongodb.client.MongoClient; 27 | import org.bson.BsonDocument; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import java.util.ArrayList; 32 | import java.util.Collection; 33 | import java.util.List; 34 | import java.util.Optional; 35 | 36 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.KEY_FIELD; 37 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.MAX_FIELD; 38 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.MIN_FIELD; 39 | import static org.apache.flink.connector.mongodb.common.utils.MongoUtils.isShardedCollectionDropped; 40 | import static org.apache.flink.connector.mongodb.common.utils.MongoUtils.readChunks; 41 | import static org.apache.flink.connector.mongodb.common.utils.MongoUtils.readCollectionMetadata; 42 | 43 | /** 44 | * Sharded Partitioner 45 | * 46 | *
 * <p>Uses the chunks collection and partitions the collection based on the sharded collections
47 | * chunk ranges.
48 | *
49 | * <p>The following config collections' read privilege is required.
50 | *
51 | * <ul>
52 | *   <li>config.collections
53 | *   <li>config.chunks
54 | * </ul>
55 | */ 56 | @Internal 57 | public class MongoShardedSplitter { 58 | 59 | private static final Logger LOG = LoggerFactory.getLogger(MongoShardedSplitter.class); 60 | 61 | private MongoShardedSplitter() {} 62 | 63 | public static Collection split(MongoSplitContext splitContext) { 64 | MongoNamespace namespace = splitContext.getMongoNamespace(); 65 | MongoClient mongoClient = splitContext.getMongoClient(); 66 | 67 | List chunks; 68 | Optional collectionMetadata; 69 | try { 70 | collectionMetadata = readCollectionMetadata(mongoClient, namespace); 71 | if (!collectionMetadata.isPresent()) { 72 | LOG.error( 73 | "Do sharded split failed, collection {} does not appear to be sharded.", 74 | namespace); 75 | throw new FlinkRuntimeException( 76 | String.format( 77 | "Do sharded split failed, %s is not a sharded collection.", 78 | namespace)); 79 | } 80 | 81 | if (isShardedCollectionDropped(collectionMetadata.get())) { 82 | LOG.error("Do sharded split failed, collection {} was dropped.", namespace); 83 | throw new FlinkRuntimeException( 84 | String.format("Do sharded split failed, %s was dropped.", namespace)); 85 | } 86 | 87 | chunks = readChunks(mongoClient, collectionMetadata.get()); 88 | if (chunks.isEmpty()) { 89 | LOG.error("Do sharded split failed, chunks of {} is empty.", namespace); 90 | throw new FlinkRuntimeException( 91 | String.format( 92 | "Do sharded split failed, chunks of %s is empty.", namespace)); 93 | } 94 | } catch (MongoException e) { 95 | LOG.error( 96 | "Read chunks from {} failed with error message: {}", namespace, e.getMessage()); 97 | throw new FlinkRuntimeException(e); 98 | } 99 | 100 | List sourceSplits = new ArrayList<>(chunks.size()); 101 | for (int i = 0; i < chunks.size(); i++) { 102 | BsonDocument chunk = chunks.get(i); 103 | sourceSplits.add( 104 | new MongoScanSourceSplit( 105 | String.format("%s_%d", namespace, i), 106 | namespace.getDatabaseName(), 107 | namespace.getCollectionName(), 108 | chunk.getDocument(MIN_FIELD), 109 | chunk.getDocument(MAX_FIELD), 110 | collectionMetadata.get().getDocument(KEY_FIELD))); 111 | } 112 | 113 | return sourceSplits; 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/MongoSingleSplitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
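//
// Minimal sketch (assumed client and namespace) of the metadata lookups that
// MongoShardedSplitter above performs through the statically imported MongoUtils helpers.
// It requires read access to the config database.
//
MongoNamespace namespace = new MongoNamespace("my_db", "my_coll");

Optional<BsonDocument> collectionMetadata = readCollectionMetadata(mongoClient, namespace);
if (!collectionMetadata.isPresent() || isShardedCollectionDropped(collectionMetadata.get())) {
    throw new IllegalStateException(namespace + " is not an active sharded collection.");
}

// Each chunk document carries min/max bounds that become exactly one scan split.
List<BsonDocument> chunks = readChunks(mongoClient, collectionMetadata.get());
BsonDocument shardKey = collectionMetadata.get().getDocument(KEY_FIELD);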
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 22 | 23 | import java.util.Collection; 24 | 25 | import static java.util.Collections.singletonList; 26 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MAX_BOUNDARY; 27 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MIN_BOUNDARY; 28 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_HINT; 29 | 30 | /** Mongo Splitter that splits MongoDB collection as a single split. */ 31 | @Internal 32 | public class MongoSingleSplitter { 33 | 34 | private MongoSingleSplitter() {} 35 | 36 | public static Collection split(MongoSplitContext splitContext) { 37 | MongoScanSourceSplit singleSplit = 38 | new MongoScanSourceSplit( 39 | splitContext.getMongoNamespace().getFullName(), 40 | splitContext.getDatabaseName(), 41 | splitContext.getCollectionName(), 42 | BSON_MIN_BOUNDARY, 43 | BSON_MAX_BOUNDARY, 44 | ID_HINT); 45 | 46 | return singletonList(singleSplit); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/MongoSplitContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 20 | 21 | import org.apache.flink.annotation.Internal; 22 | import org.apache.flink.connector.mongodb.source.config.MongoReadOptions; 23 | 24 | import com.mongodb.MongoNamespace; 25 | import com.mongodb.client.MongoClient; 26 | import com.mongodb.client.MongoCollection; 27 | import org.bson.BsonBoolean; 28 | import org.bson.BsonDocument; 29 | import org.bson.BsonInt64; 30 | 31 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.AVG_OBJ_SIZE_FIELD; 32 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.COUNT_FIELD; 33 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.SHARDED_FIELD; 34 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.SIZE_FIELD; 35 | 36 | /** 37 | * The split context used by {@link MongoSplitters} to split collection into a set of chunks for 38 | * MongoDB data source. 39 | */ 40 | @Internal 41 | public class MongoSplitContext { 42 | 43 | /** Read options of MongoDB. */ 44 | private final MongoReadOptions readOptions; 45 | 46 | /** Client of MongoDB. 
*/ 47 | private final MongoClient mongoClient; 48 | 49 | /** Namespace of MongoDB, eg. db.coll. */ 50 | private final MongoNamespace namespace; 51 | 52 | /** Is a sharded collection. */ 53 | private final boolean sharded; 54 | 55 | /** The number of objects or documents in this collection. */ 56 | private final long count; 57 | 58 | /** The total uncompressed size(bytes) in memory of all records in a collection. */ 59 | private final long size; 60 | 61 | /** The average size(bytes) of an object in the collection. */ 62 | private final long avgObjSize; 63 | 64 | public MongoSplitContext( 65 | MongoReadOptions readOptions, 66 | MongoClient mongoClient, 67 | MongoNamespace namespace, 68 | boolean sharded, 69 | long count, 70 | long size, 71 | long avgObjSize) { 72 | this.readOptions = readOptions; 73 | this.mongoClient = mongoClient; 74 | this.namespace = namespace; 75 | this.sharded = sharded; 76 | this.count = count; 77 | this.size = size; 78 | this.avgObjSize = avgObjSize; 79 | } 80 | 81 | public static MongoSplitContext of( 82 | MongoReadOptions readOptions, 83 | MongoClient mongoClient, 84 | MongoNamespace namespace, 85 | BsonDocument collStats) { 86 | return new MongoSplitContext( 87 | readOptions, 88 | mongoClient, 89 | namespace, 90 | collStats.getBoolean(SHARDED_FIELD, BsonBoolean.FALSE).getValue(), 91 | collStats.getNumber(COUNT_FIELD, new BsonInt64(0)).longValue(), 92 | collStats.getNumber(SIZE_FIELD, new BsonInt64(0)).longValue(), 93 | collStats.getNumber(AVG_OBJ_SIZE_FIELD, new BsonInt64(0)).longValue()); 94 | } 95 | 96 | public MongoClient getMongoClient() { 97 | return mongoClient; 98 | } 99 | 100 | public MongoReadOptions getReadOptions() { 101 | return readOptions; 102 | } 103 | 104 | public String getDatabaseName() { 105 | return namespace.getDatabaseName(); 106 | } 107 | 108 | public String getCollectionName() { 109 | return namespace.getCollectionName(); 110 | } 111 | 112 | public MongoNamespace getMongoNamespace() { 113 | return namespace; 114 | } 115 | 116 | public MongoCollection getMongoCollection() { 117 | return mongoClient 118 | .getDatabase(namespace.getDatabaseName()) 119 | .getCollection(namespace.getCollectionName()) 120 | .withDocumentClass(BsonDocument.class); 121 | } 122 | 123 | public boolean isSharded() { 124 | return sharded; 125 | } 126 | 127 | public long getCount() { 128 | return count; 129 | } 130 | 131 | public long getSize() { 132 | return size; 133 | } 134 | 135 | public long getAvgObjSize() { 136 | return avgObjSize; 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/MongoSplitVectorSplitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
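//
// Sketch (assumed client, read options, and namespace) of how a MongoSplitContext defined
// above is typically obtained: run collStats for the target namespace and wrap the result.
//
// import org.apache.flink.connector.mongodb.common.utils.MongoUtils;

MongoNamespace namespace = new MongoNamespace("my_db", "my_coll");
BsonDocument collStats = MongoUtils.collStats(mongoClient, namespace);

MongoSplitContext splitContext =
        MongoSplitContext.of(readOptions, mongoClient, namespace, collStats);

// The splitters then branch on these statistics, for example:
boolean sharded = splitContext.isSharded();
long avgObjSize = splitContext.getAvgObjSize();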
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.configuration.MemorySize; 22 | import org.apache.flink.connector.mongodb.source.config.MongoReadOptions; 23 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 24 | import org.apache.flink.util.FlinkRuntimeException; 25 | 26 | import com.mongodb.MongoException; 27 | import com.mongodb.MongoNamespace; 28 | import com.mongodb.client.MongoClient; 29 | import org.apache.commons.collections.CollectionUtils; 30 | import org.bson.BsonArray; 31 | import org.bson.BsonDocument; 32 | import org.bson.BsonInt32; 33 | import org.bson.BsonValue; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | import java.util.ArrayList; 38 | import java.util.Collection; 39 | import java.util.List; 40 | 41 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MAX_BOUNDARY; 42 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MIN_KEY; 43 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_FIELD; 44 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_HINT; 45 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.SPLIT_KEYS_FIELD; 46 | import static org.apache.flink.connector.mongodb.common.utils.MongoUtils.splitVector; 47 | 48 | /** 49 | * SplitVector Partitioner 50 | * 51 | *
<p>
Uses the SplitVector command to generate chunks for a collection. eg. 52 | * db.runCommand({splitVector:"inventory.products", keyPattern:{_id:1}, maxChunkSize:64}) 53 | * 54 | *
<p>
Requires splitVector privilege. 55 | */ 56 | @Internal 57 | public class MongoSplitVectorSplitter { 58 | 59 | private static final Logger LOG = LoggerFactory.getLogger(MongoSplitVectorSplitter.class); 60 | 61 | private MongoSplitVectorSplitter() {} 62 | 63 | public static Collection split(MongoSplitContext splitContext) { 64 | if (splitContext.isSharded()) { 65 | throw new FlinkRuntimeException("splitVector does not apply to sharded collections."); 66 | } 67 | 68 | MongoClient mongoClient = splitContext.getMongoClient(); 69 | MongoNamespace namespace = splitContext.getMongoNamespace(); 70 | MongoReadOptions readOptions = splitContext.getReadOptions(); 71 | 72 | MemorySize chunkSize = readOptions.getPartitionSize(); 73 | // if partition size < 1mb, use 1 mb as chunk size. 74 | int maxChunkSizeMB = Math.max(chunkSize.getMebiBytes(), 1); 75 | 76 | BsonDocument keyPattern = new BsonDocument(ID_FIELD, new BsonInt32(1)); 77 | 78 | BsonDocument splitResult; 79 | try { 80 | splitResult = splitVector(mongoClient, namespace, keyPattern, maxChunkSizeMB); 81 | } catch (MongoException e) { 82 | LOG.error("Execute splitVector command failed : {}", e.getMessage()); 83 | throw new FlinkRuntimeException(e); 84 | } 85 | 86 | BsonArray splitKeys = splitResult.getArray(SPLIT_KEYS_FIELD); 87 | if (CollectionUtils.isEmpty(splitKeys)) { 88 | // documents size is less than chunk size, treat the entire collection as single chunk. 89 | return MongoSingleSplitter.split(splitContext); 90 | } 91 | 92 | // Complete right bound: (lastKey, maxKey) 93 | splitKeys.add(BSON_MAX_BOUNDARY); 94 | 95 | List sourceSplits = new ArrayList<>(splitKeys.size()); 96 | 97 | BsonValue lowerValue = BSON_MIN_KEY; 98 | for (int i = 0; i < splitKeys.size(); i++) { 99 | BsonValue splitKeyValue = splitKeys.get(i).asDocument().get(ID_FIELD); 100 | sourceSplits.add( 101 | new MongoScanSourceSplit( 102 | String.format("%s_%d", namespace, i), 103 | namespace.getDatabaseName(), 104 | namespace.getCollectionName(), 105 | new BsonDocument(ID_FIELD, lowerValue), 106 | new BsonDocument(ID_FIELD, splitKeyValue), 107 | ID_HINT)); 108 | lowerValue = splitKeyValue; 109 | } 110 | 111 | return sourceSplits; 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/MongoSplitters.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
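//
// Sketch (assumed client and an unsharded collection) of the splitVector invocation used by
// MongoSplitVectorSplitter above, via the statically imported MongoUtils.splitVector helper.
// The connection must hold the splitVector privilege.
//
MongoNamespace namespace = new MongoNamespace("inventory", "products");
BsonDocument keyPattern = new BsonDocument(ID_FIELD, new BsonInt32(1));
int maxChunkSizeMB = 64;

BsonDocument splitResult = splitVector(mongoClient, namespace, keyPattern, maxChunkSizeMB);

// splitKeys holds the interior boundaries; consecutive keys delimit one chunk each, and the
// splitter completes the two open ends with (minKey, firstKey) and (lastKey, maxKey).
BsonArray splitKeys = splitResult.getArray(SPLIT_KEYS_FIELD);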
17 | */ 18 | 19 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 20 | 21 | import org.apache.flink.annotation.Internal; 22 | import org.apache.flink.connector.mongodb.common.utils.MongoUtils; 23 | import org.apache.flink.connector.mongodb.source.config.MongoReadOptions; 24 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 25 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplit; 26 | import org.apache.flink.util.FlinkRuntimeException; 27 | 28 | import com.mongodb.MongoException; 29 | import com.mongodb.MongoNamespace; 30 | import com.mongodb.client.MongoClient; 31 | import org.bson.BsonDocument; 32 | import org.slf4j.Logger; 33 | import org.slf4j.LoggerFactory; 34 | 35 | import java.util.Collection; 36 | 37 | /** To split collections of MongoDB to {@link MongoSourceSplit}s. */ 38 | @Internal 39 | public class MongoSplitters { 40 | 41 | private static final Logger LOG = LoggerFactory.getLogger(MongoSplitters.class); 42 | 43 | private MongoSplitters() {} 44 | 45 | public static Collection split( 46 | MongoClient mongoClient, MongoReadOptions readOptions, MongoNamespace namespace) { 47 | BsonDocument collStats; 48 | try { 49 | collStats = MongoUtils.collStats(mongoClient, namespace); 50 | } catch (MongoException e) { 51 | LOG.error("Execute collStats command failed, with error message: {}", e.getMessage()); 52 | throw new FlinkRuntimeException(e); 53 | } 54 | 55 | MongoSplitContext splitContext = 56 | MongoSplitContext.of(readOptions, mongoClient, namespace, collStats); 57 | 58 | switch (readOptions.getPartitionStrategy()) { 59 | case SINGLE: 60 | return MongoSingleSplitter.split(splitContext); 61 | case SAMPLE: 62 | return MongoSampleSplitter.split(splitContext); 63 | case SPLIT_VECTOR: 64 | return MongoSplitVectorSplitter.split(splitContext); 65 | case SHARDED: 66 | return MongoShardedSplitter.split(splitContext); 67 | case PAGINATION: 68 | return MongoPaginationSplitter.split(splitContext); 69 | case DEFAULT: 70 | default: 71 | return splitContext.isSharded() 72 | ? MongoShardedSplitter.split(splitContext) 73 | : MongoSplitVectorSplitter.split(splitContext); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/PartitionStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
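//
// Sketch (assumed MongoReadOptions instance) of the single entry point used by the scan
// split assigner shown earlier: MongoSplitters picks the splitter matching the configured
// partition strategy and returns the resulting scan splits.
//
try (MongoClient mongoClient = MongoClients.create("mongodb://127.0.0.1:27017")) {
    Collection<? extends MongoSourceSplit> splits =
            MongoSplitters.split(mongoClient, readOptions, new MongoNamespace("my_db.my_coll"));
    splits.forEach(split -> System.out.println("created split: " + split.splitId()));
}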
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.configuration.DescribedEnum; 22 | import org.apache.flink.configuration.description.InlineElement; 23 | 24 | import static org.apache.flink.configuration.description.TextElement.text; 25 | 26 | /** 27 | * Partition strategies that can be chosen. Available strategies are single, sample, split-vector, 28 | * sharded and default. 29 | * 30 | *

    31 | *
  • single: treats the entire collection as a single partition. 32 | *
  • sample: samples the collection and generates partitions, which is fast but possibly uneven. 33 | *
  • split-vector: uses the splitVector command to generate partitions for non-sharded 34 | * collections, which is fast and even. The splitVector permission is required. 35 | *
  • sharded: reads config.chunks (MongoDB splits a sharded collection into chunks, and the 36 | * ranges of the chunks are stored within the collection) as the partitions directly. The 37 | * sharded strategy can only be used for sharded collections and is fast and even. Read permission 38 | * on the config database is required. 39 | *
  • default: uses the sharded strategy for sharded collections, otherwise the split-vector 40 | * strategy. 41 | *
42 | */ 43 | @PublicEvolving 44 | public enum PartitionStrategy implements DescribedEnum { 45 | SINGLE("single", text("Do not split, treat a collection as a single chunk.")), 46 | 47 | SAMPLE("sample", text("Randomly sample the collection, then splits to multiple chunks.")), 48 | 49 | SPLIT_VECTOR( 50 | "split-vector", 51 | text("Uses the SplitVector command to generate chunks for non-sharded collections.")), 52 | 53 | SHARDED( 54 | "sharded", 55 | text( 56 | "Read the chunk ranges from config.chunks collection and splits to multiple chunks. Only support sharded collections.")), 57 | 58 | PAGINATION( 59 | "pagination", 60 | text( 61 | "Creating chunk records evenly by count. Each chunk will have exactly the same number of records.")), 62 | 63 | DEFAULT( 64 | "default", 65 | text( 66 | "Using sharded strategy for sharded collections" 67 | + " otherwise using split vector strategy.")); 68 | 69 | private final String name; 70 | private final InlineElement description; 71 | 72 | PartitionStrategy(String name, InlineElement description) { 73 | this.name = name; 74 | this.description = description; 75 | } 76 | 77 | @Override 78 | public InlineElement getDescription() { 79 | return description; 80 | } 81 | 82 | @Override 83 | public String toString() { 84 | return name; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/reader/MongoSourceReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
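For context, the enum above is what a user selects when building the DataStream source. The sketch below follows the builder shown in the connector's documentation; the setter names (setUri, setDatabase, setCollection, setPartitionStrategy, setPartitionSize, setDeserializationSchema) are assumed from that documentation and may differ between versions.

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.MemorySize;
import org.apache.flink.connector.mongodb.source.MongoSource;
import org.apache.flink.connector.mongodb.source.enumerator.splitter.PartitionStrategy;
import org.apache.flink.connector.mongodb.source.reader.deserializer.MongoDeserializationSchema;

import org.bson.BsonDocument;

public class PartitionStrategySketch {
    public static MongoSource<String> buildSource() {
        return MongoSource.<String>builder()
                .setUri("mongodb://127.0.0.1:27017")
                .setDatabase("my_db")
                .setCollection("my_coll")
                // Explicitly pick a strategy; DEFAULT would choose SHARDED or SPLIT_VECTOR
                // depending on whether the collection is sharded.
                .setPartitionStrategy(PartitionStrategy.SAMPLE)
                .setPartitionSize(MemorySize.ofMebiBytes(64))
                .setDeserializationSchema(
                        new MongoDeserializationSchema<String>() {
                            @Override
                            public String deserialize(BsonDocument document) {
                                return document.toJson();
                            }

                            @Override
                            public TypeInformation<String> getProducedType() {
                                return BasicTypeInfo.STRING_TYPE_INFO;
                            }
                        })
                .build();
    }
}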
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.reader; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.base.source.reader.RecordEmitter; 22 | import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; 23 | import org.apache.flink.connector.base.source.reader.SingleThreadMultiplexSourceReaderBase; 24 | import org.apache.flink.connector.base.source.reader.fetcher.SingleThreadFetcherManager; 25 | import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; 26 | import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue; 27 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 28 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplitState; 29 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplit; 30 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplitState; 31 | 32 | import org.bson.BsonDocument; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | 36 | import java.util.Map; 37 | import java.util.function.Supplier; 38 | 39 | /** 40 | * The common mongo source reader for both ordered & unordered message consuming. 41 | * 42 | * @param The output message type for flink. 43 | */ 44 | @Internal 45 | public class MongoSourceReader 46 | extends SingleThreadMultiplexSourceReaderBase< 47 | BsonDocument, OUT, MongoSourceSplit, MongoSourceSplitState> { 48 | 49 | private static final Logger LOG = LoggerFactory.getLogger(MongoSourceReader.class); 50 | 51 | public MongoSourceReader( 52 | FutureCompletingBlockingQueue> elementQueue, 53 | Supplier> splitReaderSupplier, 54 | RecordEmitter recordEmitter, 55 | MongoSourceReaderContext readerContext) { 56 | super( 57 | elementQueue, 58 | new SingleThreadFetcherManager<>(elementQueue, splitReaderSupplier), 59 | recordEmitter, 60 | readerContext.getConfiguration(), 61 | readerContext); 62 | } 63 | 64 | @Override 65 | public void start() { 66 | if (getNumberOfCurrentlyAssignedSplits() == 0) { 67 | context.sendSplitRequest(); 68 | } 69 | } 70 | 71 | @Override 72 | protected void onSplitFinished(Map finishedSplitIds) { 73 | for (MongoSourceSplitState splitState : finishedSplitIds.values()) { 74 | MongoSourceSplit sourceSplit = splitState.toMongoSourceSplit(); 75 | LOG.info("Split {} is finished.", sourceSplit.splitId()); 76 | } 77 | context.sendSplitRequest(); 78 | } 79 | 80 | @Override 81 | protected MongoSourceSplitState initializedState(MongoSourceSplit split) { 82 | if (split instanceof MongoScanSourceSplit) { 83 | return new MongoScanSourceSplitState((MongoScanSourceSplit) split); 84 | } else { 85 | throw new IllegalArgumentException("Unknown split type."); 86 | } 87 | } 88 | 89 | @Override 90 | protected MongoSourceSplit toSplitType(String splitId, MongoSourceSplitState splitState) { 91 | return splitState.toMongoSourceSplit(); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/reader/MongoSourceReaderContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.reader; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.api.connector.source.SourceEvent; 22 | import org.apache.flink.api.connector.source.SourceReaderContext; 23 | import org.apache.flink.configuration.Configuration; 24 | import org.apache.flink.connector.mongodb.source.reader.split.MongoScanSourceSplitReader; 25 | import org.apache.flink.metrics.groups.SourceReaderMetricGroup; 26 | import org.apache.flink.util.UserCodeClassLoader; 27 | 28 | import java.util.concurrent.atomic.AtomicInteger; 29 | 30 | /** 31 | * A wrapper class that wraps {@link SourceReaderContext} for sharing message between {@link 32 | * MongoSourceReader} and {@link MongoScanSourceSplitReader}. 33 | */ 34 | @Internal 35 | public class MongoSourceReaderContext implements SourceReaderContext { 36 | 37 | private final SourceReaderContext readerContext; 38 | private final AtomicInteger readCount = new AtomicInteger(0); 39 | private final int limit; 40 | 41 | public MongoSourceReaderContext(SourceReaderContext readerContext, int limit) { 42 | this.readerContext = readerContext; 43 | this.limit = limit; 44 | } 45 | 46 | @Override 47 | public SourceReaderMetricGroup metricGroup() { 48 | return readerContext.metricGroup(); 49 | } 50 | 51 | @Override 52 | public Configuration getConfiguration() { 53 | return readerContext.getConfiguration(); 54 | } 55 | 56 | @Override 57 | public String getLocalHostName() { 58 | return readerContext.getLocalHostName(); 59 | } 60 | 61 | @Override 62 | public int getIndexOfSubtask() { 63 | return readerContext.getIndexOfSubtask(); 64 | } 65 | 66 | @Override 67 | public void sendSplitRequest() { 68 | readerContext.sendSplitRequest(); 69 | } 70 | 71 | @Override 72 | public void sendSourceEventToCoordinator(SourceEvent sourceEvent) { 73 | readerContext.sendSourceEventToCoordinator(sourceEvent); 74 | } 75 | 76 | @Override 77 | public UserCodeClassLoader getUserCodeClassLoader() { 78 | return readerContext.getUserCodeClassLoader(); 79 | } 80 | 81 | public AtomicInteger getReadCount() { 82 | return readCount; 83 | } 84 | 85 | public boolean isLimitPushedDown() { 86 | return limit > 0; 87 | } 88 | 89 | public boolean isOverLimit() { 90 | return limit > 0 && readCount.get() >= limit; 91 | } 92 | 93 | public int getLimit() { 94 | return limit; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/reader/deserializer/MongoDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.reader.deserializer; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.api.java.typeutils.ResultTypeQueryable; 22 | import org.apache.flink.util.Collector; 23 | 24 | import org.bson.BsonDocument; 25 | 26 | import java.io.IOException; 27 | import java.io.Serializable; 28 | 29 | /** 30 | * A schema bridge for deserializing the MongoDB's {@code BsonDocument} into a flink managed 31 | * instance. 32 | * 33 | * @param The output message type for sinking to downstream flink operator. 34 | */ 35 | @PublicEvolving 36 | public interface MongoDeserializationSchema extends Serializable, ResultTypeQueryable { 37 | 38 | /** 39 | * Deserializes the BSON document. 40 | * 41 | * @param document The BSON document to deserialize. 42 | * @return The deserialized message as an object (null if the message cannot be deserialized). 43 | */ 44 | T deserialize(BsonDocument document) throws IOException; 45 | 46 | /** 47 | * Deserializes the BSON document. 48 | * 49 | *

Can output multiple records through the {@link Collector}. Note that number and size of 50 | * the produced records should be relatively small. Depending on the source implementation 51 | * records can be buffered in memory or collecting records might delay emitting checkpoint 52 | * barrier. 53 | * 54 | * @param document The BSON document to deserialize. 55 | * @param out The collector to put the resulting messages. 56 | */ 57 | default void deserialize(BsonDocument document, Collector out) throws IOException { 58 | T deserialize = deserialize(document); 59 | if (deserialize != null) { 60 | out.collect(deserialize); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/reader/emitter/MongoRecordEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.reader.emitter; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.api.connector.source.SourceOutput; 22 | import org.apache.flink.connector.base.source.reader.RecordEmitter; 23 | import org.apache.flink.connector.mongodb.source.reader.MongoSourceReader; 24 | import org.apache.flink.connector.mongodb.source.reader.deserializer.MongoDeserializationSchema; 25 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplitState; 26 | import org.apache.flink.util.Collector; 27 | 28 | import org.bson.BsonDocument; 29 | 30 | /** 31 | * The {@link RecordEmitter} implementation for {@link MongoSourceReader} . We would always update 32 | * the last consumed message id in this emitter. 33 | */ 34 | @Internal 35 | public class MongoRecordEmitter 36 | implements RecordEmitter { 37 | 38 | private final MongoDeserializationSchema deserializationSchema; 39 | private final SourceOutputWrapper sourceOutputWrapper; 40 | 41 | public MongoRecordEmitter(MongoDeserializationSchema deserializationSchema) { 42 | this.deserializationSchema = deserializationSchema; 43 | this.sourceOutputWrapper = new SourceOutputWrapper<>(); 44 | } 45 | 46 | @Override 47 | public void emitRecord( 48 | BsonDocument document, SourceOutput output, MongoSourceSplitState splitState) 49 | throws Exception { 50 | // Update current offset. 51 | splitState.updateOffset(document); 52 | // Sink the record to source output. 
53 | sourceOutputWrapper.setSourceOutput(output); 54 | deserializationSchema.deserialize(document, sourceOutputWrapper); 55 | } 56 | 57 | private static class SourceOutputWrapper implements Collector { 58 | private SourceOutput sourceOutput; 59 | 60 | @Override 61 | public void collect(T record) { 62 | sourceOutput.collect(record); 63 | } 64 | 65 | @Override 66 | public void close() {} 67 | 68 | private void setSourceOutput(SourceOutput sourceOutput) { 69 | this.sourceOutput = sourceOutput; 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/reader/split/MongoSourceSplitReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.reader.split; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; 22 | import org.apache.flink.connector.mongodb.source.split.MongoSourceSplit; 23 | 24 | import org.bson.BsonDocument; 25 | 26 | /** 27 | * A split reader implements {@link SplitReader} for {@link MongoSourceSplit}. 28 | * 29 | * @param Mongo source split. 30 | */ 31 | @Internal 32 | public interface MongoSourceSplitReader 33 | extends SplitReader {} 34 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/split/MongoScanSourceSplit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
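Tying the pieces above together: the record emitter hands every BsonDocument to the user-provided MongoDeserializationSchema shown a little earlier in this listing. A small reusable implementation might look like the following sketch; the "name" field and the null-on-missing behaviour are assumptions for the example, and the interface's default Collector-based variant simply drops null results.

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.connector.mongodb.source.reader.deserializer.MongoDeserializationSchema;

import org.bson.BsonDocument;

/** Sketch: emits the string value of a hypothetical "name" field, skipping documents without it. */
public class NameFieldDeserializationSchema implements MongoDeserializationSchema<String> {

    private static final long serialVersionUID = 1L;

    @Override
    public String deserialize(BsonDocument document) {
        // Returning null is allowed by the contract; the default deserialize(document, out)
        // will not emit anything for such documents.
        return document.containsKey("name") && document.get("name").isString()
                ? document.getString("name").getValue()
                : null;
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}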
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.split; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.api.connector.source.SourceSplit; 22 | 23 | import org.bson.BsonDocument; 24 | 25 | import java.util.Objects; 26 | 27 | /** A {@link SourceSplit} implementation for a MongoDB's partition. */ 28 | @PublicEvolving 29 | public class MongoScanSourceSplit extends MongoSourceSplit { 30 | 31 | private static final long serialVersionUID = 1L; 32 | 33 | private final String database; 34 | 35 | private final String collection; 36 | 37 | private final BsonDocument min; 38 | 39 | private final BsonDocument max; 40 | 41 | private final BsonDocument hint; 42 | 43 | private final int offset; 44 | 45 | public MongoScanSourceSplit( 46 | String splitId, 47 | String database, 48 | String collection, 49 | BsonDocument min, 50 | BsonDocument max, 51 | BsonDocument hint) { 52 | this(splitId, database, collection, min, max, hint, 0); 53 | } 54 | 55 | public MongoScanSourceSplit( 56 | String splitId, 57 | String database, 58 | String collection, 59 | BsonDocument min, 60 | BsonDocument max, 61 | BsonDocument hint, 62 | int offset) { 63 | super(splitId); 64 | this.database = database; 65 | this.collection = collection; 66 | this.min = min; 67 | this.max = max; 68 | this.hint = hint; 69 | this.offset = offset; 70 | } 71 | 72 | public String getDatabase() { 73 | return database; 74 | } 75 | 76 | public String getCollection() { 77 | return collection; 78 | } 79 | 80 | public BsonDocument getMin() { 81 | return min; 82 | } 83 | 84 | public BsonDocument getMax() { 85 | return max; 86 | } 87 | 88 | public BsonDocument getHint() { 89 | return hint; 90 | } 91 | 92 | public int getOffset() { 93 | return offset; 94 | } 95 | 96 | @Override 97 | public boolean equals(Object o) { 98 | if (this == o) { 99 | return true; 100 | } 101 | if (o == null || getClass() != o.getClass()) { 102 | return false; 103 | } 104 | if (!super.equals(o)) { 105 | return false; 106 | } 107 | MongoScanSourceSplit split = (MongoScanSourceSplit) o; 108 | return Objects.equals(database, split.database) 109 | && Objects.equals(collection, split.collection) 110 | && Objects.equals(min, split.min) 111 | && Objects.equals(max, split.max) 112 | && Objects.equals(hint, split.hint) 113 | && offset == split.offset; 114 | } 115 | 116 | @Override 117 | public int hashCode() { 118 | return Objects.hash(super.hashCode(), database, collection, min, max, hint, offset); 119 | } 120 | 121 | @Override 122 | public String toString() { 123 | return "MongoScanSourceSplit {" 124 | + " splitId=" 125 | + splitId 126 | + ", database=" 127 | + database 128 | + ", collection=" 129 | + collection 130 | + ", min=" 131 | + min 132 | + ", max=" 133 | + max 134 | + ", hint=" 135 | + hint 136 | + ", offset=" 137 | + offset 138 | + " }"; 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/split/MongoScanSourceSplitState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.split; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | 22 | import org.bson.BsonDocument; 23 | 24 | /** MongoDB source split state for {@link MongoScanSourceSplit}. */ 25 | @Internal 26 | public class MongoScanSourceSplitState implements MongoSourceSplitState { 27 | 28 | private final MongoScanSourceSplit scanSplit; 29 | 30 | private int offset; 31 | 32 | public MongoScanSourceSplitState(MongoScanSourceSplit scanSplit) { 33 | this.scanSplit = scanSplit; 34 | this.offset = scanSplit.getOffset(); 35 | } 36 | 37 | @Override 38 | public MongoScanSourceSplit toMongoSourceSplit() { 39 | return new MongoScanSourceSplit( 40 | scanSplit.splitId(), 41 | scanSplit.getDatabase(), 42 | scanSplit.getCollection(), 43 | scanSplit.getMin(), 44 | scanSplit.getMax(), 45 | scanSplit.getHint(), 46 | offset); 47 | } 48 | 49 | @Override 50 | public void updateOffset(BsonDocument record) { 51 | offset++; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/split/MongoSourceSplit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.split; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.api.connector.source.SourceSplit; 22 | 23 | import java.io.Serializable; 24 | import java.util.Objects; 25 | 26 | /** A super class of {@link SourceSplit} implementation for a MongoDB's source split. 
*/ 27 | @PublicEvolving 28 | public abstract class MongoSourceSplit implements SourceSplit, Serializable { 29 | 30 | protected final String splitId; 31 | 32 | protected MongoSourceSplit(String splitId) { 33 | this.splitId = splitId; 34 | } 35 | 36 | @Override 37 | public String splitId() { 38 | return splitId; 39 | } 40 | 41 | @Override 42 | public boolean equals(Object o) { 43 | if (this == o) { 44 | return true; 45 | } 46 | if (o == null || getClass() != o.getClass()) { 47 | return false; 48 | } 49 | MongoSourceSplit that = (MongoSourceSplit) o; 50 | return Objects.equals(splitId, that.splitId); 51 | } 52 | 53 | @Override 54 | public int hashCode() { 55 | return Objects.hash(splitId); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/split/MongoSourceSplitSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.split; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.core.io.SimpleVersionedSerializer; 22 | 23 | import org.bson.BsonDocument; 24 | 25 | import java.io.ByteArrayInputStream; 26 | import java.io.ByteArrayOutputStream; 27 | import java.io.DataInputStream; 28 | import java.io.DataOutputStream; 29 | import java.io.IOException; 30 | 31 | /** The {@link SimpleVersionedSerializer serializer} for {@link MongoSourceSplit}. */ 32 | @Internal 33 | public class MongoSourceSplitSerializer implements SimpleVersionedSerializer { 34 | 35 | public static final MongoSourceSplitSerializer INSTANCE = new MongoSourceSplitSerializer(); 36 | 37 | // This version should be bumped after modifying the MongoSourceSplit. 
38 | public static final int CURRENT_VERSION = 0; 39 | 40 | public static final int SCAN_SPLIT_FLAG = 1; 41 | 42 | private MongoSourceSplitSerializer() {} 43 | 44 | @Override 45 | public int getVersion() { 46 | return CURRENT_VERSION; 47 | } 48 | 49 | @Override 50 | public byte[] serialize(MongoSourceSplit obj) throws IOException { 51 | // VERSION 0 serialization 52 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); 53 | DataOutputStream out = new DataOutputStream(baos)) { 54 | serializeMongoSplit(out, obj); 55 | out.flush(); 56 | return baos.toByteArray(); 57 | } 58 | } 59 | 60 | @Override 61 | public MongoSourceSplit deserialize(int version, byte[] serialized) throws IOException { 62 | // VERSION 0 deserialization 63 | try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 64 | DataInputStream in = new DataInputStream(bais)) { 65 | int splitKind = in.readInt(); 66 | if (splitKind == SCAN_SPLIT_FLAG) { 67 | return deserializeMongoScanSourceSplit(version, in); 68 | } 69 | throw new IOException("Unknown split kind: " + splitKind); 70 | } 71 | } 72 | 73 | public void serializeMongoSplit(DataOutputStream out, MongoSourceSplit obj) throws IOException { 74 | if (obj instanceof MongoScanSourceSplit) { 75 | MongoScanSourceSplit split = (MongoScanSourceSplit) obj; 76 | out.writeInt(SCAN_SPLIT_FLAG); 77 | out.writeUTF(split.splitId()); 78 | out.writeUTF(split.getDatabase()); 79 | out.writeUTF(split.getCollection()); 80 | out.writeUTF(split.getMin().toJson()); 81 | out.writeUTF(split.getMax().toJson()); 82 | out.writeUTF(split.getHint().toJson()); 83 | out.writeInt(split.getOffset()); 84 | } 85 | } 86 | 87 | public MongoScanSourceSplit deserializeMongoScanSourceSplit(int version, DataInputStream in) 88 | throws IOException { 89 | switch (version) { 90 | case 0: 91 | String splitId = in.readUTF(); 92 | String database = in.readUTF(); 93 | String collection = in.readUTF(); 94 | BsonDocument min = BsonDocument.parse(in.readUTF()); 95 | BsonDocument max = BsonDocument.parse(in.readUTF()); 96 | BsonDocument hint = BsonDocument.parse(in.readUTF()); 97 | int offset = in.readInt(); 98 | return new MongoScanSourceSplit( 99 | splitId, database, collection, min, max, hint, offset); 100 | default: 101 | throw new IOException("Unknown version: " + version); 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/source/split/MongoSourceSplitState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
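A quick way to see the versioned serializer above in action is a round trip through serialize and deserialize; everything used below comes from the classes already shown in this listing, only the sample bounds are arbitrary.

import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit;
import org.apache.flink.connector.mongodb.source.split.MongoSourceSplit;
import org.apache.flink.connector.mongodb.source.split.MongoSourceSplitSerializer;

import org.bson.BsonDocument;
import org.bson.BsonInt32;

import java.io.IOException;

public class SplitSerializerRoundTrip {
    public static void main(String[] args) throws IOException {
        MongoScanSourceSplit split =
                new MongoScanSourceSplit(
                        "my_db.my_coll_0",
                        "my_db",
                        "my_coll",
                        new BsonDocument("_id", new BsonInt32(0)),
                        new BsonDocument("_id", new BsonInt32(1000)),
                        new BsonDocument("_id", new BsonInt32(1)));

        byte[] bytes = MongoSourceSplitSerializer.INSTANCE.serialize(split);
        MongoSourceSplit restored =
                MongoSourceSplitSerializer.INSTANCE.deserialize(
                        MongoSourceSplitSerializer.INSTANCE.getVersion(), bytes);

        // MongoScanSourceSplit.equals compares database, collection, bounds, hint and offset.
        System.out.println(split.equals(restored));
    }
}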
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.split; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | 22 | import org.bson.BsonDocument; 23 | 24 | /** MongoDB source split state for {@link MongoSourceSplit}. */ 25 | @Internal 26 | public interface MongoSourceSplitState { 27 | 28 | /** Use the current split state to create a new {@link MongoSourceSplit}. */ 29 | MongoSourceSplit toMongoSourceSplit(); 30 | 31 | /** 32 | * Update the offset read by the current split for failure recovery. 33 | * 34 | * @param record The latest record that was read. 35 | */ 36 | void updateOffset(BsonDocument record); 37 | } 38 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/table/FilterHandlingPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.table; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | import org.apache.flink.configuration.DescribedEnum; 22 | import org.apache.flink.configuration.description.InlineElement; 23 | 24 | import static org.apache.flink.configuration.description.TextElement.text; 25 | 26 | /** Fine-grained configuration to control filter push down for MongoDB Table/SQL source. */ 27 | @PublicEvolving 28 | public enum FilterHandlingPolicy implements DescribedEnum { 29 | ALWAYS("always", text("Always push the supported filters to MongoDB.")), 30 | 31 | NEVER("never", text("Never push any filters to MongoDB.")); 32 | 33 | private final String name; 34 | private final InlineElement description; 35 | 36 | FilterHandlingPolicy(String name, InlineElement description) { 37 | this.name = name; 38 | this.description = description; 39 | } 40 | 41 | @Override 42 | public InlineElement getDescription() { 43 | return description; 44 | } 45 | 46 | @Override 47 | public String toString() { 48 | return name; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/table/MongoShardKeysExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
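The policy above is consumed by the table source through MongoConfiguration, which appears a bit later in this listing. Since the raw option key is not visible here, the sketch below sets the typed ConfigOption directly; treat the exact shape of FILTER_HANDLING_POLICY in MongoConnectorOptions as an assumption drawn from the static imports used by MongoConfiguration.

import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.mongodb.table.FilterHandlingPolicy;
import org.apache.flink.connector.mongodb.table.MongoConnectorOptions;
import org.apache.flink.connector.mongodb.table.config.MongoConfiguration;

public class FilterHandlingPolicySketch {
    public static void main(String[] args) {
        Configuration options = new Configuration();
        // NEVER keeps all predicates in Flink; ALWAYS lets the source translate
        // supported filters into MongoDB find() conditions.
        options.set(MongoConnectorOptions.FILTER_HANDLING_POLICY, FilterHandlingPolicy.NEVER);

        MongoConfiguration config = new MongoConfiguration(options);
        System.out.println(config.getFilterHandlingPolicy()); // prints "never"
    }
}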
You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.table; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.table.converter.RowDataToBsonConverters; 22 | import org.apache.flink.table.catalog.ResolvedSchema; 23 | import org.apache.flink.table.connector.Projection; 24 | import org.apache.flink.table.data.RowData; 25 | import org.apache.flink.table.data.utils.ProjectedRowData; 26 | import org.apache.flink.table.types.DataType; 27 | import org.apache.flink.table.types.logical.LogicalType; 28 | import org.apache.flink.util.function.SerializableFunction; 29 | 30 | import org.bson.BsonDocument; 31 | import org.bson.BsonObjectId; 32 | import org.bson.BsonValue; 33 | import org.bson.types.ObjectId; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | import java.util.Arrays; 38 | import java.util.List; 39 | import java.util.Optional; 40 | 41 | /** An extractor for a MongoDB shard keys from a {@link RowData}. */ 42 | @Internal 43 | public class MongoShardKeysExtractor implements SerializableFunction { 44 | 45 | private static final long serialVersionUID = 1L; 46 | 47 | private static final Logger LOG = LoggerFactory.getLogger(MongoShardKeysExtractor.class); 48 | 49 | private static final BsonDocument EMPTY_DOCUMENT = new BsonDocument(); 50 | 51 | private final SerializableFunction shardKeysConverter; 52 | 53 | private final RowData.FieldGetter shardKeysGetter; 54 | 55 | private MongoShardKeysExtractor(LogicalType shardKeysType, int[] shardKeysIndexes) { 56 | this.shardKeysConverter = RowDataToBsonConverters.createFieldDataConverter(shardKeysType); 57 | this.shardKeysGetter = 58 | rowData -> ProjectedRowData.from(shardKeysIndexes).replaceRow(rowData); 59 | } 60 | 61 | @Override 62 | public BsonDocument apply(RowData rowData) { 63 | BsonDocument shardKeysDoc = 64 | Optional.ofNullable(shardKeysGetter.getFieldOrNull(rowData)) 65 | .map(shardKeys -> shardKeysConverter.apply(shardKeys).asDocument()) 66 | .orElse(EMPTY_DOCUMENT); 67 | 68 | shardKeysDoc 69 | .entrySet() 70 | .forEach( 71 | entry -> { 72 | if (entry.getValue().isString()) { 73 | String keyString = entry.getValue().asString().getValue(); 74 | // Try to restore MongoDB's ObjectId from string. 75 | if (ObjectId.isValid(keyString)) { 76 | entry.setValue(new BsonObjectId(new ObjectId(keyString))); 77 | } 78 | } 79 | }); 80 | 81 | return shardKeysDoc; 82 | } 83 | 84 | public static SerializableFunction createShardKeysExtractor( 85 | ResolvedSchema resolvedSchema, String[] shardKeys) { 86 | // no shard keys are declared. 
87 | if (shardKeys.length == 0) { 88 | return new NoOpShardKeysExtractor(); 89 | } 90 | 91 | int[] shardKeysIndexes = getShardKeysIndexes(resolvedSchema.getColumnNames(), shardKeys); 92 | DataType physicalRowDataType = resolvedSchema.toPhysicalRowDataType(); 93 | DataType shardKeysType = Projection.of(shardKeysIndexes).project(physicalRowDataType); 94 | 95 | MongoShardKeysExtractor shardKeysExtractor = 96 | new MongoShardKeysExtractor(shardKeysType.getLogicalType(), shardKeysIndexes); 97 | 98 | LOG.info("Shard keys extractor created, shard keys: {}", Arrays.toString(shardKeys)); 99 | return shardKeysExtractor; 100 | } 101 | 102 | private static int[] getShardKeysIndexes(List columnNames, String[] shardKeys) { 103 | return Arrays.stream(shardKeys).mapToInt(columnNames::indexOf).toArray(); 104 | } 105 | 106 | /** 107 | * It behaves as no-op extractor when no shard keys are declared. We use static class instead of 108 | * lambda because the maven shade plugin cannot relocate classes in SerializedLambdas 109 | * (MSHADE-260). 110 | */ 111 | private static class NoOpShardKeysExtractor 112 | implements SerializableFunction { 113 | 114 | private static final long serialVersionUID = 1L; 115 | 116 | @Override 117 | public BsonDocument apply(RowData rowData) { 118 | return EMPTY_DOCUMENT; 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/table/config/MongoConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.table.config; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.configuration.MemorySize; 22 | import org.apache.flink.configuration.ReadableConfig; 23 | import org.apache.flink.connector.base.DeliveryGuarantee; 24 | import org.apache.flink.connector.mongodb.source.enumerator.splitter.PartitionStrategy; 25 | import org.apache.flink.connector.mongodb.table.FilterHandlingPolicy; 26 | import org.apache.flink.table.connector.source.lookup.LookupOptions; 27 | 28 | import javax.annotation.Nullable; 29 | 30 | import java.util.Objects; 31 | 32 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.BUFFER_FLUSH_INTERVAL; 33 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.BUFFER_FLUSH_MAX_ROWS; 34 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.COLLECTION; 35 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.DATABASE; 36 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.DELIVERY_GUARANTEE; 37 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.FILTER_HANDLING_POLICY; 38 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.LOOKUP_RETRY_INTERVAL; 39 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SCAN_CURSOR_NO_TIMEOUT; 40 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SCAN_FETCH_SIZE; 41 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SCAN_PARTITION_RECORD_SIZE; 42 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SCAN_PARTITION_SAMPLES; 43 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SCAN_PARTITION_SIZE; 44 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SCAN_PARTITION_STRATEGY; 45 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SINK_MAX_RETRIES; 46 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.SINK_RETRY_INTERVAL; 47 | import static org.apache.flink.connector.mongodb.table.MongoConnectorOptions.URI; 48 | import static org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM; 49 | 50 | /** MongoDB configuration. 
*/ 51 | @Internal 52 | public class MongoConfiguration { 53 | 54 | private final ReadableConfig config; 55 | 56 | public MongoConfiguration(ReadableConfig config) { 57 | this.config = config; 58 | } 59 | 60 | // -----------------------------------Connection Config---------------------------------------- 61 | public String getUri() { 62 | return config.get(URI); 63 | } 64 | 65 | public String getDatabase() { 66 | return config.get(DATABASE); 67 | } 68 | 69 | public String getCollection() { 70 | return config.get(COLLECTION); 71 | } 72 | 73 | // -----------------------------------Read Config---------------------------------------- 74 | public int getFetchSize() { 75 | return config.get(SCAN_FETCH_SIZE); 76 | } 77 | 78 | public boolean isNoCursorTimeout() { 79 | return config.get(SCAN_CURSOR_NO_TIMEOUT); 80 | } 81 | 82 | public PartitionStrategy getPartitionStrategy() { 83 | return config.get(SCAN_PARTITION_STRATEGY); 84 | } 85 | 86 | public MemorySize getPartitionSize() { 87 | return config.get(SCAN_PARTITION_SIZE); 88 | } 89 | 90 | public int getSamplesPerPartition() { 91 | return config.get(SCAN_PARTITION_SAMPLES); 92 | } 93 | 94 | public Integer getPartitionRecordSize() { 95 | return config.get(SCAN_PARTITION_RECORD_SIZE); 96 | } 97 | 98 | // -----------------------------------Lookup Config---------------------------------------- 99 | public int getLookupMaxRetries() { 100 | return config.get(LookupOptions.MAX_RETRIES); 101 | } 102 | 103 | public long getLookupRetryIntervalMs() { 104 | return config.get(LOOKUP_RETRY_INTERVAL).toMillis(); 105 | } 106 | 107 | public FilterHandlingPolicy getFilterHandlingPolicy() { 108 | return config.get(FILTER_HANDLING_POLICY); 109 | } 110 | 111 | // -----------------------------------Write Config------------------------------------------ 112 | public int getBufferFlushMaxRows() { 113 | return config.get(BUFFER_FLUSH_MAX_ROWS); 114 | } 115 | 116 | public long getBufferFlushIntervalMs() { 117 | return config.get(BUFFER_FLUSH_INTERVAL).toMillis(); 118 | } 119 | 120 | public int getSinkMaxRetries() { 121 | return config.get(SINK_MAX_RETRIES); 122 | } 123 | 124 | public long getSinkRetryIntervalMs() { 125 | return config.get(SINK_RETRY_INTERVAL).toMillis(); 126 | } 127 | 128 | public DeliveryGuarantee getDeliveryGuarantee() { 129 | return config.get(DELIVERY_GUARANTEE); 130 | } 131 | 132 | @Nullable 133 | public Integer getSinkParallelism() { 134 | return config.getOptional(SINK_PARALLELISM).orElse(null); 135 | } 136 | 137 | @Override 138 | public boolean equals(Object o) { 139 | if (this == o) { 140 | return true; 141 | } 142 | if (o == null || getClass() != o.getClass()) { 143 | return false; 144 | } 145 | MongoConfiguration that = (MongoConfiguration) o; 146 | return Objects.equals(config, that.config); 147 | } 148 | 149 | @Override 150 | public int hashCode() { 151 | return Objects.hash(config); 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/table/serialization/MongoRowDataDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
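For completeness, the options read by the configuration class above are the ones a Table/SQL user sets in the WITH clause. A hedged DDL sketch follows; the 'connector' identifier and the uri/database/collection keys follow the connector's documentation, while the scan option key is an assumption derived from the SCAN_PARTITION_STRATEGY constant and may differ.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class MongoTableDdlSketch {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        tEnv.executeSql(
                "CREATE TABLE orders (\n"
                        + "  _id STRING,\n"
                        + "  amount DECIMAL(10, 2),\n"
                        + "  PRIMARY KEY (_id) NOT ENFORCED\n"
                        + ") WITH (\n"
                        + "  'connector' = 'mongodb',\n"
                        + "  'uri' = 'mongodb://127.0.0.1:27017',\n"
                        + "  'database' = 'my_db',\n"
                        + "  'collection' = 'orders',\n"
                        + "  'scan.partition.strategy' = 'sample'\n"
                        + ")");
    }
}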
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.table.serialization; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.api.common.typeinfo.TypeInformation; 22 | import org.apache.flink.connector.mongodb.source.reader.deserializer.MongoDeserializationSchema; 23 | import org.apache.flink.connector.mongodb.table.converter.BsonToRowDataConverters; 24 | import org.apache.flink.table.data.RowData; 25 | import org.apache.flink.table.types.logical.RowType; 26 | 27 | import org.bson.BsonDocument; 28 | 29 | /** Deserializer that maps {@link BsonDocument} to {@link RowData}. */ 30 | @Internal 31 | public class MongoRowDataDeserializationSchema implements MongoDeserializationSchema { 32 | 33 | /** Type information describing the result type. */ 34 | private final TypeInformation typeInfo; 35 | 36 | /** Runtime instance that performs the actual work. */ 37 | private final BsonToRowDataConverters.BsonToRowDataConverter runtimeConverter; 38 | 39 | public MongoRowDataDeserializationSchema(RowType rowType, TypeInformation typeInfo) { 40 | this.typeInfo = typeInfo; 41 | this.runtimeConverter = BsonToRowDataConverters.createConverter(rowType); 42 | } 43 | 44 | @Override 45 | public RowData deserialize(BsonDocument document) { 46 | return runtimeConverter.convert(document); 47 | } 48 | 49 | @Override 50 | public TypeInformation getProducedType() { 51 | return typeInfo; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/table/serialization/MongoRowDataSerializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.table.serialization; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.sink.writer.context.MongoSinkContext; 22 | import org.apache.flink.connector.mongodb.sink.writer.serializer.MongoSerializationSchema; 23 | import org.apache.flink.connector.mongodb.table.converter.RowDataToBsonConverters; 24 | import org.apache.flink.table.api.TableException; 25 | import org.apache.flink.table.data.RowData; 26 | 27 | import com.mongodb.client.model.DeleteOneModel; 28 | import com.mongodb.client.model.InsertOneModel; 29 | import com.mongodb.client.model.UpdateOneModel; 30 | import com.mongodb.client.model.UpdateOptions; 31 | import com.mongodb.client.model.WriteModel; 32 | import org.bson.BsonDocument; 33 | import org.bson.BsonValue; 34 | 35 | import java.util.function.Function; 36 | 37 | /** The serialization schema for flink {@link RowData} to serialize records into MongoDB. */ 38 | @Internal 39 | public class MongoRowDataSerializationSchema implements MongoSerializationSchema { 40 | 41 | private final RowDataToBsonConverters.RowDataToBsonConverter rowDataToBsonConverter; 42 | private final Function primaryKeyExtractor; 43 | private final Function shardKeysExtractor; 44 | 45 | public MongoRowDataSerializationSchema( 46 | RowDataToBsonConverters.RowDataToBsonConverter rowDataToBsonConverter, 47 | Function primaryKeyExtractor, 48 | Function shardKeysExtractor) { 49 | this.rowDataToBsonConverter = rowDataToBsonConverter; 50 | this.primaryKeyExtractor = primaryKeyExtractor; 51 | this.shardKeysExtractor = shardKeysExtractor; 52 | } 53 | 54 | @Override 55 | public WriteModel serialize(RowData element, MongoSinkContext context) { 56 | switch (element.getRowKind()) { 57 | case INSERT: 58 | case UPDATE_AFTER: 59 | return processUpsert(element); 60 | case UPDATE_BEFORE: 61 | case DELETE: 62 | return processDelete(element); 63 | default: 64 | throw new TableException("Unsupported message kind: " + element.getRowKind()); 65 | } 66 | } 67 | 68 | private WriteModel processUpsert(RowData row) { 69 | final BsonDocument document = rowDataToBsonConverter.convert(row); 70 | final BsonValue key = primaryKeyExtractor.apply(row); 71 | if (key != null) { 72 | BsonDocument filter = new BsonDocument("_id", key); 73 | 74 | // For upsert operation on a sharded collection, the full sharded key must be included 75 | // in the filter. 76 | BsonDocument shardKeysFilter = shardKeysExtractor.apply(row); 77 | if (!shardKeysFilter.isEmpty()) { 78 | filter.putAll(shardKeysFilter); 79 | } 80 | 81 | // _id is immutable, so we remove it here to prevent exception. 82 | document.remove("_id"); 83 | BsonDocument update = new BsonDocument("$set", document); 84 | return new UpdateOneModel<>(filter, update, new UpdateOptions().upsert(true)); 85 | } else { 86 | return new InsertOneModel<>(document); 87 | } 88 | } 89 | 90 | private WriteModel processDelete(RowData row) { 91 | final BsonValue key = primaryKeyExtractor.apply(row); 92 | BsonDocument filter = new BsonDocument("_id", key); 93 | return new DeleteOneModel<>(filter); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | org.apache.flink.connector.mongodb.table.MongoDynamicTableFactory 17 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/java/org/apache/flink/architecture/ProductionCodeArchitectureTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.architecture; 20 | 21 | import org.apache.flink.architecture.common.ImportOptions; 22 | 23 | import com.tngtech.archunit.core.importer.ImportOption; 24 | import com.tngtech.archunit.junit.AnalyzeClasses; 25 | import com.tngtech.archunit.junit.ArchTest; 26 | import com.tngtech.archunit.junit.ArchTests; 27 | 28 | /** Architecture tests for production code. */ 29 | @AnalyzeClasses( 30 | packages = "org.apache.flink.connector.mongodb", 31 | importOptions = { 32 | ImportOption.DoNotIncludeTests.class, 33 | ImportOption.DoNotIncludeArchives.class, 34 | ImportOptions.ExcludeScalaImportOption.class, 35 | ImportOptions.ExcludeShadedImportOption.class 36 | }) 37 | public class ProductionCodeArchitectureTest { 38 | 39 | @ArchTest 40 | public static final ArchTests COMMON_TESTS = ArchTests.in(ProductionCodeArchitectureBase.class); 41 | } 42 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.architecture; 20 | 21 | import org.apache.flink.architecture.common.ImportOptions; 22 | 23 | import com.tngtech.archunit.core.importer.ImportOption; 24 | import com.tngtech.archunit.junit.AnalyzeClasses; 25 | import com.tngtech.archunit.junit.ArchTest; 26 | import com.tngtech.archunit.junit.ArchTests; 27 | 28 | /** Architecture tests for test code. */ 29 | @AnalyzeClasses( 30 | packages = "org.apache.flink.connector.mongodb", 31 | importOptions = { 32 | ImportOption.OnlyIncludeTests.class, 33 | ImportOptions.ExcludeScalaImportOption.class, 34 | ImportOptions.ExcludeShadedImportOption.class 35 | }) 36 | public class TestCodeArchitectureTest { 37 | 38 | @ArchTest 39 | public static final ArchTests COMMON_TESTS = ArchTests.in(TestCodeArchitectureTestBase.class); 40 | } 41 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/java/org/apache/flink/connector/mongodb/common/utils/MongoSerdeUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.common.utils; 19 | 20 | import org.junit.jupiter.api.Test; 21 | 22 | import java.io.ByteArrayInputStream; 23 | import java.io.ByteArrayOutputStream; 24 | import java.io.DataInput; 25 | import java.io.DataInputStream; 26 | import java.io.DataOutputStream; 27 | import java.io.IOException; 28 | import java.util.Arrays; 29 | import java.util.HashMap; 30 | import java.util.List; 31 | import java.util.Map; 32 | 33 | import static org.assertj.core.api.Assertions.assertThat; 34 | 35 | /** Unit tests for {@link MongoSerdeUtils}. 
*/ 36 | class MongoSerdeUtilsTest { 37 | 38 | @Test 39 | void testSerializeList() throws IOException { 40 | List<String> expected = Arrays.asList("config.collections", "config.chunks"); 41 | 42 | byte[] serialized = serializeList(expected); 43 | List<String> deserialized = deserializeList(serialized); 44 | 45 | assertThat(deserialized).isEqualTo(expected); 46 | } 47 | 48 | @Test 49 | void testSerializeMap() throws IOException { 50 | Map<String, String> expected = new HashMap<>(); 51 | expected.put("k0", "v0"); 52 | expected.put("k1", "v1"); 53 | expected.put("k2", "v2"); 54 | 55 | byte[] serialized = serializeMap(expected); 56 | 57 | Map<String, String> deserialized = deserializeMap(serialized); 58 | 59 | assertThat(deserialized).isEqualTo(expected); 60 | } 61 | 62 | private static byte[] serializeList(List<String> list) throws IOException { 63 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); 64 | DataOutputStream out = new DataOutputStream(baos)) { 65 | MongoSerdeUtils.serializeList(out, list, DataOutputStream::writeUTF); 66 | return baos.toByteArray(); 67 | } 68 | } 69 | 70 | private static List<String> deserializeList(byte[] serialized) throws IOException { 71 | try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 72 | DataInputStream in = new DataInputStream(bais)) { 73 | return MongoSerdeUtils.deserializeList(in, DataInput::readUTF); 74 | } 75 | } 76 | 77 | private static byte[] serializeMap(Map<String, String> map) throws IOException { 78 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); 79 | DataOutputStream out = new DataOutputStream(baos)) { 80 | MongoSerdeUtils.serializeMap( 81 | out, map, DataOutputStream::writeUTF, DataOutputStream::writeUTF); 82 | return baos.toByteArray(); 83 | } 84 | } 85 | 86 | private static Map<String, String> deserializeMap(byte[] serialized) throws IOException { 87 | try (ByteArrayInputStream bais = new ByteArrayInputStream(serialized); 88 | DataInputStream in = new DataInputStream(bais)) { 89 | return MongoSerdeUtils.deserializeMap(in, DataInput::readUTF, DataInput::readUTF); 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/java/org/apache/flink/connector/mongodb/source/enumerator/MongoSourceEnumStateSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator; 19 | 20 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 21 | 22 | import org.bson.BsonDocument; 23 | import org.bson.BsonInt32; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import java.util.Arrays; 27 | import java.util.Collections; 28 | import java.util.List; 29 | import java.util.Map; 30 | 31 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MAX_BOUNDARY; 32 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_HINT; 33 | import static org.apache.flink.connector.mongodb.source.enumerator.MongoSourceEnumStateSerializer.INSTANCE; 34 | import static org.junit.jupiter.api.Assertions.assertEquals; 35 | import static org.junit.jupiter.api.Assertions.assertNotSame; 36 | 37 | /** Unit tests for {@link MongoSourceEnumStateSerializer}. */ 38 | class MongoSourceEnumStateSerializerTest { 39 | 40 | @Test 41 | void serializeAndDeserializeMongoSourceEnumState() throws Exception { 42 | boolean initialized = false; 43 | List remainingCollections = Arrays.asList("db.remains0", "db.remains1"); 44 | List alreadyProcessedCollections = Arrays.asList("db.processed0", "db.processed1"); 45 | List remainingScanSplits = 46 | Arrays.asList(createSourceSplit(0), createSourceSplit(1)); 47 | 48 | Map assignedScanSplits = 49 | Collections.singletonMap("split2", createSourceSplit(2)); 50 | 51 | MongoSourceEnumState state = 52 | new MongoSourceEnumState( 53 | remainingCollections, 54 | alreadyProcessedCollections, 55 | remainingScanSplits, 56 | assignedScanSplits, 57 | initialized); 58 | 59 | byte[] bytes = INSTANCE.serialize(state); 60 | MongoSourceEnumState state1 = INSTANCE.deserialize(INSTANCE.getVersion(), bytes); 61 | 62 | assertEquals(state.getRemainingCollections(), state1.getRemainingCollections()); 63 | assertEquals( 64 | state.getAlreadyProcessedCollections(), state1.getAlreadyProcessedCollections()); 65 | assertEquals(state.getRemainingScanSplits(), state1.getRemainingScanSplits()); 66 | assertEquals(state.getAssignedScanSplits(), state1.getAssignedScanSplits()); 67 | assertEquals(state.isInitialized(), state1.isInitialized()); 68 | 69 | assertNotSame(state, state1); 70 | } 71 | 72 | private static MongoScanSourceSplit createSourceSplit(int index) { 73 | return new MongoScanSourceSplit( 74 | "split" + index, 75 | "db", 76 | "coll", 77 | new BsonDocument("_id", new BsonInt32(index)), 78 | BSON_MAX_BOUNDARY, 79 | ID_HINT); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/java/org/apache/flink/connector/mongodb/source/enumerator/splitter/MongoSampleSplitterTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.source.enumerator.splitter; 19 | 20 | import org.apache.flink.configuration.MemorySize; 21 | import org.apache.flink.connector.mongodb.source.config.MongoReadOptions; 22 | import org.apache.flink.connector.mongodb.source.split.MongoScanSourceSplit; 23 | 24 | import com.mongodb.MongoNamespace; 25 | import org.bson.BsonDocument; 26 | import org.bson.BsonInt32; 27 | import org.junit.jupiter.api.Test; 28 | 29 | import java.util.ArrayList; 30 | import java.util.List; 31 | 32 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MAX_BOUNDARY; 33 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.BSON_MIN_BOUNDARY; 34 | import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_FIELD; 35 | import static org.assertj.core.api.Assertions.assertThat; 36 | 37 | /** Unit tests for {@link MongoSampleSplitter}. */ 38 | class MongoSampleSplitterTest { 39 | 40 | private static final MongoNamespace TEST_NS = new MongoNamespace("test.test"); 41 | 42 | @Test 43 | void testSplitEmptyCollection() { 44 | MongoSplitContext splitContext = 45 | new MongoSplitContext( 46 | MongoReadOptions.builder().build(), null, TEST_NS, false, 0, 0, 0); 47 | 48 | assertSingleSplit( 49 | new ArrayList<>( 50 | MongoSampleSplitter.split(splitContext, (i1, i2) -> new ArrayList<>()))); 51 | } 52 | 53 | @Test 54 | void testLargerSizedPartitions() { 55 | long totalNumDocuments = 10000L; 56 | 57 | MemorySize avgObjSize = new MemorySize(160L); 58 | MemorySize totalStorageSize = avgObjSize.multiply(totalNumDocuments); 59 | 60 | MongoSplitContext splitContext = 61 | new MongoSplitContext( 62 | MongoReadOptions.builder().setPartitionSize(totalStorageSize).build(), 63 | null, 64 | TEST_NS, 65 | false, 66 | totalNumDocuments, 67 | totalStorageSize.getBytes(), 68 | avgObjSize.getBytes()); 69 | 70 | assertSingleSplit( 71 | new ArrayList<>( 72 | MongoSampleSplitter.split(splitContext, (i1, i2) -> new ArrayList<>()))); 73 | } 74 | 75 | @Test 76 | void testNumberOfSampleCalculation() { 77 | long totalNumDocuments = 100L; 78 | int numPartitions = 10; 79 | 80 | MemorySize avgObjSize = MemorySize.ofMebiBytes(10); 81 | MemorySize totalStorageSize = avgObjSize.multiply(totalNumDocuments); 82 | MemorySize partitionSize = totalStorageSize.divide(numPartitions); 83 | 84 | int samplesPerPartition = 2; 85 | int numExpectedSamples = samplesPerPartition * numPartitions - 1; 86 | 87 | MongoSplitContext splitContext = 88 | new MongoSplitContext( 89 | MongoReadOptions.builder() 90 | .setPartitionSize(partitionSize) 91 | .setSamplesPerPartition(2) 92 | .build(), 93 | null, 94 | TEST_NS, 95 | false, 96 | totalNumDocuments, 97 | totalStorageSize.getBytes(), 98 | avgObjSize.getBytes()); 99 | 100 | MongoSampleSplitter.split( 101 | splitContext, 102 | (ignored, numRequestedSamples) -> { 103 | assertThat(numRequestedSamples).isEqualTo(numExpectedSamples); 104 | return createSamples(numRequestedSamples); 105 | }); 106 | } 107 | 108 | @Test 109 | void testSampleMerging() { 110 | final int numPartitions = 
3; 111 | final int samplesPerPartition = 2; 112 | final List<BsonDocument> samples = createSamples(numPartitions * samplesPerPartition - 1); 113 | 114 | List<MongoScanSourceSplit> splits = 115 | MongoSampleSplitter.createSplits(samples, samplesPerPartition, TEST_NS); 116 | 117 | // Samples: 0 1 2 3 4 118 | // Bounds: - + 119 | // Partitions: |-|-|-|-|-|-| 120 | // Splits: |---|---|---| 121 | assertThat(splits).hasSize(numPartitions); 122 | assertThat(splits.get(0)) 123 | .satisfies( 124 | split -> { 125 | assertThat(split.getMin()).isEqualTo(BSON_MIN_BOUNDARY); 126 | assertThat(split.getMax()).isEqualTo(samples.get(1)); 127 | }); 128 | assertThat(splits.get(1)) 129 | .satisfies( 130 | split -> { 131 | assertThat(split.getMin()).isEqualTo(samples.get(1)); 132 | assertThat(split.getMax()).isEqualTo(samples.get(3)); 133 | }); 134 | assertThat(splits.get(2)) 135 | .satisfies( 136 | split -> { 137 | assertThat(split.getMin()).isEqualTo(samples.get(3)); 138 | assertThat(split.getMax()).isEqualTo(BSON_MAX_BOUNDARY); 139 | }); 140 | } 141 | 142 | private static List<BsonDocument> createSamples(int samplesCount) { 143 | List<BsonDocument> samples = new ArrayList<>(samplesCount); 144 | for (int i = 0; i < samplesCount; i++) { 145 | samples.add(new BsonDocument(ID_FIELD, new BsonInt32(i))); 146 | } 147 | return samples; 148 | } 149 | 150 | private static void assertSingleSplit(List<MongoScanSourceSplit> splits) { 151 | assertThat(splits.size()).isEqualTo(1); 152 | assertThat(splits.get(0).getMin()).isEqualTo(BSON_MIN_BOUNDARY); 153 | assertThat(splits.get(0).getMax()).isEqualTo(BSON_MAX_BOUNDARY); 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/java/org/apache/flink/connector/mongodb/table/MongoShardKeysExtractorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License.
16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.table; 19 | 20 | import org.apache.flink.table.api.DataTypes; 21 | import org.apache.flink.table.catalog.Column; 22 | import org.apache.flink.table.catalog.ResolvedSchema; 23 | import org.apache.flink.table.catalog.UniqueConstraint; 24 | import org.apache.flink.table.data.GenericRowData; 25 | import org.apache.flink.table.data.RowData; 26 | import org.apache.flink.table.data.StringData; 27 | 28 | import org.bson.BsonDocument; 29 | import org.bson.BsonInt64; 30 | import org.bson.BsonObjectId; 31 | import org.bson.BsonString; 32 | import org.bson.types.ObjectId; 33 | import org.junit.jupiter.api.Test; 34 | 35 | import java.util.Arrays; 36 | import java.util.Collections; 37 | import java.util.function.Function; 38 | 39 | import static org.assertj.core.api.Assertions.assertThat; 40 | 41 | /** Tests for {@link MongoShardKeysExtractor}. */ 42 | class MongoShardKeysExtractorTest { 43 | 44 | @Test 45 | void testSingleShardKey() { 46 | ResolvedSchema schema = 47 | new ResolvedSchema( 48 | Arrays.asList( 49 | Column.physical("a", DataTypes.BIGINT().notNull()), 50 | Column.physical("b", DataTypes.STRING())), 51 | Collections.emptyList(), 52 | UniqueConstraint.primaryKey("pk", Collections.singletonList("a"))); 53 | 54 | String[] shardKeys = new String[] {"b"}; 55 | 56 | Function<RowData, BsonDocument> shardKeysExtractor = 57 | MongoShardKeysExtractor.createShardKeysExtractor(schema, shardKeys); 58 | 59 | BsonDocument actual = 60 | shardKeysExtractor.apply(GenericRowData.of(12L, StringData.fromString("ABCD"))); 61 | assertThat(actual).isEqualTo(new BsonDocument("b", new BsonString("ABCD"))); 62 | } 63 | 64 | @Test 65 | void testCompoundShardKey() { 66 | ResolvedSchema schema = 67 | new ResolvedSchema( 68 | Arrays.asList( 69 | Column.physical("a", DataTypes.BIGINT().notNull()), 70 | Column.physical("b", DataTypes.STRING().notNull()), 71 | Column.physical("c", DataTypes.BIGINT())), 72 | Collections.emptyList(), 73 | UniqueConstraint.primaryKey("pk", Collections.singletonList("a"))); 74 | 75 | String[] shardKeys = new String[] {"b", "c"}; 76 | 77 | Function<RowData, BsonDocument> shardKeysExtractor = 78 | MongoShardKeysExtractor.createShardKeysExtractor(schema, shardKeys); 79 | 80 | BsonDocument actual = 81 | shardKeysExtractor.apply( 82 | GenericRowData.of(12L, StringData.fromString("ABCD"), 13L)); 83 | assertThat(actual) 84 | .isEqualTo( 85 | new BsonDocument("b", new BsonString("ABCD")) 86 | .append("c", new BsonInt64(13L))); 87 | } 88 | 89 | @Test 90 | void testCompoundShardKeyWithObjectId() { 91 | ResolvedSchema schema = 92 | new ResolvedSchema( 93 | Arrays.asList( 94 | Column.physical("a", DataTypes.STRING().notNull()), 95 | Column.physical("b", DataTypes.STRING().notNull()), 96 | Column.physical("c", DataTypes.BIGINT())), 97 | Collections.emptyList(), 98 | UniqueConstraint.primaryKey("pk", Collections.singletonList("a"))); 99 | 100 | String[] shardKeys = new String[] {"a", "b"}; 101 | 102 | Function<RowData, BsonDocument> shardKeysExtractor = 103 | MongoShardKeysExtractor.createShardKeysExtractor(schema, shardKeys); 104 | 105 | ObjectId objectId = new ObjectId(); 106 | BsonDocument actual = 107 | shardKeysExtractor.apply( 108 | GenericRowData.of( 109 | StringData.fromString(objectId.toString()), 110 | StringData.fromString("ABCD"), 111 | 13L)); 112 | assertThat(actual) 113 | .isEqualTo( 114 | new BsonDocument("a", new BsonObjectId(objectId)) 115 | .append("b", new BsonString("ABCD"))); 116 | } 117 | } 118 | --------------------------------------------------------------------------------
/flink-connector-mongodb/src/test/java/org/apache/flink/connector/mongodb/table/MongoTablePlanTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.table; 19 | 20 | import org.apache.flink.table.api.DataTypes; 21 | import org.apache.flink.table.api.Schema; 22 | import org.apache.flink.table.api.TableConfig; 23 | import org.apache.flink.table.api.TableDescriptor; 24 | import org.apache.flink.table.api.TableEnvironment; 25 | import org.apache.flink.table.planner.utils.StreamTableTestUtil; 26 | import org.apache.flink.table.planner.utils.TableTestBase; 27 | 28 | import org.junit.jupiter.api.BeforeEach; 29 | import org.junit.jupiter.api.Test; 30 | import org.junit.jupiter.api.TestInfo; 31 | import org.junit.rules.TestName; 32 | 33 | import java.time.ZoneId; 34 | import java.util.Collections; 35 | import java.util.Map; 36 | 37 | /** Plan tests for Mongo connector, for example, testing projection push down. 
*/ 38 | class MongoTablePlanTest extends TableTestBase { 39 | 40 | private final StreamTableTestUtil util = streamTestUtil(TableConfig.getDefault()); 41 | 42 | private TestInfo testInfo; 43 | 44 | @BeforeEach 45 | void setup(TestInfo testInfo) { 46 | this.testInfo = testInfo; 47 | TableEnvironment tEnv = util.tableEnv(); 48 | tEnv.getConfig().setLocalTimeZone(ZoneId.of("UTC")); 49 | } 50 | 51 | @Test 52 | void testFilterPushdown() { 53 | createTestTable(); 54 | util.verifyExecPlan( 55 | "SELECT id, timestamp3_col, int_col FROM mongo WHERE id = 900001 AND timestamp3_col <> TIMESTAMP '2022-09-07 10:25:28.127' OR double_col >= -1000.23"); 56 | } 57 | 58 | @Test 59 | void testFilterPartialPushdown() { 60 | createTestTable(); 61 | util.verifyExecPlan( 62 | "SELECT id, timestamp3_col, int_col FROM mongo WHERE id = 900001 AND boolean_col = (decimal_col > 2.0)"); 63 | } 64 | 65 | @Test 66 | void testFilterCannotPushdown() { 67 | createTestTable(); 68 | util.verifyExecPlan( 69 | "SELECT id, timestamp3_col, int_col FROM mongo WHERE id IS NOT NULL OR double_col = decimal_col"); 70 | } 71 | 72 | @Test 73 | void testNeverFilterPushdown() { 74 | createTestTable( 75 | Collections.singletonMap( 76 | MongoConnectorOptions.FILTER_HANDLING_POLICY.key(), 77 | FilterHandlingPolicy.NEVER.name())); 78 | util.verifyExecPlan( 79 | "SELECT id, timestamp3_col, int_col FROM mongo WHERE id = 900001 AND decimal_col > 1.0"); 80 | } 81 | 82 | private void createTestTable() { 83 | createTestTable(Collections.emptyMap()); 84 | } 85 | 86 | private void createTestTable(Map<String, String> extraOptions) { 87 | TableDescriptor.Builder builder = 88 | TableDescriptor.forConnector("mongodb") 89 | .option("uri", "mongodb://127.0.0.1:27017") 90 | .option("database", "test_db") 91 | .option("collection", "test_coll") 92 | .schema( 93 | Schema.newBuilder() 94 | .column("id", DataTypes.BIGINT()) 95 | .column("description", DataTypes.VARCHAR(200)) 96 | .column("boolean_col", DataTypes.BOOLEAN()) 97 | .column("timestamp_col", DataTypes.TIMESTAMP_LTZ(0)) 98 | .column("timestamp3_col", DataTypes.TIMESTAMP_LTZ(3)) 99 | .column("int_col", DataTypes.INT()) 100 | .column("double_col", DataTypes.DOUBLE()) 101 | .column("decimal_col", DataTypes.DECIMAL(10, 4)) 102 | .build()); 103 | 104 | extraOptions.forEach(builder::option); 105 | 106 | util.tableEnv().createTable("mongo", builder.build()); 107 | } 108 | 109 | // A workaround to get the test method name for flink versions not completely migrated to JUnit5 110 | public TestName name() { 111 | return new TestName() { 112 | @Override 113 | public String getMethodName() { 114 | return testInfo.getTestMethod().get().getName(); 115 | } 116 | }; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/java/org/apache/flink/connector/mongodb/testutils/MongoTestUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | 18 | package org.apache.flink.connector.mongodb.testutils; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | import org.apache.flink.connector.mongodb.table.MongoConnectorOptions; 22 | import org.apache.flink.table.factories.FactoryUtil; 23 | 24 | import com.mongodb.client.MongoClient; 25 | import com.mongodb.client.MongoCollection; 26 | import com.mongodb.client.MongoDatabase; 27 | import com.mongodb.client.model.Filters; 28 | import com.mongodb.client.model.IndexOptions; 29 | import org.bson.Document; 30 | import org.bson.conversions.Bson; 31 | import org.testcontainers.containers.MongoDBContainer; 32 | import org.testcontainers.containers.Network; 33 | import org.testcontainers.utility.DockerImageName; 34 | 35 | import java.util.ArrayList; 36 | import java.util.List; 37 | 38 | import static org.assertj.core.api.Assertions.assertThat; 39 | 40 | /** Collection of utility methods for MongoDB tests. */ 41 | @Internal 42 | public class MongoTestUtil { 43 | 44 | public static final String MONGODB_HOSTNAME = "mongodb"; 45 | 46 | public static final String MONGO_IMAGE_PREFIX = "mongo:"; 47 | 48 | public static final String ADMIN_DATABASE = "admin"; 49 | public static final String CONFIG_DATABASE = "config"; 50 | public static final String SETTINGS_COLLECTION = "settings"; 51 | public static final String CHUNK_SIZE_FIELD = "chunksize"; 52 | public static final String VALUE_FIELD = "value"; 53 | 54 | private MongoTestUtil() {} 55 | 56 | /** 57 | * Creates a preconfigured {@link MongoDBContainer}. 58 | * 59 | * @return configured MongoDB container 60 | */ 61 | public static MongoDBContainer createMongoDBContainer() { 62 | return new MongoDBContainer(mongoDockerImageName()); 63 | } 64 | 65 | /** 66 | * Creates a preconfigured {@link MongoShardedContainers} setup. 67 | * 68 | * @param network for test containers 69 | * @return configured MongoDB sharded containers 70 | */ 71 | public static MongoShardedContainers createMongoDBShardedContainers(Network network) { 72 | return new MongoShardedContainers(mongoDockerImageName(), network); 73 | } 74 | 75 | public static DockerImageName mongoDockerImageName() { 76 | return DockerImageName.parse(MONGO_IMAGE_PREFIX + mongoVersion()); 77 | } 78 | 79 | public static String mongoVersion() { 80 | return System.getProperty("mongodb.version"); 81 | } 82 | 83 | public static void assertThatIdsAreNotWritten(MongoCollection<Document> coll, Integer... ids) { 84 | List<Integer> idsAreWritten = new ArrayList<>(); 85 | coll.find(Filters.in("_id", ids)).map(d -> d.getInteger("_id")).into(idsAreWritten); 86 | 87 | assertThat(idsAreWritten).isEmpty(); 88 | } 89 | 90 | public static void assertThatIdsAreWritten(MongoCollection<Document> coll, Integer... ids) { 91 | List<Integer> actualIds = new ArrayList<>(); 92 | coll.find(Filters.in("_id", ids)).map(d -> d.getInteger("_id")).into(actualIds); 93 | 94 | assertThat(actualIds).containsExactlyInAnyOrder(ids); 95 | } 96 | 97 | public static void assertThatIdsAreWrittenWithMaxWaitTime( 98 | MongoCollection<Document> coll, long maxWaitTimeMs, Integer...
ids) 99 | throws InterruptedException { 100 | long startTimeMillis = System.currentTimeMillis(); 101 | while (System.currentTimeMillis() - startTimeMillis < maxWaitTimeMs) { 102 | if (coll.countDocuments(Filters.in("_id", ids)) == ids.length) { 103 | break; 104 | } 105 | Thread.sleep(1000L); 106 | } 107 | assertThatIdsAreWritten(coll, ids); 108 | } 109 | 110 | public static String getConnectorSql( 111 | String database, String collection, String connectionString) { 112 | return String.format("'%s'='%s',\n", FactoryUtil.CONNECTOR.key(), "mongodb") 113 | + String.format("'%s'='%s',\n", MongoConnectorOptions.URI.key(), connectionString) 114 | + String.format("'%s'='%s',\n", MongoConnectorOptions.DATABASE.key(), database) 115 | + String.format("'%s'='%s'\n", MongoConnectorOptions.COLLECTION.key(), collection); 116 | } 117 | 118 | public static void createIndex( 119 | MongoClient mongoClient, 120 | String databaseName, 121 | String collectionName, 122 | Bson keys, 123 | IndexOptions indexOptions) { 124 | mongoClient 125 | .getDatabase(databaseName) 126 | .getCollection(collectionName) 127 | .createIndex(keys, indexOptions); 128 | } 129 | 130 | public static void shardCollection( 131 | MongoClient mongoClient, String databaseName, String collectionName, Bson keys) { 132 | MongoDatabase admin = mongoClient.getDatabase(ADMIN_DATABASE); 133 | Document enableShardingCommand = new Document("enableSharding", databaseName); 134 | admin.runCommand(enableShardingCommand); 135 | 136 | Document shardCollectionCommand = 137 | new Document("shardCollection", databaseName + "." + collectionName) 138 | .append("key", keys); 139 | admin.runCommand(shardCollectionCommand); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/resources/archunit.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # By default we allow removing existing violations, but fail when new violations are added. 20 | freeze.store.default.allowStoreUpdate=true 21 | 22 | # Enable this if a new (frozen) rule has been added in order to create the initial store and record the existing violations. 23 | #freeze.store.default.allowStoreCreation=false 24 | 25 | # Enable this to add allow new violations to be recorded. 26 | # NOTE: Adding new violations should be avoided when possible. If the rule was correct to flag a new 27 | # violation, please try to avoid creating the violation. If the violation was created due to a 28 | # shortcoming of the rule, file a JIRA issue so the rule can be improved. 
29 | #freeze.refreeze=true 30 | 31 | freeze.store.default.path=archunit-violations 32 | 33 | # TRUE by default since 0.23.0, restore the old behavior by setting the ArchUnit property archRule.failOnEmptyShould=false 34 | archRule.failOnEmptyShould=false 35 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/resources/log4j2-test.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Set root logger level to OFF to not flood build logs 20 | # set manually to INFO for debugging purposes 21 | rootLogger.level = OFF 22 | rootLogger.appenderRef.test.ref = TestLogger 23 | 24 | appender.testlogger.name = TestLogger 25 | appender.testlogger.type = CONSOLE 26 | appender.testlogger.target = SYSTEM_ERR 27 | appender.testlogger.layout.type = PatternLayout 28 | appender.testlogger.layout.pattern = %-4r [%t] %-5p %c %x - %m%n 29 | -------------------------------------------------------------------------------- /flink-connector-mongodb/src/test/resources/org/apache/flink/connector/mongodb/table/MongoTablePlanTest.xml: -------------------------------------------------------------------------------- [XML plan-test resource for MongoTablePlanTest. For each of testFilterPushdown, testFilterPartialPushdown, testFilterCannotPushdown, and testNeverFilterPushdown it records the source SQL statement, the abstract syntax tree, and the expected optimized exec plan, showing which predicates are pushed into the filter=[...] clause of the mongo TableSourceScan and which remain in a Calc node above it; the XML markup itself is not recoverable from this dump.] -------------------------------------------------------------------------------- /flink-sql-connector-mongodb/pom.xml: -------------------------------------------------------------------------------- [Maven module descriptor for flink-sql-connector-mongodb; the XML markup is not recoverable from this dump. Recoverable content: modelVersion 4.0.0; parent org.apache.flink:flink-connector-mongodb-parent:2.1-SNAPSHOT; artifactId flink-sql-connector-mongodb; name "Flink : Connectors : SQL : MongoDB"; packaging jar; a dependency on org.apache.flink:flink-connector-mongodb:${project.version}; and a maven-shade-plugin execution (id shade-flink, phase package, goal shade) that bundles all dependencies (*:*), excludes META-INF/native-image/** from org.mongodb:mongodb-driver-core, and relocates com.mongodb and org.bson to org.apache.flink.mongodb.shaded.com.mongodb and org.apache.flink.mongodb.shaded.org.bson.] -------------------------------------------------------------------------------- /flink-sql-connector-mongodb/src/main/resources/META-INF/NOTICE: -------------------------------------------------------------------------------- 1 | flink-sql-connector-mongodb 2 | Copyright 2014-2025 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | This project bundles the following dependencies under the Apache Software License 2.0. (http://www.apache.org/licenses/LICENSE-2.0.txt) 8 | 9 | - org.mongodb:bson:5.1.1 10 | - org.mongodb:mongodb-driver-core:5.1.1 11 | - org.mongodb:mongodb-driver-sync:5.1.1 12 | -------------------------------------------------------------------------------- /tools/ci/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.out.ref = ConsoleAppender 21 | 22 | # ----------------------------------------------------------------------------- 23 | # Console (use 'console') 24 | # ----------------------------------------------------------------------------- 25 | 26 | appender.console.name = ConsoleAppender 27 | appender.console.type = CONSOLE 28 | appender.console.layout.type = PatternLayout 29 | appender.console.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n 30 | 31 | # ----------------------------------------------------------------------------- 32 | # File (use 'file') 33 | # ----------------------------------------------------------------------------- 34 | appender.file.name = FileAppender 35 | appender.file.type = FILE 36 | appender.file.fileName = ${sys:log.dir}/mvn-${sys:mvn.forkNumber:-output}.log 37 | appender.file.layout.type = PatternLayout 38 | appender.file.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n 39 | appender.file.createOnDemand = true 40 | 41 | # suppress the irrelevant (wrong) warnings from the netty channel handler 42 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 43 | logger.netty.level = ERROR 44 | -------------------------------------------------------------------------------- /tools/maven/suppressions.xml: -------------------------------------------------------------------------------- [Checkstyle suppressions configuration referenced by tools/maven/checkstyle.xml; the XML markup is not recoverable from this dump.] --------------------------------------------------------------------------------
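For readers piecing together how the classes listed above fit into a job, the following is a minimal, hedged DataStream sketch that wires a MongoSource into a MongoSink from this repository. The builder method names (setUri, setDatabase, setCollection, setPartitionStrategy, setDeserializationSchema, setBatchSize, setBatchIntervalMs, setMaxRetries, setDeliveryGuarantee, setSerializationSchema) are assumed from the connector's documented builder API and should be verified against MongoSourceBuilder and MongoSinkBuilder; the connection string, database, and collection names are placeholders.

package org.example;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.mongodb.sink.MongoSink;
import org.apache.flink.connector.mongodb.source.MongoSource;
import org.apache.flink.connector.mongodb.source.enumerator.splitter.PartitionStrategy;
import org.apache.flink.connector.mongodb.source.reader.deserializer.MongoDeserializationSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import com.mongodb.client.model.InsertOneModel;
import org.bson.BsonDocument;

/** Minimal sketch (assumed API): read a collection as JSON strings and copy it into another one. */
public class MongoCopyJobSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: scan "source_db.source_coll", splitting the collection with the SAMPLE strategy
        // (the strategy exercised by MongoSampleSplitterTest above) and emitting JSON strings.
        MongoSource<String> source =
                MongoSource.<String>builder()
                        .setUri("mongodb://127.0.0.1:27017")            // placeholder URI
                        .setDatabase("source_db")                        // placeholder database
                        .setCollection("source_coll")                    // placeholder collection
                        .setPartitionStrategy(PartitionStrategy.SAMPLE)
                        .setDeserializationSchema(
                                new MongoDeserializationSchema<String>() {
                                    @Override
                                    public String deserialize(BsonDocument document) {
                                        return document.toJson();
                                    }

                                    @Override
                                    public TypeInformation<String> getProducedType() {
                                        return BasicTypeInfo.STRING_TYPE_INFO;
                                    }
                                })
                        .build();

        // Sink: insert each JSON string into "target_db.target_coll" with at-least-once batching.
        MongoSink<String> sink =
                MongoSink.<String>builder()
                        .setUri("mongodb://127.0.0.1:27017")             // placeholder URI
                        .setDatabase("target_db")
                        .setCollection("target_coll")
                        .setBatchSize(1000)
                        .setBatchIntervalMs(1000)
                        .setMaxRetries(3)
                        .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
                        .setSerializationSchema(
                                (input, context) -> new InsertOneModel<>(BsonDocument.parse(input)))
                        .build();

        env.fromSource(source, WatermarkStrategy.noWatermarks(), "MongoDB Source").sinkTo(sink);

        env.execute("MongoDB copy job (sketch)");
    }
}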
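A companion Table API sketch, mirroring the options exercised by MongoTablePlanTest and MongoTestUtil.getConnectorSql above: the 'mongodb' connector identifier and the uri, database, and collection keys come from those tests, as do MongoConnectorOptions.FILTER_HANDLING_POLICY and FilterHandlingPolicy.NEVER; the URI and namespace values are placeholders, and a reachable MongoDB instance is assumed.

package org.example;

import org.apache.flink.connector.mongodb.table.FilterHandlingPolicy;
import org.apache.flink.connector.mongodb.table.MongoConnectorOptions;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.TableDescriptor;
import org.apache.flink.table.api.TableEnvironment;

/** Minimal sketch: register a MongoDB-backed table and query it through the planner. */
public class MongoTableRegistrationSketch {

    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        TableDescriptor mongo =
                TableDescriptor.forConnector("mongodb")
                        .option("uri", "mongodb://127.0.0.1:27017")   // placeholder URI
                        .option("database", "test_db")                 // placeholder database
                        .option("collection", "test_coll")             // placeholder collection
                        // Same knob MongoTablePlanTest toggles; NEVER keeps all filters in the
                        // planner instead of pushing them into the Mongo scan.
                        .option(
                                MongoConnectorOptions.FILTER_HANDLING_POLICY.key(),
                                FilterHandlingPolicy.NEVER.name())
                        .schema(
                                Schema.newBuilder()
                                        .column("id", DataTypes.BIGINT())
                                        .column("int_col", DataTypes.INT())
                                        .column("double_col", DataTypes.DOUBLE())
                                        .build())
                        .build();

        tEnv.createTable("mongo", mongo);

        // With the policy set to NEVER, the predicate stays in a Calc above the TableSourceScan,
        // matching the expectation recorded for testNeverFilterPushdown.
        tEnv.sqlQuery("SELECT id, int_col FROM mongo WHERE id = 900001 AND double_col >= -1000.23")
                .execute()
                .print();
    }
}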