├── .asf.yaml ├── .editorconfig ├── .github ├── boring-cyborg.yml └── workflows │ ├── push_pr.yml │ └── weekly.yml ├── .gitignore ├── .gitmodules ├── .idea └── vcs.xml ├── LICENSE ├── NOTICE ├── README.md ├── docs ├── content.zh │ └── docs │ │ └── connectors │ │ └── datastream │ │ └── cassandra.md └── content │ └── docs │ └── connectors │ └── datastream │ └── cassandra.md ├── flink-connector-cassandra ├── archunit-violations │ ├── 01b274c9-e1ef-4fad-accd-703c7e6ad9f3 │ ├── 69754155-7c30-42a8-8fd3-c5a488d6d1b9 │ ├── 738e8069-6550-4700-a662-dcd027d3ca55 │ ├── 7c16ca6f-4479-46d5-80cf-4f766ee5b442 │ ├── b7279bb1-1eb7-40c0-931d-f6db7971d126 │ ├── d181ab66-6399-4468-b7f8-1263b90d7577 │ ├── dc1ba6f4-3d84-498c-a085-e02ba5936201 │ ├── dcfaa83d-a12c-48e1-9e51-b8d3808cd287 │ ├── ea12954c-9e1e-4db3-bd78-2f30ec06d270 │ └── stored.rules ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── flink │ │ │ ├── batch │ │ │ └── connectors │ │ │ │ └── cassandra │ │ │ │ ├── CassandraColumnarOutputFormatBase.java │ │ │ │ ├── CassandraInputFormat.java │ │ │ │ ├── CassandraInputFormatBase.java │ │ │ │ ├── CassandraOutputFormat.java │ │ │ │ ├── CassandraOutputFormatBase.java │ │ │ │ ├── CassandraPojoInputFormat.java │ │ │ │ ├── CassandraPojoOutputFormat.java │ │ │ │ ├── CassandraRowOutputFormat.java │ │ │ │ └── CassandraTupleOutputFormat.java │ │ │ ├── connector │ │ │ └── cassandra │ │ │ │ └── source │ │ │ │ ├── CassandraSource.java │ │ │ │ ├── enumerator │ │ │ │ ├── CassandraEnumeratorState.java │ │ │ │ ├── CassandraEnumeratorStateSerializer.java │ │ │ │ └── CassandraSplitEnumerator.java │ │ │ │ ├── reader │ │ │ │ ├── CassandraRecordEmitter.java │ │ │ │ ├── CassandraRow.java │ │ │ │ ├── CassandraSourceReader.java │ │ │ │ ├── CassandraSourceReaderFactory.java │ │ │ │ └── CassandraSplitReader.java │ │ │ │ ├── split │ │ │ │ ├── CassandraSplit.java │ │ │ │ ├── CassandraSplitSerializer.java │ │ │ │ └── SplitsGenerator.java │ │ │ │ └── utils │ │ │ │ └── BigIntegerSerializationUtils.java │ │ │ └── streaming │ │ │ └── connectors │ │ │ └── cassandra │ │ │ ├── AbstractCassandraTupleSink.java │ │ │ ├── CassandraAppendTableSink.java │ │ │ ├── CassandraCommitter.java │ │ │ ├── CassandraFailureHandler.java │ │ │ ├── CassandraPojoSink.java │ │ │ ├── CassandraRowSink.java │ │ │ ├── CassandraRowWriteAheadSink.java │ │ │ ├── CassandraScalaProductSink.java │ │ │ ├── CassandraSink.java │ │ │ ├── CassandraSinkBase.java │ │ │ ├── CassandraSinkBaseConfig.java │ │ │ ├── CassandraTupleSink.java │ │ │ ├── CassandraTupleWriteAheadSink.java │ │ │ ├── ClusterBuilder.java │ │ │ ├── MapperOptions.java │ │ │ ├── NoOpCassandraFailureHandler.java │ │ │ └── SimpleMapperOptions.java │ └── resources │ │ └── META-INF │ │ └── NOTICE │ └── test │ ├── java │ └── org │ │ └── apache │ │ └── flink │ │ ├── architecture │ │ ├── ProductionCodeArchitectureTest.java │ │ └── TestCodeArchitectureTest.java │ │ ├── batch │ │ └── connectors │ │ │ └── cassandra │ │ │ └── example │ │ │ ├── BatchExample.java │ │ │ └── BatchPojoExample.java │ │ ├── connector │ │ └── cassandra │ │ │ ├── CassandraTestEnvironment.java │ │ │ └── source │ │ │ ├── CassandraSourceITCase.java │ │ │ ├── CassandraTestContext.java │ │ │ ├── enumerator │ │ │ └── CassandraEnumeratorStateSerializerTest.java │ │ │ ├── reader │ │ │ └── CassandraQueryTest.java │ │ │ └── split │ │ │ └── CassandraSplitSerializerTest.java │ │ ├── connectors │ │ └── cassandra │ │ │ └── utils │ │ │ ├── Pojo.java │ │ │ └── ResultSetFutures.java │ │ └── streaming │ │ └── connectors │ │ └── cassandra │ 
│ ├── CassandraConnectorITCase.java │ │ ├── CassandraSinkBaseTest.java │ │ ├── CassandraTupleWriteAheadSinkTest.java │ │ ├── Pojo.java │ │ └── example │ │ ├── CassandraPojoSinkExample.java │ │ ├── CassandraTupleSinkExample.java │ │ ├── CassandraTupleWriteAheadSinkExample.java │ │ └── Message.java │ └── resources │ ├── META-INF │ └── services │ │ └── org.junit.jupiter.api.extension.Extension │ ├── archunit.properties │ └── log4j2-test.properties ├── pom.xml └── tools ├── ci └── log4j.properties └── maven ├── checkstyle.xml └── suppressions.xml /.asf.yaml: -------------------------------------------------------------------------------- 1 | github: 2 | enabled_merge_buttons: 3 | squash: true 4 | merge: false 5 | rebase: true 6 | labels: 7 | - flink 8 | - cassandra 9 | - connector 10 | - datastream 11 | autolink_jira: FLINK 12 | collaborators: 13 | - flinkbot 14 | notifications: 15 | commits: commits@flink.apache.org 16 | issues: issues@flink.apache.org 17 | pullrequests: issues@flink.apache.org 18 | jobs: builds@flink.apache.org 19 | jira_options: link label 20 | -------------------------------------------------------------------------------- /.github/boring-cyborg.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | labelPRBasedOnFilePath: 20 | component=BuildSystem: 21 | - .github/**/* 22 | - tools/maven/* 23 | 24 | component=Documentation: 25 | - docs/**/* 26 | 27 | component=Connectors/Cassandra: 28 | - flink-connector-cassandra*/**/* 29 | - flink-sql-connector-cassandra*/**/* 30 | 31 | ###### IssueLink Adder ################################################################################################# 32 | # Insert Issue (Jira/Github etc) link in PR description based on the Issue ID in PR title. 33 | insertIssueLinkInPrDescription: 34 | # specify the placeholder for the issue link that should be present in the description 35 | descriptionIssuePlaceholderRegexp: "^Issue link: (.*)$" 36 | matchers: 37 | # you can have several matches - for different types of issues 38 | # only the first matching entry is replaced 39 | jiraIssueMatch: 40 | # specify the regexp of issue id that you can find in the title of the PR 41 | # the match groups can be used to build the issue id (${1}, ${2}, etc.). 42 | titleIssueIdRegexp: \[(FLINK-[0-9]+)\] 43 | # the issue link to be added. ${1}, ${2} ... 
are replaced with the match groups from the 44 | # title match (remember to use quotes) 45 | descriptionIssueLink: "[${1}](https://issues.apache.org/jira/browse/${1}/)" 46 | docOnlyIssueMatch: 47 | titleIssueIdRegexp: \[hotfix\] 48 | descriptionIssueLink: "`Documentation only change, no JIRA issue`" 49 | 50 | ###### Title Validator ################################################################################################# 51 | # Verifies if commit/PR titles match the regexp specified 52 | verifyTitles: 53 | # Regular expression that should be matched by titles of commits or PR 54 | titleRegexp: ^\[FLINK-[0-9]+\].*$|^\[FLINK-XXXXX\].*$|^\[hotfix].*$ 55 | # If set to true, it will always check the PR title (as opposed to the individual commits). 56 | alwaysUsePrTitle: false 57 | # If set to true, it will only check the commit in case there is a single commit. 58 | # In case of multiple commits it will check PR title. 59 | # This reflects the standard behaviour of Github that for `Squash & Merge` GitHub 60 | # uses the PR title rather than commit messages for the squashed commit ¯\_(ツ)_/¯ 61 | # For single-commit PRs it takes the squashed commit message from the commit as expected. 62 | # 63 | # If set to false it will check all commit messages. This is useful when you do not squash commits at merge. 64 | validateEitherPrOrSingleCommitTitle: true 65 | # The title the GitHub status should appear from. 66 | statusTitle: "Title Validator" 67 | # A custom message to be displayed when the title passes validation. 68 | successMessage: "Validation successful!" 69 | # A custom message to be displayed when the title fails validation. 70 | # Allows insertion of ${type} (commit/PR), ${title} (the title validated) and ${regex} (the titleRegexp above). 71 | failureMessage: "Wrong ${type} title: ${title}" 72 | 73 | # Various Flags to control behaviour of the "Labeler" 74 | labelerFlags: 75 | # If this flag is changed to 'false', labels would only be added when the PR is first created 76 | # and not when existing PR is updated. 77 | # The default is 'true' which means the labels would be added when PR is updated even if they 78 | # were removed by the user 79 | labelOnPRUpdates: true 80 | 81 | # Comment to be posted to welcome users when they open their first PR 82 | firstPRWelcomeComment: > 83 | Thanks for opening this pull request! Please check out our contributing guidelines. (https://flink.apache.org/contributing/how-to-contribute.html) 84 | 85 | # Comment to be posted to congratulate user on their first merged PR 86 | firstPRMergeComment: > 87 | Awesome work, congrats on your first merged pull request! 88 | -------------------------------------------------------------------------------- /.github/workflows/push_pr.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | name: Build flink-connector-cassandra 20 | on: [push, pull_request] 21 | concurrency: 22 | group: ${{ github.workflow }}-${{ github.ref }} 23 | cancel-in-progress: true 24 | jobs: 25 | compile_and_test: 26 | strategy: 27 | matrix: 28 | flink: [ 1.19.0 ] 29 | include: 30 | - flink: 1.18.1 31 | 32 | uses: apache/flink-connector-shared-utils/.github/workflows/ci.yml@ci_utils 33 | with: 34 | flink_version: ${{ matrix.flink }} 35 | jdk_version: ${{ matrix.jdk || '8, 11, 17' }} 36 | 37 | 38 | -------------------------------------------------------------------------------- /.github/workflows/weekly.yml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | name: Weekly 20 | on: 21 | schedule: 22 | - cron: "0 0 * * 0" 23 | workflow_dispatch: 24 | jobs: 25 | compile_and_test: 26 | if: github.repository_owner == 'apache' 27 | strategy: 28 | matrix: 29 | flink_branches: [{ 30 | flink: 1.18-SNAPSHOT, 31 | branch: main 32 | }, { 33 | flink: 1.19-SNAPSHOT, 34 | branch: main 35 | }, { 36 | flink: 1.20-SNAPSHOT, 37 | branch: main 38 | }] 39 | uses: apache/flink-connector-shared-utils/.github/workflows/ci.yml@ci_utils 40 | with: 41 | flink_version: ${{ matrix.flink_branches.flink }} 42 | connector_branch: ${{ matrix.flink_branches.branch }} 43 | jdk_version: ${{ matrix.flink_branches.jdk || '8, 11, 17' }} 44 | run_dependency_convergence: false 45 | 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eslintcache 2 | .cache 3 | scalastyle-output.xml 4 | .classpath 5 | .idea/* 6 | !.idea/vcs.xml 7 | .metadata 8 | .settings 9 | .project 10 | .version.properties 11 | filter.properties 12 | logs.zip 13 | .mvn/wrapper/*.jar 14 | target 15 | tmp 16 | *.class 17 | *.iml 18 | *.swp 19 | *.jar 20 | *.zip 21 | *.log 22 | *.pyc 23 | .DS_Store 24 | build-target 25 | atlassian-ide-plugin.xml 26 | out/ 27 | /docs/api 28 | /docs/.bundle 29 | /docs/.rubydeps 30 | /docs/ruby2/.bundle 31 | /docs/ruby2/.rubydeps 32 | /docs/.jekyll-metadata 33 | *.ipr 34 | *.iws 35 | tools/flink 36 | tools/flink-* 37 | tools/releasing/release 38 | tools/japicmp-output 39 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tools/releasing/shared"] 2 | path = tools/releasing/shared 3 | url = https://github.com/apache/flink-connector-shared-utils 4 | branch = release_utils 5 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache Flink Cassandra Connector 2 | Copyright 2014-2024 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby 8 | granted, provided that this permission notice appear in all copies. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING 11 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, 12 | DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 13 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE 14 | USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Flink Cassandra Connector 2 | 3 | This repository contains the official Apache Flink Cassandra connector. 4 | 5 | ## Apache Flink 6 | 7 | Apache Flink is an open source stream processing framework with powerful stream- and batch-processing capabilities. 8 | 9 | Learn more about Flink at [https://flink.apache.org/](https://flink.apache.org/) 10 | 11 | ## Building the Apache Flink Cassandra Connector from Source 12 | 13 | Prerequisites: 14 | 15 | * Unix-like environment (we use Linux, Mac OS X) 16 | * Git 17 | * Maven (we recommend version 3.8.6) 18 | * Java 11 19 | 20 | ``` 21 | git clone https://github.com/apache/flink-connector-cassandra.git 22 | cd flink-connector-cassandra 23 | mvn clean package -DskipTests 24 | ``` 25 | 26 | The resulting jars can be found in the `target` directory of the respective module. 27 | 28 | ## Developing Flink 29 | 30 | The Flink committers use IntelliJ IDEA to develop the Flink codebase. 31 | We recommend IntelliJ IDEA for developing projects that involve Scala code. 32 | 33 | Minimal requirements for an IDE are: 34 | * Support for Java and Scala (also mixed projects) 35 | * Support for Maven with Java and Scala 36 | 37 | ### IntelliJ IDEA 38 | 39 | The IntelliJ IDE supports Maven out of the box and offers a plugin for Scala development. 40 | 41 | * IntelliJ download: [https://www.jetbrains.com/idea/](https://www.jetbrains.com/idea/) 42 | * IntelliJ Scala Plugin: [https://plugins.jetbrains.com/plugin/?id=1347](https://plugins.jetbrains.com/plugin/?id=1347) 43 | 44 | Check out our [Setting up IntelliJ](https://nightlies.apache.org/flink/flink-docs-master/flinkDev/ide_setup.html#intellij-idea) guide for details. 45 | 46 | ## Support 47 | 48 | Don’t hesitate to ask! 49 | 50 | Contact the developers and community on the [mailing lists](https://flink.apache.org/community.html#mailing-lists) if you need any help. 51 | 52 | [Open an issue](https://issues.apache.org/jira/browse/FLINK) if you found a bug in Flink. 53 | 54 | ## Documentation 55 | 56 | The documentation of Apache Flink is located on the website: [https://flink.apache.org](https://flink.apache.org) 57 | or in the `docs/` directory of the source code. 58 | 59 | ## Fork and Contribute 60 | 61 | This is an active open-source project. We are always open to people who want to use the system or contribute to it. 62 | Contact us if you are looking for implementation tasks that fit your skills. 63 | This article describes [how to contribute to Apache Flink](https://flink.apache.org/contributing/how-to-contribute.html). 64 | 65 | ## About 66 | 67 | Apache Flink is an open source project of The Apache Software Foundation (ASF). 68 | The Apache Flink project originated from the [Stratosphere](http://stratosphere.eu) research project. 
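A minimal end-to-end usage sketch (not part of the repository sources): it shows how the built artifact is typically wired into a DataStream job using the two main entry points of this connector, `CassandraSource` for reading and `CassandraSink` for writing. The keyspace, table, and `Pojo` entity below are made-up placeholders, and the exact builder options depend on your cluster.

```java
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.connector.cassandra.source.CassandraSource;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.cassandra.CassandraSink;
import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.mapping.Mapper;
import com.datastax.driver.mapping.annotations.Column;
import com.datastax.driver.mapping.annotations.PartitionKey;
import com.datastax.driver.mapping.annotations.Table;

import java.io.Serializable;

public class CassandraConnectorUsage {

    /** Entity handled by the DataStax object mapper; keyspace and table names are placeholders. */
    @Table(keyspace = "example_ks", name = "pojo")
    public static class Pojo implements Serializable {
        @PartitionKey
        @Column(name = "id")
        private long id;

        public Pojo() {} // the mapper and Flink both need a no-arg constructor

        public long getId() { return id; }
        public void setId(long id) { this.id = id; }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // ClusterBuilder is the single place where driver-level settings
        // (contact points, credentials, timeouts, ...) are configured.
        ClusterBuilder clusterBuilder = new ClusterBuilder() {
            @Override
            protected Cluster buildCluster(Cluster.Builder builder) {
                return builder.addContactPoint("127.0.0.1").build();
            }
        };

        // Source side: read a table into a DataStream of mapper-annotated POJOs.
        CassandraSource<Pojo> source = new CassandraSource<>(
                clusterBuilder,
                Pojo.class,
                "SELECT * FROM example_ks.pojo;",
                () -> new Mapper.Option[] {Mapper.Option.saveNullFields(true)});
        DataStream<Pojo> stream = env.fromSource(
                source, WatermarkStrategy.noWatermarks(), "CassandraSource");

        // Sink side: write the POJOs back through the mapper-based sink.
        CassandraSink.addSink(stream).setClusterBuilder(clusterBuilder).build();

        env.execute("Cassandra connector usage sketch");
    }
}
```

The same `ClusterBuilder` instance can be shared between the source and the sink; it is shipped with the job graph, which is why the connector takes a serializable builder rather than a pre-built `Cluster`.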
69 | -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/01b274c9-e1ef-4fad-accd-703c7e6ad9f3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-cassandra/d9c2e0fdab897c780a48f9d23c1b17688e7a2418/flink-connector-cassandra/archunit-violations/01b274c9-e1ef-4fad-accd-703c7e6ad9f3 -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/69754155-7c30-42a8-8fd3-c5a488d6d1b9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-cassandra/d9c2e0fdab897c780a48f9d23c1b17688e7a2418/flink-connector-cassandra/archunit-violations/69754155-7c30-42a8-8fd3-c5a488d6d1b9 -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/738e8069-6550-4700-a662-dcd027d3ca55: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-cassandra/d9c2e0fdab897c780a48f9d23c1b17688e7a2418/flink-connector-cassandra/archunit-violations/738e8069-6550-4700-a662-dcd027d3ca55 -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/7c16ca6f-4479-46d5-80cf-4f766ee5b442: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-cassandra/d9c2e0fdab897c780a48f9d23c1b17688e7a2418/flink-connector-cassandra/archunit-violations/7c16ca6f-4479-46d5-80cf-4f766ee5b442 -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/b7279bb1-1eb7-40c0-931d-f6db7971d126: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-cassandra/d9c2e0fdab897c780a48f9d23c1b17688e7a2418/flink-connector-cassandra/archunit-violations/b7279bb1-1eb7-40c0-931d-f6db7971d126 -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/d181ab66-6399-4468-b7f8-1263b90d7577: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-cassandra/d9c2e0fdab897c780a48f9d23c1b17688e7a2418/flink-connector-cassandra/archunit-violations/d181ab66-6399-4468-b7f8-1263b90d7577 -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/dc1ba6f4-3d84-498c-a085-e02ba5936201: -------------------------------------------------------------------------------- 1 | org.apache.flink.streaming.connectors.cassandra.CassandraConnectorITCase does not satisfy: only one of the following predicates match:\ 2 | * reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ 3 | * reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension or are , and of type MiniClusterTestEnvironment and annotated with @TestEnv\ 4 | * reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with 
class InternalMiniClusterExtension\ 5 | * reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ 6 | or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/dcfaa83d-a12c-48e1-9e51-b8d3808cd287: -------------------------------------------------------------------------------- 1 | Constructor (org.apache.flink.streaming.connectors.cassandra.ClusterBuilder, long, java.lang.Class, java.lang.String, org.apache.flink.streaming.connectors.cassandra.MapperOptions)> calls method in (CassandraSource.java:138) 2 | Constructor (org.apache.flink.streaming.connectors.cassandra.ClusterBuilder, long, java.lang.Class, java.lang.String, org.apache.flink.streaming.connectors.cassandra.MapperOptions)> calls method in (CassandraSource.java:124) 3 | Constructor (org.apache.flink.streaming.connectors.cassandra.ClusterBuilder, long, java.lang.Class, java.lang.String, org.apache.flink.streaming.connectors.cassandra.MapperOptions)> calls method in (CassandraSource.java:125) 4 | Constructor (org.apache.flink.streaming.connectors.cassandra.ClusterBuilder, long, java.lang.Class, java.lang.String, org.apache.flink.streaming.connectors.cassandra.MapperOptions)> calls method in (CassandraSource.java:126) 5 | Constructor (org.apache.flink.streaming.connectors.cassandra.ClusterBuilder, long, java.lang.Class, java.lang.String, org.apache.flink.streaming.connectors.cassandra.MapperOptions)> calls method in (CassandraSource.java:127) 6 | Method calls method in (CassandraSource.java:145) 7 | Method calls method in (CassandraSource.java:149) 8 | Method is annotated with in (CassandraSource.java:0) 9 | Method is annotated with in (CassandraSplitReader.java:0) 10 | Method is annotated with in (SplitsGenerator.java:0) -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/ea12954c-9e1e-4db3-bd78-2f30ec06d270: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/flink-connector-cassandra/d9c2e0fdab897c780a48f9d23c1b17688e7a2418/flink-connector-cassandra/archunit-violations/ea12954c-9e1e-4db3-bd78-2f30ec06d270 -------------------------------------------------------------------------------- /flink-connector-cassandra/archunit-violations/stored.rules: -------------------------------------------------------------------------------- 1 | # 2 | #Thu Jun 08 12:56:56 CEST 2023 3 | Classes\ in\ API\ packages\ should\ have\ at\ least\ one\ API\ visibility\ annotation.=ea12954c-9e1e-4db3-bd78-2f30ec06d270 4 | ITCASE\ tests\ should\ use\ a\ MiniCluster\ resource\ or\ extension=dc1ba6f4-3d84-498c-a085-e02ba5936201 5 | Tests\ inheriting\ from\ AbstractTestBase\ should\ have\ name\ ending\ with\ ITCase=b7279bb1-1eb7-40c0-931d-f6db7971d126 6 | Options\ for\ connectors\ and\ formats\ should\ reside\ in\ a\ consistent\ package\ and\ be\ public\ API.=69754155-7c30-42a8-8fd3-c5a488d6d1b9 7 | Production\ code\ must\ not\ call\ methods\ annotated\ with\ @VisibleForTesting=d181ab66-6399-4468-b7f8-1263b90d7577 8 | Connector\ production\ code\ must\ depend\ only\ on\ public\ API\ when\ outside\ of\ connector\ 
packages=dcfaa83d-a12c-48e1-9e51-b8d3808cd287 9 | Return\ and\ argument\ types\ of\ methods\ annotated\ with\ @Public\ must\ be\ annotated\ with\ @Public.=01b274c9-e1ef-4fad-accd-703c7e6ad9f3 10 | Return\ and\ argument\ types\ of\ methods\ annotated\ with\ @PublicEvolving\ must\ be\ annotated\ with\ @Public(Evolving).=738e8069-6550-4700-a662-dcd027d3ca55 11 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraColumnarOutputFormatBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 21 | import org.apache.flink.util.Preconditions; 22 | 23 | import com.datastax.driver.core.PreparedStatement; 24 | import com.datastax.driver.core.ResultSet; 25 | import com.datastax.driver.core.ResultSetFuture; 26 | import com.google.common.base.Strings; 27 | 28 | import java.time.Duration; 29 | import java.util.concurrent.CompletionStage; 30 | 31 | /** 32 | * CassandraColumnarOutputFormatBase is the common abstract class for writing into Apache Cassandra 33 | * using column-based output formats. 34 | * 35 | * @param <OUT> Type of the elements to write. 
36 | */ 37 | abstract class CassandraColumnarOutputFormatBase<OUT> 38 | extends CassandraOutputFormatBase<OUT, ResultSet> { 39 | private final String insertQuery; 40 | private transient PreparedStatement prepared; 41 | 42 | public CassandraColumnarOutputFormatBase( 43 | String insertQuery, 44 | ClusterBuilder builder, 45 | int maxConcurrentRequests, 46 | Duration maxConcurrentRequestsTimeout) { 47 | super(builder, maxConcurrentRequests, maxConcurrentRequestsTimeout); 48 | Preconditions.checkArgument( 49 | !Strings.isNullOrEmpty(insertQuery), "Query cannot be null or empty"); 50 | this.insertQuery = insertQuery; 51 | } 52 | 53 | @Override 54 | protected void postOpen() { 55 | super.postOpen(); 56 | this.prepared = session.prepare(insertQuery); 57 | } 58 | 59 | @Override 60 | protected CompletionStage<ResultSet> send(OUT record) { 61 | Object[] fields = extractFields(record); 62 | final ResultSetFuture result = session.executeAsync(prepared.bind(fields)); 63 | return listenableFutureToCompletableFuture(result); 64 | } 65 | 66 | protected abstract Object[] extractFields(OUT record); 67 | } 68 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraInputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.api.java.tuple.Tuple; 21 | import org.apache.flink.core.io.InputSplit; 22 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 23 | 24 | import com.datastax.driver.core.ResultSet; 25 | import com.datastax.driver.core.Row; 26 | 27 | import java.io.IOException; 28 | 29 | /** 30 | * InputFormat to read data from Apache Cassandra and generate {@link Tuple}s. 31 | * 32 | * @param <OUT> type of Tuple 33 | */ 34 | public class CassandraInputFormat<OUT extends Tuple> extends CassandraInputFormatBase<OUT> { 35 | 36 | private static final long serialVersionUID = 3642323148032444264L; 37 | private transient ResultSet resultSet; 38 | 39 | public CassandraInputFormat(String query, ClusterBuilder builder) { 40 | super(query, builder); 41 | } 42 | 43 | /** 44 | * Opens a Session and executes the query. 45 | * 46 | * @param ignored the split is ignored because this input format is not parallelizable. 
47 | * @throws IOException 48 | */ 49 | @Override 50 | public void open(InputSplit ignored) throws IOException { 51 | this.session = cluster.connect(); 52 | this.resultSet = session.execute(query); 53 | } 54 | 55 | @Override 56 | public boolean reachedEnd() throws IOException { 57 | return resultSet.isExhausted(); 58 | } 59 | 60 | @Override 61 | public OUT nextRecord(OUT reuse) throws IOException { 62 | final Row item = resultSet.one(); 63 | for (int i = 0; i < reuse.getArity(); i++) { 64 | reuse.setField(item.getObject(i), i); 65 | } 66 | return reuse; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraInputFormatBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.api.common.io.DefaultInputSplitAssigner; 21 | import org.apache.flink.api.common.io.NonParallelInput; 22 | import org.apache.flink.api.common.io.RichInputFormat; 23 | import org.apache.flink.api.common.io.statistics.BaseStatistics; 24 | import org.apache.flink.configuration.Configuration; 25 | import org.apache.flink.core.io.GenericInputSplit; 26 | import org.apache.flink.core.io.InputSplit; 27 | import org.apache.flink.core.io.InputSplitAssigner; 28 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 29 | import org.apache.flink.util.Preconditions; 30 | 31 | import com.datastax.driver.core.Cluster; 32 | import com.datastax.driver.core.Session; 33 | import com.google.common.base.Strings; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | /** 38 | * Base class for {@link RichInputFormat} to read data from Apache Cassandra and generate a custom 39 | * Cassandra annotated object. 
40 | * 41 | * @param <OUT> type of inputClass 42 | */ 43 | public abstract class CassandraInputFormatBase<OUT> extends RichInputFormat<OUT, InputSplit> 44 | implements NonParallelInput { 45 | private static final long serialVersionUID = -1519372881115104601L; 46 | protected final Logger logger = LoggerFactory.getLogger(getClass()); 47 | 48 | protected final String query; 49 | private final ClusterBuilder builder; 50 | 51 | protected transient Cluster cluster; 52 | protected transient Session session; 53 | 54 | public CassandraInputFormatBase(String query, ClusterBuilder builder) { 55 | Preconditions.checkArgument(!Strings.isNullOrEmpty(query), "Query cannot be null or empty"); 56 | Preconditions.checkNotNull(builder, "Builder cannot be null"); 57 | 58 | this.query = query; 59 | this.builder = builder; 60 | } 61 | 62 | @Override 63 | public void configure(Configuration parameters) { 64 | this.cluster = builder.getCluster(); 65 | } 66 | 67 | @Override 68 | public BaseStatistics getStatistics(BaseStatistics cachedStatistics) { 69 | return cachedStatistics; 70 | } 71 | 72 | @Override 73 | public InputSplit[] createInputSplits(int minNumSplits) { 74 | return new GenericInputSplit[] {new GenericInputSplit(0, 1)}; 75 | } 76 | 77 | @Override 78 | public InputSplitAssigner getInputSplitAssigner(InputSplit[] inputSplits) { 79 | return new DefaultInputSplitAssigner(inputSplits); 80 | } 81 | 82 | /** Closes all resources used. */ 83 | @Override 84 | public void close() { 85 | try { 86 | if (session != null) { 87 | session.close(); 88 | } 89 | } catch (Exception e) { 90 | logger.error("Error while closing session.", e); 91 | } 92 | 93 | try { 94 | if (cluster != null) { 95 | cluster.close(); 96 | } 97 | } catch (Exception e) { 98 | logger.error("Error while closing cluster.", e); 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraOutputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.api.java.tuple.Tuple; 21 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 22 | 23 | /** 24 | * OutputFormat to write Flink {@link Tuple}s into a Cassandra cluster. 25 | * 26 | * @param <OUT> Type of {@link Tuple} to write to Cassandra. 27 | * @deprecated Please use CassandraTupleOutputFormat instead. 
28 | */ 29 | @Deprecated 30 | public class CassandraOutputFormat<OUT extends Tuple> extends CassandraTupleOutputFormat<OUT> { 31 | 32 | public CassandraOutputFormat(String insertQuery, ClusterBuilder builder) { 33 | super(insertQuery, builder); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraOutputFormatBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.api.common.io.OutputFormatBase; 21 | import org.apache.flink.configuration.Configuration; 22 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 23 | import org.apache.flink.util.Preconditions; 24 | 25 | import com.datastax.driver.core.Cluster; 26 | import com.datastax.driver.core.Session; 27 | import com.google.common.util.concurrent.FutureCallback; 28 | import com.google.common.util.concurrent.Futures; 29 | import com.google.common.util.concurrent.ListenableFuture; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | import javax.annotation.Nullable; 34 | 35 | import java.time.Duration; 36 | import java.util.concurrent.CompletableFuture; 37 | 38 | /** 39 | * CassandraOutputFormatBase is the common abstract class for writing into Apache Cassandra using 40 | * output formats. 41 | * 42 | *

<p>In case of experiencing the following error: {@code Error while sending value. 43 | * com.datastax.driver.core.exceptions.WriteTimeoutException: Cassandra timeout during write query 44 | * at consistency LOCAL_ONE (1 replica were required but only 0 acknowledged the write)}, 45 | * 46 | *

<p>it is recommended to increase the Cassandra write timeout to match your workload in your 47 | * Cassandra cluster so that such timeout errors do not happen. For that you need to raise the 48 | * write_request_timeout_in_ms parameter in your cassandra.yaml. Indeed, this exception means 49 | * that the Cassandra coordinator node waited too long for an internal replication (replication 50 | * to another node) and did not receive the ack for the write. It is not recommended to lower the 51 | * replication factor in your Cassandra cluster because it is mandatory that you do not lose data 52 | * in case of a Cassandra cluster failure. Waiting for a single replica to acknowledge the write 53 | * is the minimum level for this guarantee in Cassandra. 54 | * 55 | * @param <OUT> Type of the elements to write. 56 | */ 57 | abstract class CassandraOutputFormatBase<OUT, V> extends OutputFormatBase<OUT, V> { 58 | private static final Logger LOG = LoggerFactory.getLogger(CassandraOutputFormatBase.class); 59 | 60 | private final ClusterBuilder builder; 61 | private transient Cluster cluster; 62 | protected transient Session session; 63 | 64 | public CassandraOutputFormatBase( 65 | ClusterBuilder builder, 66 | int maxConcurrentRequests, 67 | Duration maxConcurrentRequestsTimeout) { 68 | super(maxConcurrentRequests, maxConcurrentRequestsTimeout); 69 | Preconditions.checkNotNull(builder, "Builder cannot be null"); 70 | this.builder = builder; 71 | } 72 | 73 | /** Configure the connection to Cassandra. */ 74 | @Override 75 | public void configure(Configuration parameters) { 76 | this.cluster = builder.getCluster(); 77 | } 78 | 79 | /** Opens a Session to Cassandra. */ 80 | @Override 81 | protected void postOpen() { 82 | this.session = cluster.connect(); 83 | } 84 | 85 | /** Closes all resources used by the Cassandra connection. */ 86 | @Override 87 | protected void postClose() { 88 | try { 89 | if (session != null) { 90 | session.close(); 91 | } 92 | } catch (Exception e) { 93 | LOG.error("Error while closing session.", e); 94 | } 95 | try { 96 | if (cluster != null) { 97 | cluster.close(); 98 | } 99 | } catch (Exception e) { 100 | LOG.error("Error while closing cluster.", e); 101 | } 102 | } 103 | 104 | protected static <T> CompletableFuture<T> listenableFutureToCompletableFuture( 105 | final ListenableFuture<T> listenableFuture) { 106 | CompletableFuture<T> completable = new CompletableFuture<>(); 107 | Futures.addCallback(listenableFuture, new CompletableFutureCallback<>(completable)); 108 | return completable; 109 | } 110 | 111 | private static class CompletableFutureCallback<T> implements FutureCallback<T> { 112 | 113 | private final CompletableFuture<T> completableFuture; 114 | 115 | public CompletableFutureCallback(CompletableFuture<T> completableFuture) { 116 | this.completableFuture = completableFuture; 117 | } 118 | 119 | @Override 120 | public void onSuccess(@Nullable T result) { 121 | completableFuture.complete(result); 122 | } 123 | 124 | @Override 125 | public void onFailure(Throwable throwable) { 126 | completableFuture.completeExceptionally(throwable); 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraPojoInputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.core.io.InputSplit; 21 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 22 | import org.apache.flink.streaming.connectors.cassandra.MapperOptions; 23 | import org.apache.flink.util.Preconditions; 24 | 25 | import com.datastax.driver.mapping.Mapper; 26 | import com.datastax.driver.mapping.MappingManager; 27 | import com.datastax.driver.mapping.Result; 28 | 29 | /** 30 | * InputFormat to read data from Apache Cassandra and generate a custom Cassandra annotated object. 31 | * 32 | * @param <OUT> type of inputClass 33 | */ 34 | public class CassandraPojoInputFormat<OUT> extends CassandraInputFormatBase<OUT> { 35 | 36 | private static final long serialVersionUID = 1992091320180905115L; 37 | 38 | private transient Result<OUT> resultSet; 39 | private final MapperOptions mapperOptions; 40 | private final Class<OUT> inputClass; 41 | 42 | public CassandraPojoInputFormat(String query, ClusterBuilder builder, Class<OUT> inputClass) { 43 | this(query, builder, inputClass, null); 44 | } 45 | 46 | public CassandraPojoInputFormat( 47 | String query, 48 | ClusterBuilder builder, 49 | Class<OUT> inputClass, 50 | MapperOptions mapperOptions) { 51 | super(query, builder); 52 | this.mapperOptions = mapperOptions; 53 | this.inputClass = Preconditions.checkNotNull(inputClass, "InputClass cannot be null"); 54 | } 55 | 56 | @Override 57 | public void open(InputSplit split) { 58 | this.session = cluster.connect(); 59 | MappingManager manager = new MappingManager(session); 60 | 61 | Mapper<OUT> mapper = manager.mapper(inputClass); 62 | 63 | if (mapperOptions != null) { 64 | Mapper.Option[] optionsArray = mapperOptions.getMapperOptions(); 65 | if (optionsArray != null) { 66 | mapper.setDefaultGetOptions(optionsArray); 67 | } 68 | } 69 | this.resultSet = mapper.map(session.execute(query)); 70 | } 71 | 72 | @Override 73 | public boolean reachedEnd() { 74 | return resultSet.isExhausted(); 75 | } 76 | 77 | @Override 78 | public OUT nextRecord(OUT reuse) { 79 | return resultSet.one(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraPojoOutputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 21 | import org.apache.flink.streaming.connectors.cassandra.MapperOptions; 22 | import org.apache.flink.util.Preconditions; 23 | 24 | import com.datastax.driver.mapping.Mapper; 25 | import com.datastax.driver.mapping.MappingManager; 26 | import com.google.common.util.concurrent.ListenableFuture; 27 | 28 | import java.time.Duration; 29 | import java.util.concurrent.CompletionStage; 30 | 31 | /** 32 | * OutputFormat to write data from a custom Cassandra annotated object into Apache Cassandra. 33 | * Please read the recommendations in {@linkplain CassandraOutputFormatBase}. 34 | * 35 | * @param <OUT> type of outputClass 36 | */ 37 | public class CassandraPojoOutputFormat<OUT> extends CassandraOutputFormatBase<OUT, Void> { 38 | 39 | private static final long serialVersionUID = -1701885135103942460L; 40 | 41 | private final MapperOptions mapperOptions; 42 | private final Class<OUT> outputClass; 43 | private transient Mapper<OUT> mapper; 44 | 45 | public CassandraPojoOutputFormat(ClusterBuilder builder, Class<OUT> outputClass) { 46 | this(builder, outputClass, null); 47 | } 48 | 49 | public CassandraPojoOutputFormat( 50 | ClusterBuilder builder, Class<OUT> outputClass, MapperOptions mapperOptions) { 51 | this( 52 | builder, 53 | outputClass, 54 | mapperOptions, 55 | Integer.MAX_VALUE, 56 | Duration.ofMillis(Long.MAX_VALUE)); 57 | } 58 | 59 | public CassandraPojoOutputFormat( 60 | ClusterBuilder builder, 61 | Class<OUT> outputClass, 62 | MapperOptions mapperOptions, 63 | int maxConcurrentRequests, 64 | Duration maxConcurrentRequestsTimeout) { 65 | super(builder, maxConcurrentRequests, maxConcurrentRequestsTimeout); 66 | Preconditions.checkNotNull(outputClass, "OutputClass cannot be null"); 67 | this.mapperOptions = mapperOptions; 68 | this.outputClass = outputClass; 69 | } 70 | 71 | /** Opens a Session to Cassandra and initializes the mapper. */ 72 | @Override 73 | protected void postOpen() { 74 | super.postOpen(); 75 | MappingManager mappingManager = new MappingManager(session); 76 | this.mapper = mappingManager.mapper(outputClass); 77 | if (mapperOptions != null) { 78 | Mapper.Option[] optionsArray = mapperOptions.getMapperOptions(); 79 | if (optionsArray != null) { 80 | mapper.setDefaultSaveOptions(optionsArray); 81 | } 82 | } 83 | } 84 | 85 | @Override 86 | protected CompletionStage<Void> send(OUT record) { 87 | final ListenableFuture<Void> result = mapper.saveAsync(record); 88 | return listenableFutureToCompletableFuture(result); 89 | } 90 | 91 | /** Closes all resources used. 
*/ 92 | @Override 93 | protected void postClose() { 94 | super.postClose(); 95 | mapper = null; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraRowOutputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 21 | import org.apache.flink.types.Row; 22 | 23 | import java.time.Duration; 24 | 25 | /** 26 | * OutputFormat to write Flink {@link Row}s into a Cassandra cluster. Please read the 27 | * recommendations in {@linkplain CassandraOutputFormatBase}. 28 | */ 29 | public class CassandraRowOutputFormat extends CassandraColumnarOutputFormatBase<Row> { 30 | 31 | public CassandraRowOutputFormat(String insertQuery, ClusterBuilder builder) { 32 | this(insertQuery, builder, Integer.MAX_VALUE, Duration.ofMillis(Long.MAX_VALUE)); 33 | } 34 | 35 | public CassandraRowOutputFormat( 36 | String insertQuery, 37 | ClusterBuilder builder, 38 | int maxConcurrentRequests, 39 | Duration maxConcurrentRequestsTimeout) { 40 | super(insertQuery, builder, maxConcurrentRequests, maxConcurrentRequestsTimeout); 41 | } 42 | 43 | @Override 44 | protected Object[] extractFields(Row record) { 45 | 46 | Object[] fields = new Object[record.getArity()]; 47 | for (int i = 0; i < fields.length; i++) { 48 | fields[i] = record.getField(i); 49 | } 50 | return fields; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/batch/connectors/cassandra/CassandraTupleOutputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra; 19 | 20 | import org.apache.flink.api.java.tuple.Tuple; 21 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 22 | 23 | import java.time.Duration; 24 | 25 | /** 26 | * OutputFormat to write Flink {@link Tuple}s into a Cassandra cluster. Please read the 27 | * recommendations in {@linkplain CassandraOutputFormatBase}. 28 | * 29 | * @param <OUT> Type of {@link Tuple} to write to Cassandra. 30 | */ 31 | public class CassandraTupleOutputFormat<OUT extends Tuple> 32 | extends CassandraColumnarOutputFormatBase<OUT> { 33 | 34 | public CassandraTupleOutputFormat(String insertQuery, ClusterBuilder builder) { 35 | this(insertQuery, builder, Integer.MAX_VALUE, Duration.ofMillis(Long.MAX_VALUE)); 36 | } 37 | 38 | public CassandraTupleOutputFormat( 39 | String insertQuery, 40 | ClusterBuilder builder, 41 | int maxConcurrentRequests, 42 | Duration maxConcurrentRequestsTimeout) { 43 | super(insertQuery, builder, maxConcurrentRequests, maxConcurrentRequestsTimeout); 44 | } 45 | 46 | @Override 47 | protected Object[] extractFields(OUT record) { 48 | Object[] fields = new Object[record.getArity()]; 49 | for (int i = 0; i < fields.length; i++) { 50 | fields[i] = record.getField(i); 51 | } 52 | return fields; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/enumerator/CassandraEnumeratorState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.connector.cassandra.source.enumerator; 20 | 21 | import org.apache.flink.connector.cassandra.source.split.CassandraSplit; 22 | import org.apache.flink.connector.cassandra.source.split.SplitsGenerator; 23 | 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | import javax.annotation.Nullable; 28 | 29 | import java.math.BigInteger; 30 | import java.util.ArrayDeque; 31 | import java.util.Collection; 32 | import java.util.Objects; 33 | import java.util.Queue; 34 | 35 | /** 36 | * State for {@link CassandraSplitEnumerator}. It stores the offset ({@code startToken}) of the last 37 | * lazy {@link CassandraSplit} generation and the number of splits left to generate. Upon 38 | * restoration of this state, {@link SplitsGenerator#prepareSplits()} is obviously not re-run, so we 39 | * also need to store the result of this initial splits preparation ({@code increment} and {@code 40 | * maxToken}). 
41 | */ 42 | public class CassandraEnumeratorState { 43 | private static final Logger LOG = LoggerFactory.getLogger(CassandraEnumeratorState.class); 44 | private long numSplitsLeftToGenerate; 45 | private BigInteger increment; 46 | private BigInteger startToken; 47 | private BigInteger maxToken; 48 | // splits that were assigned to a failed reader and that were not part of a checkpoint, so after 49 | // restoration, they need to be reassigned. 50 | private final Queue<CassandraSplit> splitsToReassign; 51 | 52 | CassandraEnumeratorState() { 53 | this.splitsToReassign = new ArrayDeque<>(); 54 | } 55 | 56 | public CassandraEnumeratorState( 57 | long numSplitsLeftToGenerate, 58 | BigInteger increment, 59 | BigInteger startToken, 60 | BigInteger maxToken, 61 | Queue<CassandraSplit> splitsToReassign) { 62 | this.numSplitsLeftToGenerate = numSplitsLeftToGenerate; 63 | this.increment = increment; 64 | this.startToken = startToken; 65 | this.maxToken = maxToken; 66 | this.splitsToReassign = splitsToReassign; 67 | } 68 | 69 | Queue<CassandraSplit> getSplitsToReassign() { 70 | return splitsToReassign; 71 | } 72 | 73 | public long getNumSplitsLeftToGenerate() { 74 | return numSplitsLeftToGenerate; 75 | } 76 | 77 | BigInteger getIncrement() { 78 | return increment; 79 | } 80 | 81 | BigInteger getStartToken() { 82 | return startToken; 83 | } 84 | 85 | BigInteger getMaxToken() { 86 | return maxToken; 87 | } 88 | 89 | void addSplitsBack(Collection<CassandraSplit> splits) { 90 | LOG.info( 91 | "Add {} splits back to CassandraSplitEnumerator for reassignment after failover", 92 | splits.size()); 93 | splitsToReassign.addAll(splits); 94 | } 95 | 96 | /** 97 | * Provide a {@link CassandraSplit} that was assigned to a failed reader or lazily create one. 98 | * Splits contain a range of the Cassandra ring of {@code maxSplitMemorySize}. There is no way 99 | * to estimate the size of the data with the optional CQL filters without reading the data. So 100 | * the split can be smaller than {@code maxSplitMemorySize} when the query is actually executed. 101 | */ 102 | public @Nullable CassandraSplit getNextSplit() { 103 | // serve splits to reassign first 104 | final CassandraSplit splitToReassign = splitsToReassign.poll(); 105 | if (splitToReassign != null) { 106 | return splitToReassign; 107 | } // else no more splits to reassign, generate one 108 | if (numSplitsLeftToGenerate == 0) { 109 | return null; // enumerator will send the no more split message to the requesting reader 110 | } 111 | BigInteger endToken = 112 | numSplitsLeftToGenerate == 1 113 | // last split to generate, round up to the last token of the ring 114 | ? 
maxToken 115 | : startToken.add(increment); 116 | CassandraSplit split = new CassandraSplit(startToken, endToken); 117 | // prepare for next call 118 | this.startToken = endToken; 119 | numSplitsLeftToGenerate--; 120 | return split; 121 | } 122 | 123 | @Override 124 | public boolean equals(Object o) { 125 | if (this == o) { 126 | return true; 127 | } 128 | if (o == null || getClass() != o.getClass()) { 129 | return false; 130 | } 131 | CassandraEnumeratorState that = (CassandraEnumeratorState) o; 132 | if (this.splitsToReassign.size() != that.splitsToReassign.size()) { 133 | return false; 134 | } 135 | for (CassandraSplit cassandraSplit : splitsToReassign) { 136 | if (!that.splitsToReassign.contains(cassandraSplit)) { 137 | return false; 138 | } 139 | } 140 | return numSplitsLeftToGenerate == that.numSplitsLeftToGenerate 141 | && increment.equals(that.increment) 142 | && startToken.equals(that.startToken) 143 | && maxToken.equals(that.maxToken); 144 | } 145 | 146 | @Override 147 | public int hashCode() { 148 | return Objects.hash( 149 | numSplitsLeftToGenerate, increment, startToken, maxToken, splitsToReassign); 150 | } 151 | 152 | @Override 153 | public String toString() { 154 | return "CassandraEnumeratorState{" 155 | + "numSplitsLeftToGenerate=" 156 | + numSplitsLeftToGenerate 157 | + ", increment=" 158 | + increment 159 | + ", startToken=" 160 | + startToken 161 | + ", maxToken=" 162 | + maxToken 163 | + ", splitsToReassign=" 164 | + splitsToReassign 165 | + '}'; 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/enumerator/CassandraEnumeratorStateSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.connector.cassandra.source.enumerator; 20 | 21 | import org.apache.flink.connector.cassandra.source.split.CassandraSplit; 22 | import org.apache.flink.connector.cassandra.source.split.CassandraSplitSerializer; 23 | import org.apache.flink.connector.cassandra.source.utils.BigIntegerSerializationUtils; 24 | import org.apache.flink.core.io.SimpleVersionedSerializer; 25 | 26 | import java.io.ByteArrayInputStream; 27 | import java.io.ByteArrayOutputStream; 28 | import java.io.IOException; 29 | import java.io.ObjectInputStream; 30 | import java.io.ObjectOutputStream; 31 | import java.math.BigInteger; 32 | import java.util.ArrayDeque; 33 | import java.util.Queue; 34 | 35 | /** Serializer for {@link CassandraEnumeratorState}. 
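 *
 * <p>Wire format, as implemented in {@code serialize()} below: the number of splits to reassign,
 * each such split as a length-prefixed byte array produced by {@link CassandraSplitSerializer},
 * then {@code numSplitsLeftToGenerate} followed by the {@code increment}, {@code startToken} and
 * {@code maxToken} {@link java.math.BigInteger}s written with {@link BigIntegerSerializationUtils}.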
*/ 36 | public class CassandraEnumeratorStateSerializer 37 |         implements SimpleVersionedSerializer<CassandraEnumeratorState> { 38 | 39 |     public static final CassandraEnumeratorStateSerializer INSTANCE = 40 |             new CassandraEnumeratorStateSerializer(); 41 |     public static final int CURRENT_VERSION = 0; 42 | 43 |     private CassandraEnumeratorStateSerializer() {} 44 | 45 |     @Override 46 |     public int getVersion() { 47 |         return CURRENT_VERSION; 48 |     } 49 | 50 |     @Override 51 |     public byte[] serialize(CassandraEnumeratorState cassandraEnumeratorState) throws IOException { 52 |         final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); 53 |         try (final ObjectOutputStream objectOutputStream = 54 |                 new ObjectOutputStream(byteArrayOutputStream)) { 55 |             final Queue<CassandraSplit> splitsToReassign = 56 |                     cassandraEnumeratorState.getSplitsToReassign(); 57 |             objectOutputStream.writeInt(splitsToReassign.size()); 58 |             for (CassandraSplit cassandraSplit : splitsToReassign) { 59 |                 final byte[] serializedSplit = 60 |                         CassandraSplitSerializer.INSTANCE.serialize(cassandraSplit); 61 |                 objectOutputStream.writeInt(serializedSplit.length); 62 |                 objectOutputStream.write(serializedSplit); 63 |             } 64 | 65 |             objectOutputStream.writeLong(cassandraEnumeratorState.getNumSplitsLeftToGenerate()); 66 |             BigIntegerSerializationUtils.write( 67 |                     cassandraEnumeratorState.getIncrement(), objectOutputStream); 68 |             BigIntegerSerializationUtils.write( 69 |                     cassandraEnumeratorState.getStartToken(), objectOutputStream); 70 |             BigIntegerSerializationUtils.write( 71 |                     cassandraEnumeratorState.getMaxToken(), objectOutputStream); 72 |         } 73 |         return byteArrayOutputStream.toByteArray(); 74 |     } 75 | 76 |     @Override 77 |     public CassandraEnumeratorState deserialize(int version, byte[] serialized) throws IOException { 78 |         try (final ByteArrayInputStream byteArrayInputStream = 79 |                         new ByteArrayInputStream(serialized); 80 |                 final ObjectInputStream objectInputStream = 81 |                         new ObjectInputStream(byteArrayInputStream)) { 82 |             final Queue<CassandraSplit> splitsToReassign = new ArrayDeque<>(); 83 |             final int splitsToReassignSize = objectInputStream.readInt(); 84 |             for (int i = 0; i < splitsToReassignSize; i++) { 85 |                 final int splitSize = objectInputStream.readInt(); 86 |                 final byte[] splitBytes = new byte[splitSize]; 87 |                 objectInputStream.readFully(splitBytes); 88 |                 final CassandraSplit split = 89 |                         CassandraSplitSerializer.INSTANCE.deserialize( 90 |                                 CassandraSplitSerializer.CURRENT_VERSION, splitBytes); 91 |                 splitsToReassign.add(split); 92 |             } 93 | 94 |             final long numSplitsLeftToGenerate = objectInputStream.readLong(); 95 |             final BigInteger increment = BigIntegerSerializationUtils.read(objectInputStream); 96 |             final BigInteger startToken = BigIntegerSerializationUtils.read(objectInputStream); 97 |             final BigInteger maxToken = BigIntegerSerializationUtils.read(objectInputStream); 98 | 99 |             return new CassandraEnumeratorState( 100 |                     numSplitsLeftToGenerate, increment, startToken, maxToken, splitsToReassign); 101 |         } 102 |     } 103 | } 104 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/enumerator/CassandraSplitEnumerator.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership. 
The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License.  You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 17 |  */ 18 | 19 | package org.apache.flink.connector.cassandra.source.enumerator; 20 | 21 | import org.apache.flink.api.connector.source.SplitEnumerator; 22 | import org.apache.flink.api.connector.source.SplitEnumeratorContext; 23 | import org.apache.flink.connector.cassandra.source.split.CassandraSplit; 24 | import org.apache.flink.connector.cassandra.source.split.SplitsGenerator; 25 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 26 | 27 | import com.datastax.driver.core.Cluster; 28 | import com.datastax.driver.core.Session; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | import javax.annotation.Nullable; 33 | 34 | import java.io.IOException; 35 | import java.util.List; 36 | 37 | import static org.apache.flink.connector.cassandra.source.split.SplitsGenerator.CassandraPartitioner.MURMUR3PARTITIONER; 38 | import static org.apache.flink.connector.cassandra.source.split.SplitsGenerator.CassandraPartitioner.RANDOMPARTITIONER; 39 | 40 | /** {@link SplitEnumerator} that splits the Cassandra cluster into {@link CassandraSplit}s. */ 41 | public final class CassandraSplitEnumerator 42 |         implements SplitEnumerator<CassandraSplit, CassandraEnumeratorState> { 43 |     private static final Logger LOG = LoggerFactory.getLogger(CassandraSplitEnumerator.class); 44 | 45 |     private final SplitEnumeratorContext<CassandraSplit> enumeratorContext; 46 |     private CassandraEnumeratorState state; 47 |     private final Cluster cluster; 48 |     private final Long maxSplitMemorySize; 49 |     private final Session session; 50 |     private final String keyspace; 51 |     private final String table; 52 | 53 |     public CassandraSplitEnumerator( 54 |             SplitEnumeratorContext<CassandraSplit> enumeratorContext, 55 |             CassandraEnumeratorState state, 56 |             ClusterBuilder clusterBuilder, 57 |             Long maxSplitMemorySize, 58 |             String keyspace, 59 |             String table) { 60 |         this.enumeratorContext = enumeratorContext; 61 |         this.state = state == null ? new CassandraEnumeratorState() : state /* snapshot restore */; 62 |         this.cluster = clusterBuilder.getCluster(); 63 |         this.maxSplitMemorySize = maxSplitMemorySize; 64 |         this.session = cluster.newSession(); 65 |         this.keyspace = keyspace; 66 |         this.table = table; 67 |     } 68 | 69 |     @Override 70 |     public void start() { 71 |         enumeratorContext.callAsync( 72 |                 this::prepareSplits, 73 |                 (preparedState, throwable) -> { 74 |                     LOG.debug("Initialized CassandraEnumeratorState: {}", preparedState.toString()); 75 |                     state = preparedState; 76 |                 }); 77 |     } 78 | 79 |     private CassandraEnumeratorState prepareSplits() { 80 |         final int parallelism = enumeratorContext.currentParallelism(); 81 |         final String partitionerName = cluster.getMetadata().getPartitioner(); 82 |         final SplitsGenerator.CassandraPartitioner partitioner = 83 |                 partitionerName.contains(MURMUR3PARTITIONER.getClassName()) 84 |                         ? 
MURMUR3PARTITIONER 85 |                         : RANDOMPARTITIONER; 86 |         final SplitsGenerator splitsGenerator = 87 |                 new SplitsGenerator( 88 |                         partitioner, session, keyspace, table, parallelism, maxSplitMemorySize); 89 |         return splitsGenerator.prepareSplits(); 90 |     } 91 | 92 |     @Override 93 |     public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname) { 94 |         checkReaderRegistered(subtaskId); 95 |         final CassandraSplit cassandraSplit = state.getNextSplit(); 96 |         if (cassandraSplit != null) { 97 |             LOG.info("Assigning splits to reader {}", subtaskId); 98 |             enumeratorContext.assignSplit(cassandraSplit, subtaskId); 99 |         } else { 100 |             LOG.info( 101 |                     "No split assigned to reader {} because the enumerator has no unassigned split left. Sending NoMoreSplitsEvent to reader", 102 |                     subtaskId); 103 |             enumeratorContext.signalNoMoreSplits(subtaskId); 104 |         } 105 |     } 106 | 107 |     @Override 108 |     public void addSplitsBack(List<CassandraSplit> splits, int subtaskId) { 109 |         // splits that were assigned to a failed reader and that were not part of a checkpoint, so 110 |         // after restoration, they need to be reassigned 111 |         state.addSplitsBack(splits); 112 |     } 113 | 114 |     @Override 115 |     public void addReader(int subtaskId) { 116 |         // nothing to do on reader registration as the CassandraSplits are generated lazily 117 |     } 118 | 119 |     private void checkReaderRegistered(int readerId) { 120 |         if (!enumeratorContext.registeredReaders().containsKey(readerId)) { 121 |             throw new IllegalStateException( 122 |                     String.format("Reader %d is not registered to source coordinator", readerId)); 123 |         } 124 |     } 125 | 126 |     @Override 127 |     public CassandraEnumeratorState snapshotState(long checkpointId) { 128 |         return state; 129 |     } 130 | 131 |     @Override 132 |     public void close() throws IOException { 133 |         try { 134 |             if (session != null) { 135 |                 session.close(); 136 |             } 137 |         } catch (Exception e) { 138 |             LOG.error("Error while closing session.", e); 139 |         } 140 |         try { 141 |             if (cluster != null) { 142 |                 cluster.close(); 143 |             } 144 |         } catch (Exception e) { 145 |             LOG.error("Error while closing cluster.", e); 146 |         } 147 |     } 148 | } 149 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/reader/CassandraRecordEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License.  You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 
17 |  */ 18 | 19 | package org.apache.flink.connector.cassandra.source.reader; 20 | 21 | import org.apache.flink.api.connector.source.SourceOutput; 22 | import org.apache.flink.connector.base.source.reader.RecordEmitter; 23 | import org.apache.flink.connector.cassandra.source.split.CassandraSplit; 24 | 25 | import com.datastax.driver.core.ColumnDefinitions; 26 | import com.datastax.driver.core.ExecutionInfo; 27 | import com.datastax.driver.core.ResultSet; 28 | import com.datastax.driver.core.Row; 29 | import com.google.common.util.concurrent.Futures; 30 | import com.google.common.util.concurrent.ListenableFuture; 31 | 32 | import java.util.Collections; 33 | import java.util.Iterator; 34 | import java.util.List; 35 | import java.util.function.Function; 36 | 37 | /** 38 |  * {@link RecordEmitter} that converts the {@link CassandraRow} read by the {@link 39 |  * CassandraSplitReader} to the specified POJO and outputs it. This class uses the Cassandra driver 40 |  * mapper to map the row to the POJO. 41 |  * 42 |  * @param <OUT> type of POJO record to output 43 |  */ 44 | class CassandraRecordEmitter<OUT> implements RecordEmitter<CassandraRow, OUT, CassandraSplit> { 45 | 46 |     private final Function<ResultSet, OUT> map; 47 | 48 |     public CassandraRecordEmitter(Function<ResultSet, OUT> map) { 49 |         this.map = map; 50 |     } 51 | 52 |     @Override 53 |     public void emitRecord( 54 |             CassandraRow cassandraRow, SourceOutput<OUT> output, CassandraSplit cassandraSplit) { 55 |         // Mapping a row to a class is a complex operation involving the reflection API. 56 |         // It is better to use the Cassandra mapper for it. 57 |         // But the mapper takes only a ResultSet as input, hence forging one containing only the Row 58 |         ResultSet resultSet = new SingleRowResultSet(cassandraRow); 59 |         // output the pojo based on the cassandraRow 60 |         output.collect(map.apply(resultSet)); 61 |     } 62 | 63 |     private static class SingleRowResultSet implements ResultSet { 64 |         private final CassandraRow cassandraRow; 65 |         private final Row row; 66 | 67 |         private SingleRowResultSet(CassandraRow cassandraRow) { 68 |             this.cassandraRow = cassandraRow; 69 |             this.row = cassandraRow.getRow(); 70 |         } 71 | 72 |         @Override 73 |         public Row one() { 74 |             return row; 75 |         } 76 | 77 |         @Override 78 |         public ColumnDefinitions getColumnDefinitions() { 79 |             return row.getColumnDefinitions(); 80 |         } 81 | 82 |         @Override 83 |         public boolean wasApplied() { 84 |             return true; 85 |         } 86 | 87 |         @Override 88 |         public boolean isExhausted() { 89 |             return true; 90 |         } 91 | 92 |         @Override 93 |         public boolean isFullyFetched() { 94 |             return true; 95 |         } 96 | 97 |         @Override 98 |         public int getAvailableWithoutFetching() { 99 |             return 1; 100 |         } 101 | 102 |         @Override 103 |         public ListenableFuture<ResultSet> fetchMoreResults() { 104 |             return Futures.immediateFuture(null); 105 |         } 106 | 107 |         @Override 108 |         public List<Row> all() { 109 |             return Collections.singletonList(row); 110 |         } 111 | 112 |         @Override 113 |         public Iterator<Row> iterator() { 114 |             return new Iterator<Row>() { 115 | 116 |                 @Override 117 |                 public boolean hasNext() { 118 |                     return true; 119 |                 } 120 | 121 |                 @Override 122 |                 public Row next() { 123 |                     return row; 124 |                 } 125 |             }; 126 |         } 127 | 128 |         @Override 129 |         public ExecutionInfo getExecutionInfo() { 130 |             return cassandraRow.getExecutionInfo(); 131 |         } 132 | 133 |         @Override 134 |         public List<ExecutionInfo> getAllExecutionInfo() { 135 |             return Collections.singletonList(cassandraRow.getExecutionInfo()); 136 |         } 137 |     } 138 | } 139 | -------------------------------------------------------------------------------- 
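A usage note, not part of the source tree: the emitter only needs a function from the forged single-row ResultSet to the output POJO. A minimal sketch of how such a function is typically built with the driver mapper, mirroring the construction in CassandraSourceReader further below; the Pojo class and the open session are hypothetical:

// Build the ResultSet -> POJO mapping function that CassandraRecordEmitter applies to each row.
Mapper<Pojo> mapper = new MappingManager(session).mapper(Pojo.class);
CassandraRecordEmitter<Pojo> emitter =
        new CassandraRecordEmitter<>(resultSet -> mapper.map(resultSet).one());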
/flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/reader/CassandraRow.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License.  You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 17 |  */ 18 | 19 | package org.apache.flink.connector.cassandra.source.reader; 20 | 21 | import com.datastax.driver.core.ExecutionInfo; 22 | import com.datastax.driver.core.Row; 23 | 24 | /** 25 |  * Wrapper for a Cassandra {@link Row} that also stores the {@link ExecutionInfo} statistics of the 26 |  * query execution that produced this row. {@link ExecutionInfo} is needed when using the Cassandra 27 |  * mapper to translate the row to a POJO. 28 |  */ 29 | public class CassandraRow { 30 | 31 |     private final Row row; 32 |     private final ExecutionInfo executionInfo; 33 | 34 |     public CassandraRow(Row row, ExecutionInfo executionInfo) { 35 |         this.row = row; 36 |         this.executionInfo = executionInfo; 37 |     } 38 | 39 |     public Row getRow() { 40 |         return row; 41 |     } 42 | 43 |     public ExecutionInfo getExecutionInfo() { 44 |         return executionInfo; 45 |     } 46 | } 47 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/reader/CassandraSourceReader.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License.  You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 
17 |  */ 18 | 19 | package org.apache.flink.connector.cassandra.source.reader; 20 | 21 | import org.apache.flink.api.connector.source.SourceReader; 22 | import org.apache.flink.api.connector.source.SourceReaderContext; 23 | import org.apache.flink.connector.base.source.reader.SingleThreadMultiplexSourceReaderBase; 24 | import org.apache.flink.connector.cassandra.source.split.CassandraSplit; 25 | 26 | import com.datastax.driver.core.Cluster; 27 | import com.datastax.driver.core.Session; 28 | import com.datastax.driver.mapping.Mapper; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | import java.util.Map; 33 | 34 | /** 35 |  * Cassandra {@link SourceReader} that reads one {@link CassandraSplit} using a single thread. 36 |  * 37 |  * @param <OUT> the type of elements produced by the source 38 |  */ 39 | class CassandraSourceReader<OUT> 40 |         extends SingleThreadMultiplexSourceReaderBase< 41 |                 CassandraRow, OUT, CassandraSplit, CassandraSplit> { 42 | 43 |     private static final Logger LOG = LoggerFactory.getLogger(CassandraSourceReader.class); 44 | 45 |     private final Cluster cluster; 46 |     private final Session session; 47 | 48 |     // created by the factory 49 |     CassandraSourceReader( 50 |             SourceReaderContext context, 51 |             String query, 52 |             String keyspace, 53 |             String table, 54 |             Cluster cluster, 55 |             Session session, 56 |             Mapper<OUT> mapper) { 57 |         super( 58 |                 () -> new CassandraSplitReader(cluster, session, query, keyspace, table), 59 |                 new CassandraRecordEmitter<>(resultSet -> mapper.map(resultSet).one()), 60 |                 context.getConfiguration(), 61 |                 context); 62 |         this.cluster = cluster; 63 |         this.session = session; 64 |     } 65 | 66 |     @Override 67 |     public void start() { 68 |         context.sendSplitRequest(); 69 |     } 70 | 71 |     @Override 72 |     protected void onSplitFinished(Map<String, CassandraSplit> finishedSplitIds) { 73 |         context.sendSplitRequest(); 74 |     } 75 | 76 |     @Override 77 |     protected CassandraSplit initializedState(CassandraSplit cassandraSplit) { 78 |         return cassandraSplit; 79 |     } 80 | 81 |     @Override 82 |     protected CassandraSplit toSplitType(String splitId, CassandraSplit cassandraSplit) { 83 |         return cassandraSplit; 84 |     } 85 | 86 |     @Override 87 |     public void close() throws Exception { 88 |         super.close(); 89 |         try { 90 |             if (session != null) { 91 |                 session.close(); 92 |             } 93 |         } catch (Exception e) { 94 |             LOG.error("Error while closing session.", e); 95 |         } 96 |         try { 97 |             if (cluster != null) { 98 |                 cluster.close(); 99 |             } 100 |         } catch (Exception e) { 101 |             LOG.error("Error while closing cluster.", e); 102 |         } 103 |     } 104 | } 105 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/reader/CassandraSourceReaderFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License. 
You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 17 |  */ 18 | 19 | package org.apache.flink.connector.cassandra.source.reader; 20 | 21 | import org.apache.flink.api.connector.source.SourceReaderContext; 22 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 23 | import org.apache.flink.streaming.connectors.cassandra.MapperOptions; 24 | 25 | import com.datastax.driver.core.Cluster; 26 | import com.datastax.driver.core.Session; 27 | import com.datastax.driver.mapping.Mapper; 28 | import com.datastax.driver.mapping.MappingManager; 29 | 30 | /** 31 |  * Factory to create {@link CassandraSourceReader}s and allow sharing the cluster and the session 32 |  * objects. 33 |  */ 34 | public class CassandraSourceReaderFactory<OUT> { 35 |     public CassandraSourceReader<OUT> create( 36 |             SourceReaderContext context, 37 |             ClusterBuilder clusterBuilder, 38 |             Class<OUT> pojoClass, 39 |             String query, 40 |             String keyspace, 41 |             String table, 42 |             MapperOptions mapperOptions) { 43 |         Cluster cluster = clusterBuilder.getCluster(); 44 |         Session session = cluster.connect(); 45 |         Mapper<OUT> mapper = new MappingManager(session).mapper(pojoClass); 46 |         if (mapperOptions != null) { 47 |             Mapper.Option[] optionsArray = mapperOptions.getMapperOptions(); 48 |             if (optionsArray != null) { 49 |                 mapper.setDefaultGetOptions(optionsArray); 50 |             } 51 |         } 52 |         return new CassandraSourceReader<>( 53 |                 context, query, keyspace, table, cluster, session, mapper); 54 |     } 55 | } 56 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/split/CassandraSplit.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License.  You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 17 |  */ 18 | 19 | package org.apache.flink.connector.cassandra.source.split; 20 | 21 | import org.apache.flink.api.connector.source.SourceSplit; 22 | 23 | import java.io.Serializable; 24 | import java.math.BigInteger; 25 | 26 | /** 27 |  * Immutable {@link SourceSplit} for the Cassandra source. A Cassandra split is a slice of the Cassandra 28 |  * token ring (i.e. a ringRange). 
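 *
 * <p>Illustration (hypothetical values): on the Murmur3 ring, a split {@code (0, 1000)} covers the
 * rows whose partition-key token falls in that range; {@link #splitId()} renders it as the string
 * {@code "(0,1000)"}.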
29 | */ 30 | public class CassandraSplit implements SourceSplit, Serializable { 31 | 32 | private final BigInteger ringRangeStart; 33 | private final BigInteger ringRangeEnd; 34 | 35 | public CassandraSplit(BigInteger ringRangeStart, BigInteger ringRangeEnd) { 36 | this.ringRangeStart = ringRangeStart; 37 | this.ringRangeEnd = ringRangeEnd; 38 | } 39 | 40 | public BigInteger getRingRangeStart() { 41 | return ringRangeStart; 42 | } 43 | 44 | public BigInteger getRingRangeEnd() { 45 | return ringRangeEnd; 46 | } 47 | 48 | @Override 49 | public String splitId() { 50 | return String.format("(%s,%s)", ringRangeStart.toString(), ringRangeEnd.toString()); 51 | } 52 | 53 | @Override 54 | public String toString() { 55 | return splitId(); 56 | } 57 | 58 | @Override 59 | public boolean equals(Object o) { 60 | if (this == o) { 61 | return true; 62 | } 63 | if (o == null || getClass() != o.getClass()) { 64 | return false; 65 | } 66 | CassandraSplit other = (CassandraSplit) o; 67 | return ringRangeStart.equals(other.ringRangeStart) 68 | && ringRangeEnd.equals(other.ringRangeEnd); 69 | } 70 | 71 | @Override 72 | public int hashCode() { 73 | return 31 * ringRangeStart.hashCode() + ringRangeEnd.hashCode(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/split/CassandraSplitSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.connector.cassandra.source.split; 20 | 21 | import org.apache.flink.connector.cassandra.source.utils.BigIntegerSerializationUtils; 22 | import org.apache.flink.core.io.SimpleVersionedSerializer; 23 | 24 | import java.io.ByteArrayInputStream; 25 | import java.io.ByteArrayOutputStream; 26 | import java.io.IOException; 27 | import java.io.ObjectInputStream; 28 | import java.io.ObjectOutputStream; 29 | import java.math.BigInteger; 30 | 31 | /** Serializer for {@link CassandraSplit}. 
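 *
 * <p>Wire format, as implemented in {@code serialize()} below: {@code ringRangeStart} then {@code
 * ringRangeEnd}, each written by {@link BigIntegerSerializationUtils} as an int length followed by
 * the {@link java.math.BigInteger#toByteArray()} bytes.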
*/ 32 | public class CassandraSplitSerializer implements SimpleVersionedSerializer<CassandraSplit> { 33 | 34 |     public static final CassandraSplitSerializer INSTANCE = new CassandraSplitSerializer(); 35 | 36 |     public static final int CURRENT_VERSION = 0; 37 | 38 |     private CassandraSplitSerializer() {} 39 | 40 |     @Override 41 |     public int getVersion() { 42 |         return CURRENT_VERSION; 43 |     } 44 | 45 |     @Override 46 |     public byte[] serialize(CassandraSplit cassandraSplit) throws IOException { 47 |         final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); 48 |         try (final ObjectOutputStream objectOutputStream = 49 |                 new ObjectOutputStream(byteArrayOutputStream)) { 50 |             BigIntegerSerializationUtils.write( 51 |                     cassandraSplit.getRingRangeStart(), objectOutputStream); 52 |             BigIntegerSerializationUtils.write( 53 |                     cassandraSplit.getRingRangeEnd(), objectOutputStream); 54 |         } 55 |         return byteArrayOutputStream.toByteArray(); 56 |     } 57 | 58 |     @Override 59 |     public CassandraSplit deserialize(int version, byte[] serialized) throws IOException { 60 |         try (final ByteArrayInputStream byteArrayInputStream = 61 |                         new ByteArrayInputStream(serialized); 62 |                 final ObjectInputStream objectInputStream = 63 |                         new ObjectInputStream(byteArrayInputStream)) { 64 |             final BigInteger ringRangeStart = BigIntegerSerializationUtils.read(objectInputStream); 65 |             final BigInteger ringRangeEnd = BigIntegerSerializationUtils.read(objectInputStream); 66 |             return new CassandraSplit(ringRangeStart, ringRangeEnd); 67 |         } 68 |     } 69 | } 70 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/connector/cassandra/source/utils/BigIntegerSerializationUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License.  You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 17 |  */ 18 | 19 | package org.apache.flink.connector.cassandra.source.utils; 20 | 21 | import java.io.DataInput; 22 | import java.io.DataOutput; 23 | import java.io.IOException; 24 | import java.math.BigInteger; 25 | 26 | /** Utils for {@link BigInteger} reading and writing in serde context. 
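 * <p>Round-trip property (follows from the implementation below): {@code write} emits an int
 * length followed by the {@link BigInteger#toByteArray()} bytes, and {@code read} reverses exactly
 * that, so a value read back always equals the value written.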
*/ 27 | public class BigIntegerSerializationUtils { 28 | public static void write(BigInteger bigInteger, DataOutput output) throws IOException { 29 | final byte[] bigIntegerBytes = bigInteger.toByteArray(); 30 | output.writeInt(bigIntegerBytes.length); 31 | output.write(bigIntegerBytes); 32 | } 33 | 34 | public static BigInteger read(DataInput input) throws IOException { 35 | final int bigIntegerSize = input.readInt(); 36 | final byte[] bigIntegerBytes = new byte[bigIntegerSize]; 37 | input.readFully(bigIntegerBytes); 38 | return new BigInteger(bigIntegerBytes); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/AbstractCassandraTupleSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.configuration.Configuration; 21 | 22 | import com.datastax.driver.core.BoundStatement; 23 | import com.datastax.driver.core.PreparedStatement; 24 | import com.datastax.driver.core.ResultSet; 25 | import com.google.common.util.concurrent.ListenableFuture; 26 | 27 | /** 28 | * Abstract sink to write tuple-like values into a Cassandra cluster. 
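 * Each record is converted to an {@code Object[]} by the {@code extract} method and bound
 * positionally to the placeholders of the insert query; when {@code ignoreNullFields} is set, null
 * fields are unset on the {@link BoundStatement} instead of being written.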
29 |  * 30 |  * @param <IN> Type of the elements emitted by this sink 31 |  */ 32 | public abstract class AbstractCassandraTupleSink<IN> extends CassandraSinkBase<IN, ResultSet> { 33 |     private final String insertQuery; 34 |     private transient PreparedStatement ps; 35 |     private final boolean ignoreNullFields; 36 | 37 |     public AbstractCassandraTupleSink( 38 |             String insertQuery, 39 |             ClusterBuilder builder, 40 |             CassandraSinkBaseConfig config, 41 |             CassandraFailureHandler failureHandler) { 42 |         super(builder, config, failureHandler); 43 |         this.insertQuery = insertQuery; 44 |         this.ignoreNullFields = config.getIgnoreNullFields(); 45 |     } 46 | 47 |     @Override 48 |     public void open(Configuration configuration) { 49 |         super.open(configuration); 50 |         this.ps = session.prepare(insertQuery); 51 |     } 52 | 53 |     @Override 54 |     public ListenableFuture<ResultSet> send(IN value) { 55 |         Object[] fields = extract(value); 56 |         return session.executeAsync(bind(fields)); 57 |     } 58 | 59 |     private BoundStatement bind(Object[] fields) { 60 |         BoundStatement bs = ps.bind(fields); 61 |         if (ignoreNullFields) { 62 |             for (int i = 0; i < fields.length; i++) { 63 |                 if (fields[i] == null) { 64 |                     bs.unset(i); 65 |                 } 66 |             } 67 |         } 68 |         return bs; 69 |     } 70 | 71 |     protected abstract Object[] extract(IN record); 72 | } 73 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraAppendTableSink.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one or more 3 |  * contributor license agreements.  See the NOTICE file distributed with 4 |  * this work for additional information regarding copyright ownership. 5 |  * The ASF licenses this file to You under the Apache License, Version 2.0 6 |  * (the "License"); you may not use this file except in compliance with 7 |  * the License.  You may obtain a copy of the License at 8 |  * 9 |  *    http://www.apache.org/licenses/LICENSE-2.0 10 |  * 11 |  * Unless required by applicable law or agreed to in writing, software 12 |  * distributed under the License is distributed on an "AS IS" BASIS, 13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 |  * See the License for the specific language governing permissions and 15 |  * limitations under the License. 16 |  */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.api.common.typeinfo.TypeInformation; 21 | import org.apache.flink.api.java.typeutils.RowTypeInfo; 22 | import org.apache.flink.streaming.api.datastream.DataStream; 23 | import org.apache.flink.streaming.api.datastream.DataStreamSink; 24 | import org.apache.flink.table.api.TableException; 25 | import org.apache.flink.table.sinks.AppendStreamTableSink; 26 | import org.apache.flink.table.utils.TableConnectorUtils; 27 | import org.apache.flink.types.Row; 28 | import org.apache.flink.util.Preconditions; 29 | 30 | import java.util.Properties; 31 | 32 | /** An {@link AppendStreamTableSink} to write an append stream Table to a Cassandra table. 
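 *
 * <p>Usage sketch (hypothetical contact point and CQL):
 *
 * {@code
 * 	CassandraAppendTableSink sink = new CassandraAppendTableSink(
 * 			new ClusterBuilder() {
 * 				protected Cluster buildCluster(Cluster.Builder builder) {
 * 					return builder.addContactPoint("127.0.0.1").build();
 * 				}
 * 			},
 * 			"INSERT INTO example.events (id, payload) VALUES (?, ?);");
 * }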
*/ 33 | public class CassandraAppendTableSink implements AppendStreamTableSink<Row> { 34 | 35 |     private final ClusterBuilder builder; 36 |     private final String cql; 37 |     private String[] fieldNames; 38 |     private TypeInformation[] fieldTypes; 39 |     private final Properties properties; 40 | 41 |     public CassandraAppendTableSink(ClusterBuilder builder, String cql) { 42 |         this.builder = Preconditions.checkNotNull(builder, "ClusterBuilder must not be null."); 43 |         this.cql = Preconditions.checkNotNull(cql, "CQL query must not be null."); 44 |         this.properties = new Properties(); 45 |     } 46 | 47 |     public CassandraAppendTableSink(ClusterBuilder builder, String cql, Properties properties) { 48 |         this.builder = Preconditions.checkNotNull(builder, "ClusterBuilder must not be null."); 49 |         this.cql = Preconditions.checkNotNull(cql, "CQL query must not be null."); 50 |         this.properties = Preconditions.checkNotNull(properties, "Properties must not be null."); 51 |     } 52 | 53 |     @Override 54 |     public TypeInformation<Row> getOutputType() { 55 |         return new RowTypeInfo(fieldTypes); 56 |     } 57 | 58 |     @Override 59 |     public String[] getFieldNames() { 60 |         return this.fieldNames; 61 |     } 62 | 63 |     @Override 64 |     public TypeInformation[] getFieldTypes() { 65 |         return this.fieldTypes; 66 |     } 67 | 68 |     @Override 69 |     public CassandraAppendTableSink configure( 70 |             String[] fieldNames, TypeInformation[] fieldTypes) { 71 |         CassandraAppendTableSink cassandraTableSink = 72 |                 new CassandraAppendTableSink(this.builder, this.cql, this.properties); 73 |         cassandraTableSink.fieldNames = 74 |                 Preconditions.checkNotNull(fieldNames, "Field names must not be null."); 75 |         cassandraTableSink.fieldTypes = 76 |                 Preconditions.checkNotNull(fieldTypes, "Field types must not be null."); 77 |         Preconditions.checkArgument( 78 |                 fieldNames.length == fieldTypes.length, 79 |                 "Number of provided field names and types does not match."); 80 |         return cassandraTableSink; 81 |     } 82 | 83 |     @Override 84 |     public DataStreamSink<?> consumeDataStream(DataStream<Row> dataStream) { 85 |         if (!(dataStream.getType() instanceof RowTypeInfo)) { 86 |             throw new TableException( 87 |                     "No support for the type of the given DataStream: " + dataStream.getType()); 88 |         } 89 | 90 |         CassandraRowSink sink = 91 |                 new CassandraRowSink( 92 |                         dataStream.getType().getArity(), 93 |                         cql, 94 |                         builder, 95 |                         CassandraSinkBaseConfig.newBuilder().build(), 96 |                         new NoOpCassandraFailureHandler()); 97 | 98 |         return dataStream 99 |                 .addSink(sink) 100 |                 .setParallelism(dataStream.getParallelism()) 101 |                 .name(TableConnectorUtils.generateRuntimeName(this.getClass(), fieldNames)); 102 |     } 103 | } 104 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraCommitter.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import org.apache.flink.api.common.ExecutionConfig; 22 | import org.apache.flink.api.java.ClosureCleaner; 23 | import org.apache.flink.streaming.runtime.operators.CheckpointCommitter; 24 | 25 | import com.datastax.driver.core.Cluster; 26 | import com.datastax.driver.core.Row; 27 | import com.datastax.driver.core.Session; 28 | 29 | import java.util.HashMap; 30 | import java.util.Iterator; 31 | import java.util.Map; 32 | 33 | /** 34 | * CheckpointCommitter that saves information about completed checkpoints within a separate table in 35 | * a cassandra database. 36 | * 37 | *
Entries are in the form |operator_id | subtask_id | last_completed_checkpoint| 38 | */ 39 | public class CassandraCommitter extends CheckpointCommitter { 40 | 41 | private static final long serialVersionUID = 1L; 42 | 43 | private final ClusterBuilder builder; 44 | private transient Cluster cluster; 45 | private transient Session session; 46 | 47 | private String keySpace = "flink_auxiliary"; 48 | private String table = "checkpoints_"; 49 | 50 | /** 51 | * A cache of the last committed checkpoint ids per subtask index. This is used to avoid 52 | * redundant round-trips to Cassandra (see {@link #isCheckpointCommitted(int, long)}. 53 | */ 54 | private final Map lastCommittedCheckpoints = new HashMap<>(); 55 | 56 | public CassandraCommitter(ClusterBuilder builder) { 57 | this.builder = builder; 58 | ClosureCleaner.clean(builder, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); 59 | } 60 | 61 | public CassandraCommitter(ClusterBuilder builder, String keySpace) { 62 | this(builder); 63 | this.keySpace = keySpace; 64 | } 65 | 66 | /** Internally used to set the job ID after instantiation. */ 67 | public void setJobId(String id) throws Exception { 68 | super.setJobId(id); 69 | table += id; 70 | } 71 | 72 | /** 73 | * Generates the necessary tables to store information. 74 | * 75 | * @throws Exception 76 | */ 77 | @Override 78 | public void createResource() throws Exception { 79 | cluster = builder.getCluster(); 80 | session = cluster.connect(); 81 | 82 | session.execute( 83 | String.format( 84 | "CREATE KEYSPACE IF NOT EXISTS %s with replication={'class':'SimpleStrategy', 'replication_factor':1};", 85 | keySpace)); 86 | session.execute( 87 | String.format( 88 | "CREATE TABLE IF NOT EXISTS %s.%s (sink_id text, sub_id int, checkpoint_id bigint, PRIMARY KEY (sink_id, sub_id));", 89 | keySpace, table)); 90 | 91 | try { 92 | session.close(); 93 | } catch (Exception e) { 94 | LOG.error("Error while closing session.", e); 95 | } 96 | try { 97 | cluster.close(); 98 | } catch (Exception e) { 99 | LOG.error("Error while closing cluster.", e); 100 | } 101 | } 102 | 103 | @Override 104 | public void open() throws Exception { 105 | if (builder == null) { 106 | throw new RuntimeException("No ClusterBuilder was set."); 107 | } 108 | cluster = builder.getCluster(); 109 | session = cluster.connect(); 110 | } 111 | 112 | @Override 113 | public void close() throws Exception { 114 | this.lastCommittedCheckpoints.clear(); 115 | try { 116 | session.close(); 117 | } catch (Exception e) { 118 | LOG.error("Error while closing session.", e); 119 | } 120 | try { 121 | cluster.close(); 122 | } catch (Exception e) { 123 | LOG.error("Error while closing cluster.", e); 124 | } 125 | } 126 | 127 | @Override 128 | public void commitCheckpoint(int subtaskIdx, long checkpointId) { 129 | String statement = 130 | String.format( 131 | "UPDATE %s.%s set checkpoint_id=%d where sink_id='%s' and sub_id=%d;", 132 | keySpace, table, checkpointId, operatorId, subtaskIdx); 133 | 134 | session.execute(statement); 135 | lastCommittedCheckpoints.put(subtaskIdx, checkpointId); 136 | } 137 | 138 | @Override 139 | public boolean isCheckpointCommitted(int subtaskIdx, long checkpointId) { 140 | // Pending checkpointed buffers are committed in ascending order of their 141 | // checkpoint id. This way we can tell if a checkpointed buffer was committed 142 | // just by asking the third-party storage system for the last checkpoint id 143 | // committed by the specified subtask. 
144 | 145 | Long lastCommittedCheckpoint = lastCommittedCheckpoints.get(subtaskIdx); 146 | if (lastCommittedCheckpoint == null) { 147 | String statement = 148 | String.format( 149 | "SELECT checkpoint_id FROM %s.%s where sink_id='%s' and sub_id=%d;", 150 | keySpace, table, operatorId, subtaskIdx); 151 | 152 | Iterator resultIt = session.execute(statement).iterator(); 153 | if (resultIt.hasNext()) { 154 | lastCommittedCheckpoint = resultIt.next().getLong("checkpoint_id"); 155 | lastCommittedCheckpoints.put(subtaskIdx, lastCommittedCheckpoint); 156 | } 157 | } 158 | return lastCommittedCheckpoint != null && checkpointId <= lastCommittedCheckpoint; 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraFailureHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.annotation.PublicEvolving; 21 | 22 | import java.io.IOException; 23 | import java.io.Serializable; 24 | 25 | /** 26 | * An implementation of {@link CassandraFailureHandler} is provided by the user to define how {@link 27 | * Throwable Throwable} should be handled, e.g. dropping them if the failure is only temporary. 28 | * 29 | *
Example: 30 | * 31 | *
{@code
32 |  * 	private static class ExampleFailureHandler implements CassandraFailureHandler {
33 |  *
34 |  * 		@Override
35 |  * 		void onFailure(Throwable failure) throws IOException {
36 |  * 			if (ExceptionUtils.findThrowable(failure, WriteTimeoutException.class).isPresent()) {
37 |  * 				// drop exception
38 |  * 			} else {
39 |  * 				// for all other failures, fail the sink;
40 |  * 				// here the failure is simply rethrown, but users can also choose to throw custom exceptions
41 |  * 				throw failure;
42 |  * 			}
43 |  * 		}
44 |  * 	}
45 |  *
46 |  * }
47 | * 48 | *
The above example will let the sink ignore the WriteTimeoutException, without failing the 49 |  * sink. For all other failures, the sink will fail. 50 |  */ 51 | @PublicEvolving 52 | public interface CassandraFailureHandler extends Serializable { 53 | 54 |     /** 55 |      * Handle a failed {@link Throwable}. 56 |      * 57 |      * @param failure the cause of failure 58 |      * @throws IOException if the sink should fail on this failure; the implementation should 59 |      *     rethrow the throwable or a custom one 60 |      */ 61 |     void onFailure(Throwable failure) throws IOException; 62 | } 63 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraPojoSink.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one or more 3 |  * contributor license agreements.  See the NOTICE file distributed with 4 |  * this work for additional information regarding copyright ownership. 5 |  * The ASF licenses this file to You under the Apache License, Version 2.0 6 |  * (the "License"); you may not use this file except in compliance with 7 |  * the License.  You may obtain a copy of the License at 8 |  * 9 |  *    http://www.apache.org/licenses/LICENSE-2.0 10 |  * 11 |  * Unless required by applicable law or agreed to in writing, software 12 |  * distributed under the License is distributed on an "AS IS" BASIS, 13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 |  * See the License for the specific language governing permissions and 15 |  * limitations under the License. 16 |  */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.configuration.Configuration; 21 | 22 | import com.datastax.driver.core.ResultSet; 23 | import com.datastax.driver.core.Session; 24 | import com.datastax.driver.mapping.Mapper; 25 | import com.datastax.driver.mapping.MappingManager; 26 | import com.google.common.util.concurrent.ListenableFuture; 27 | 28 | import javax.annotation.Nullable; 29 | 30 | /** 31 |  * Flink Sink to save data into a Cassandra cluster using the DataStax {@code Mapper}, 33 |  * which uses annotations from 35 |  * {@code com.datastax.driver.mapping.annotations}. Please read the recommendations in {@linkplain 36 |  * CassandraSinkBase}. 37 |  * 38 |  * @param <IN> Type of the elements emitted by this sink 39 |  */ 40 | public class CassandraPojoSink<IN> extends CassandraSinkBase<IN, ResultSet> { 41 | 42 |     private static final long serialVersionUID = 1L; 43 | 44 |     protected final Class<IN> clazz; 45 |     private final MapperOptions options; 46 |     private final String keyspace; 47 |     protected transient Mapper<IN> mapper; 48 |     protected transient MappingManager mappingManager; 49 | 50 |     /** 51 |      * The main constructor for creating CassandraPojoSink.
52 |      * 53 |      * @param clazz Class instance 54 |      */ 55 |     public CassandraPojoSink(Class<IN> clazz, ClusterBuilder builder) { 56 |         this(clazz, builder, null, null); 57 |     } 58 | 59 |     public CassandraPojoSink( 60 |             Class<IN> clazz, ClusterBuilder builder, @Nullable MapperOptions options) { 61 |         this(clazz, builder, options, null); 62 |     } 63 | 64 |     public CassandraPojoSink(Class<IN> clazz, ClusterBuilder builder, String keyspace) { 65 |         this(clazz, builder, null, keyspace); 66 |     } 67 | 68 |     public CassandraPojoSink( 69 |             Class<IN> clazz, 70 |             ClusterBuilder builder, 71 |             @Nullable MapperOptions options, 72 |             String keyspace) { 73 |         this(clazz, builder, options, keyspace, CassandraSinkBaseConfig.newBuilder().build()); 74 |     } 75 | 76 |     CassandraPojoSink( 77 |             Class<IN> clazz, 78 |             ClusterBuilder builder, 79 |             @Nullable MapperOptions options, 80 |             String keyspace, 81 |             CassandraSinkBaseConfig config) { 82 |         this(clazz, builder, options, keyspace, config, new NoOpCassandraFailureHandler()); 83 |     } 84 | 85 |     CassandraPojoSink( 86 |             Class<IN> clazz, 87 |             ClusterBuilder builder, 88 |             @Nullable MapperOptions options, 89 |             String keyspace, 90 |             CassandraSinkBaseConfig config, 91 |             CassandraFailureHandler failureHandler) { 92 |         super(builder, config, failureHandler); 93 |         this.clazz = clazz; 94 |         this.options = options; 95 |         this.keyspace = keyspace; 96 |     } 97 | 98 |     @Override 99 |     public void open(Configuration configuration) { 100 |         super.open(configuration); 101 |         try { 102 |             this.mappingManager = new MappingManager(session); 103 |             this.mapper = mappingManager.mapper(clazz); 104 |             if (options != null) { 105 |                 Mapper.Option[] optionsArray = options.getMapperOptions(); 106 |                 if (optionsArray != null) { 107 |                     this.mapper.setDefaultSaveOptions(optionsArray); 108 |                 } 109 |             } 110 |         } catch (Exception e) { 111 |             throw new RuntimeException( 112 |                     "Cannot create CassandraPojoSink with input: " + clazz.getSimpleName(), e); 113 |         } 114 |     } 115 | 116 |     @Override 117 |     protected Session createSession() { 118 |         return cluster.connect(keyspace); 119 |     } 120 | 121 |     @Override 122 |     public ListenableFuture<ResultSet> send(IN value) { 123 |         return session.executeAsync(mapper.saveQuery(value)); 124 |     } 125 | } 126 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraRowSink.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one or more 3 |  * contributor license agreements.  See the NOTICE file distributed with 4 |  * this work for additional information regarding copyright ownership. 5 |  * The ASF licenses this file to You under the Apache License, Version 2.0 6 |  * (the "License"); you may not use this file except in compliance with 7 |  * the License.  You may obtain a copy of the License at 8 |  * 9 |  *    http://www.apache.org/licenses/LICENSE-2.0 10 |  * 11 |  * Unless required by applicable law or agreed to in writing, software 12 |  * distributed under the License is distributed on an "AS IS" BASIS, 13 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 |  * See the License for the specific language governing permissions and 15 |  * limitations under the License. 16 |  */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.types.Row; 21 | 22 | /** 23 |  * A SinkFunction to write Row records into a Cassandra table. 
Please read the recommendations in 24 |  * {@linkplain CassandraSinkBase}. 25 |  */ 26 | public class CassandraRowSink extends AbstractCassandraTupleSink<Row> { 27 | 28 |     private final int rowArity; 29 | 30 |     public CassandraRowSink(int rowArity, String insertQuery, ClusterBuilder builder) { 31 |         this(rowArity, insertQuery, builder, CassandraSinkBaseConfig.newBuilder().build()); 32 |     } 33 | 34 |     CassandraRowSink( 35 |             int rowArity, 36 |             String insertQuery, 37 |             ClusterBuilder builder, 38 |             CassandraSinkBaseConfig config) { 39 |         this(rowArity, insertQuery, builder, config, new NoOpCassandraFailureHandler()); 40 |     } 41 | 42 |     CassandraRowSink( 43 |             int rowArity, 44 |             String insertQuery, 45 |             ClusterBuilder builder, 46 |             CassandraSinkBaseConfig config, 47 |             CassandraFailureHandler failureHandler) { 48 |         super(insertQuery, builder, config, failureHandler); 49 |         this.rowArity = rowArity; 50 |     } 51 | 52 |     @Override 53 |     protected Object[] extract(Row record) { 54 |         Object[] al = new Object[rowArity]; 55 |         for (int i = 0; i < rowArity; i++) { 56 |             al[i] = record.getField(i); 57 |         } 58 |         return al; 59 |     } 60 | } 61 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraRowWriteAheadSink.java: -------------------------------------------------------------------------------- 1 | /* 2 |  * Licensed to the Apache Software Foundation (ASF) under one 3 |  * or more contributor license agreements.  See the NOTICE file 4 |  * distributed with this work for additional information 5 |  * regarding copyright ownership.  The ASF licenses this file 6 |  * to you under the Apache License, Version 2.0 (the 7 |  * "License"); you may not use this file except in compliance 8 |  * with the License.  You may obtain a copy of the License at 9 |  * 10 |  *     http://www.apache.org/licenses/LICENSE-2.0 11 |  * 12 |  * Unless required by applicable law or agreed to in writing, software 13 |  * distributed under the License is distributed on an "AS IS" BASIS, 14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 |  * See the License for the specific language governing permissions and 16 |  * limitations under the License. 17 |  */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import org.apache.flink.api.common.ExecutionConfig; 22 | import org.apache.flink.api.common.typeutils.TypeSerializer; 23 | import org.apache.flink.api.java.ClosureCleaner; 24 | import org.apache.flink.api.java.typeutils.runtime.RowSerializer; 25 | import org.apache.flink.streaming.runtime.operators.CheckpointCommitter; 26 | import org.apache.flink.streaming.runtime.operators.GenericWriteAheadSink; 27 | import org.apache.flink.types.Row; 28 | 29 | import com.datastax.driver.core.BoundStatement; 30 | import com.datastax.driver.core.Cluster; 31 | import com.datastax.driver.core.PreparedStatement; 32 | import com.datastax.driver.core.ResultSet; 33 | import com.datastax.driver.core.ResultSetFuture; 34 | import com.datastax.driver.core.Session; 35 | import com.google.common.util.concurrent.FutureCallback; 36 | import com.google.common.util.concurrent.Futures; 37 | 38 | import java.util.UUID; 39 | import java.util.concurrent.atomic.AtomicInteger; 40 | import java.util.concurrent.atomic.AtomicReference; 41 | 42 | /** 43 |  * Sink that emits its input elements into a Cassandra table. 
This sink stores incoming records 44 | * within a {@link org.apache.flink.runtime.state.AbstractStateBackend}, and only commits them to 45 | * Cassandra if a checkpoint is completed. 46 | */ 47 | public class CassandraRowWriteAheadSink extends GenericWriteAheadSink { 48 | private static final long serialVersionUID = 1L; 49 | 50 | protected transient Cluster cluster; 51 | protected transient Session session; 52 | 53 | private final String insertQuery; 54 | private transient PreparedStatement preparedStatement; 55 | 56 | private ClusterBuilder builder; 57 | 58 | private int arity; 59 | private transient Object[] fields; 60 | 61 | protected CassandraRowWriteAheadSink( 62 | String insertQuery, 63 | TypeSerializer serializer, 64 | ClusterBuilder builder, 65 | CheckpointCommitter committer) 66 | throws Exception { 67 | super(committer, serializer, UUID.randomUUID().toString().replace("-", "_")); 68 | this.insertQuery = insertQuery; 69 | this.builder = builder; 70 | ClosureCleaner.clean(builder, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); 71 | } 72 | 73 | public void open() throws Exception { 74 | super.open(); 75 | if (!getRuntimeContext().isCheckpointingEnabled()) { 76 | throw new IllegalStateException( 77 | "The write-ahead log requires checkpointing to be enabled."); 78 | } 79 | cluster = builder.getCluster(); 80 | session = cluster.connect(); 81 | preparedStatement = session.prepare(insertQuery); 82 | 83 | arity = ((RowSerializer) serializer).getArity(); 84 | fields = new Object[arity]; 85 | } 86 | 87 | @Override 88 | public void close() throws Exception { 89 | super.close(); 90 | try { 91 | if (session != null) { 92 | session.close(); 93 | } 94 | } catch (Exception e) { 95 | LOG.error("Error while closing session.", e); 96 | } 97 | try { 98 | if (cluster != null) { 99 | cluster.close(); 100 | } 101 | } catch (Exception e) { 102 | LOG.error("Error while closing cluster.", e); 103 | } 104 | } 105 | 106 | @Override 107 | protected boolean sendValues(Iterable values, long checkpointId, long timestamp) 108 | throws Exception { 109 | final AtomicInteger updatesCount = new AtomicInteger(0); 110 | final AtomicInteger updatesConfirmed = new AtomicInteger(0); 111 | 112 | final AtomicReference exception = new AtomicReference<>(); 113 | 114 | FutureCallback callback = 115 | new FutureCallback() { 116 | @Override 117 | public void onSuccess(ResultSet resultSet) { 118 | updatesConfirmed.incrementAndGet(); 119 | if (updatesCount.get() > 0) { // only set if all updates have been sent 120 | if (updatesCount.get() == updatesConfirmed.get()) { 121 | synchronized (updatesConfirmed) { 122 | updatesConfirmed.notifyAll(); 123 | } 124 | } 125 | } 126 | } 127 | 128 | @Override 129 | public void onFailure(Throwable throwable) { 130 | if (exception.compareAndSet(null, throwable)) { 131 | LOG.error("Error while sending value.", throwable); 132 | synchronized (updatesConfirmed) { 133 | updatesConfirmed.notifyAll(); 134 | } 135 | } 136 | } 137 | }; 138 | 139 | // set values for prepared statement 140 | int updatesSent = 0; 141 | for (Row value : values) { 142 | for (int x = 0; x < arity; x++) { 143 | fields[x] = value.getField(x); 144 | } 145 | // insert values and send to cassandra 146 | BoundStatement s = preparedStatement.bind(fields); 147 | s.setDefaultTimestamp(timestamp); 148 | ResultSetFuture result = session.executeAsync(s); 149 | updatesSent++; 150 | if (result != null) { 151 | // add callback to detect errors 152 | Futures.addCallback(result, callback); 153 | } 154 | } 155 | 
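        // publish the total only after every statement has been sent: the success
        // callback above treats updatesCount == 0 as "still sending" and signals
        // completion only once updatesCount equals updatesConfirmed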
updatesCount.set(updatesSent); 156 | 157 | synchronized (updatesConfirmed) { 158 | while (exception.get() == null && updatesSent != updatesConfirmed.get()) { 159 | updatesConfirmed.wait(); 160 | } 161 | } 162 | 163 | if (exception.get() != null) { 164 | LOG.warn("Sending a value failed.", exception.get()); 165 | return false; 166 | } else { 167 | return true; 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraScalaProductSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import scala.Product; 22 | 23 | /** 24 | * Sink to write scala tuples and case classes into a Cassandra cluster. Please read the 25 | * recommendations in {@linkplain CassandraSinkBase}. 26 | * 27 | * @param Type of the elements emitted by this sink, it must extend {@link Product} 28 | */ 29 | public class CassandraScalaProductSink extends AbstractCassandraTupleSink { 30 | public CassandraScalaProductSink(String insertQuery, ClusterBuilder builder) { 31 | this(insertQuery, builder, CassandraSinkBaseConfig.newBuilder().build()); 32 | } 33 | 34 | CassandraScalaProductSink( 35 | String insertQuery, ClusterBuilder builder, CassandraSinkBaseConfig config) { 36 | this(insertQuery, builder, config, new NoOpCassandraFailureHandler()); 37 | } 38 | 39 | CassandraScalaProductSink( 40 | String insertQuery, 41 | ClusterBuilder builder, 42 | CassandraSinkBaseConfig config, 43 | CassandraFailureHandler failureHandler) { 44 | super(insertQuery, builder, config, failureHandler); 45 | } 46 | 47 | @Override 48 | protected Object[] extract(IN record) { 49 | Object[] al = new Object[record.productArity()]; 50 | for (int i = 0; i < record.productArity(); i++) { 51 | al[i] = record.productElement(i); 52 | } 53 | return al; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraSinkBaseConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.util.Preconditions; 21 | 22 | import java.io.Serializable; 23 | import java.time.Duration; 24 | 25 | /** Configuration for {@link CassandraSinkBase}. */ 26 | public final class CassandraSinkBaseConfig implements Serializable { 27 | // ------------------------ Default Configurations ------------------------ 28 | 29 | /** The default maximum number of concurrent requests. By default, {@code Integer.MAX_VALUE}. */ 30 | public static final int DEFAULT_MAX_CONCURRENT_REQUESTS = Integer.MAX_VALUE; 31 | 32 | /** 33 | * The default timeout duration when acquiring a permit to execute. By default, {@code 34 | * Long.MAX_VALUE}. 35 | */ 36 | public static final Duration DEFAULT_MAX_CONCURRENT_REQUESTS_TIMEOUT = 37 | Duration.ofMillis(Long.MAX_VALUE); 38 | 39 | /** The default option to ignore null fields on insertion. By default, {@code false}. */ 40 | public static final boolean DEFAULT_IGNORE_NULL_FIELDS = false; 41 | 42 | // ------------------------- Configuration Fields ------------------------- 43 | 44 | /** Maximum number of concurrent requests allowed. */ 45 | private final int maxConcurrentRequests; 46 | 47 | /** Timeout duration when acquiring a permit to execute. */ 48 | private final Duration maxConcurrentRequestsTimeout; 49 | 50 | /** Whether to ignore null fields on insert. */ 51 | private final boolean ignoreNullFields; 52 | 53 | private CassandraSinkBaseConfig( 54 | int maxConcurrentRequests, 55 | Duration maxConcurrentRequestsTimeout, 56 | boolean ignoreNullFields) { 57 | Preconditions.checkArgument( 58 | maxConcurrentRequests > 0, "Max concurrent requests is expected to be positive"); 59 | Preconditions.checkNotNull( 60 | maxConcurrentRequestsTimeout, "Max concurrent requests timeout cannot be null"); 61 | Preconditions.checkArgument( 62 | !maxConcurrentRequestsTimeout.isNegative(), 63 | "Max concurrent requests timeout is expected to be positive"); 64 | this.maxConcurrentRequests = maxConcurrentRequests; 65 | this.maxConcurrentRequestsTimeout = maxConcurrentRequestsTimeout; 66 | this.ignoreNullFields = ignoreNullFields; 67 | } 68 | 69 | public int getMaxConcurrentRequests() { 70 | return maxConcurrentRequests; 71 | } 72 | 73 | public Duration getMaxConcurrentRequestsTimeout() { 74 | return maxConcurrentRequestsTimeout; 75 | } 76 | 77 | public boolean getIgnoreNullFields() { 78 | return ignoreNullFields; 79 | } 80 | 81 | @Override 82 | public String toString() { 83 | return "CassandraSinkBaseConfig{" 84 | + "maxConcurrentRequests=" 85 | + maxConcurrentRequests 86 | + ", maxConcurrentRequestsTimeout=" 87 | + maxConcurrentRequestsTimeout 88 | + ", ignoreNullFields=" 89 | + ignoreNullFields 90 | + '}'; 91 | } 92 | 93 | public static Builder newBuilder() { 94 | return new Builder(); 95 | } 96 | 97 | /** Builder for the {@link CassandraSinkBaseConfig}. 
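 *
 * <p>A minimal usage sketch (the values are illustrative, not recommended defaults):
 *
 * <pre>{@code
 * CassandraSinkBaseConfig config =
 *         CassandraSinkBaseConfig.newBuilder()
 *                 .setMaxConcurrentRequests(500)
 *                 .setMaxConcurrentRequestsTimeout(Duration.ofSeconds(30))
 *                 .setIgnoreNullFields(true)
 *                 .build();
 * }</pre>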
*/ 98 | public static class Builder { 99 | private int maxConcurrentRequests = DEFAULT_MAX_CONCURRENT_REQUESTS; 100 | private Duration maxConcurrentRequestsTimeout = DEFAULT_MAX_CONCURRENT_REQUESTS_TIMEOUT; 101 | private boolean ignoreNullFields = DEFAULT_IGNORE_NULL_FIELDS; 102 | 103 | Builder() {} 104 | 105 | public Builder setMaxConcurrentRequests(int maxConcurrentRequests) { 106 | this.maxConcurrentRequests = maxConcurrentRequests; 107 | return this; 108 | } 109 | 110 | public Builder setMaxConcurrentRequestsTimeout(Duration timeout) { 111 | this.maxConcurrentRequestsTimeout = timeout; 112 | return this; 113 | } 114 | 115 | public Builder setIgnoreNullFields(boolean ignoreNullFields) { 116 | this.ignoreNullFields = ignoreNullFields; 117 | return this; 118 | } 119 | 120 | public CassandraSinkBaseConfig build() { 121 | return new CassandraSinkBaseConfig( 122 | maxConcurrentRequests, maxConcurrentRequestsTimeout, ignoreNullFields); 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraTupleSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.api.java.tuple.Tuple; 21 | 22 | /** 23 | * Sink to write Flink {@link Tuple}s into a Cassandra cluster. Please read the recommendations in 24 | * {@linkplain CassandraSinkBase}. 
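 *
 * <p>A usage sketch (keyspace, table and columns are placeholders, {@code clusterBuilder} is
 * any {@link ClusterBuilder}; the generic parameter is written out here even though angle
 * brackets are elided elsewhere in this listing):
 *
 * <pre>{@code
 * CassandraTupleSink<Tuple2<Integer, String>> sink =
 *         new CassandraTupleSink<>(
 *                 "INSERT INTO example.tuples (id, value) VALUES (?, ?);",
 *                 clusterBuilder);
 * }</pre>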
25 | * 26 | * @param Type of the elements emitted by this sink, it must extend {@link Tuple} 27 | */ 28 | public class CassandraTupleSink extends AbstractCassandraTupleSink { 29 | public CassandraTupleSink(String insertQuery, ClusterBuilder builder) { 30 | this(insertQuery, builder, CassandraSinkBaseConfig.newBuilder().build()); 31 | } 32 | 33 | CassandraTupleSink(String insertQuery, ClusterBuilder builder, CassandraSinkBaseConfig config) { 34 | this(insertQuery, builder, config, new NoOpCassandraFailureHandler()); 35 | } 36 | 37 | CassandraTupleSink( 38 | String insertQuery, 39 | ClusterBuilder builder, 40 | CassandraSinkBaseConfig config, 41 | CassandraFailureHandler failureHandler) { 42 | super(insertQuery, builder, config, failureHandler); 43 | } 44 | 45 | @Override 46 | protected Object[] extract(IN record) { 47 | Object[] al = new Object[record.getArity()]; 48 | for (int i = 0; i < record.getArity(); i++) { 49 | al[i] = record.getField(i); 50 | } 51 | return al; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/CassandraTupleWriteAheadSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import org.apache.flink.api.common.ExecutionConfig; 22 | import org.apache.flink.api.common.typeutils.TypeSerializer; 23 | import org.apache.flink.api.java.ClosureCleaner; 24 | import org.apache.flink.api.java.tuple.Tuple; 25 | import org.apache.flink.api.java.typeutils.runtime.TupleSerializer; 26 | import org.apache.flink.streaming.runtime.operators.CheckpointCommitter; 27 | import org.apache.flink.streaming.runtime.operators.GenericWriteAheadSink; 28 | 29 | import com.datastax.driver.core.BoundStatement; 30 | import com.datastax.driver.core.Cluster; 31 | import com.datastax.driver.core.PreparedStatement; 32 | import com.datastax.driver.core.ResultSet; 33 | import com.datastax.driver.core.ResultSetFuture; 34 | import com.datastax.driver.core.Session; 35 | import com.google.common.util.concurrent.FutureCallback; 36 | import com.google.common.util.concurrent.Futures; 37 | 38 | import java.util.UUID; 39 | import java.util.concurrent.atomic.AtomicInteger; 40 | import java.util.concurrent.atomic.AtomicReference; 41 | 42 | /** 43 | * Sink that emits its input elements into a Cassandra database. This sink stores incoming records 44 | * within a {@link org.apache.flink.runtime.state.AbstractStateBackend}, and only commits them to 45 | * cassandra if a checkpoint is completed. 
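 *
 * <p>This sink is usually not constructed directly; a sketch of enabling it through the
 * {@link CassandraSink} builder of this module (the query is a placeholder, and checkpointing
 * must be enabled on the environment):
 *
 * <pre>{@code
 * env.enableCheckpointing(5000); // commits happen when checkpoints complete
 * CassandraSink.addSink(tupleStream)
 *         .setQuery("INSERT INTO example.tuples (id, value) VALUES (?, ?);")
 *         .setClusterBuilder(clusterBuilder)
 *         .enableWriteAheadLog()
 *         .build();
 * }</pre>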
46 | * 47 | * @param Type of the elements emitted by this sink 48 | */ 49 | public class CassandraTupleWriteAheadSink extends GenericWriteAheadSink { 50 | private static final long serialVersionUID = 1L; 51 | 52 | protected transient Cluster cluster; 53 | protected transient Session session; 54 | 55 | private final String insertQuery; 56 | private transient PreparedStatement preparedStatement; 57 | 58 | private ClusterBuilder builder; 59 | 60 | private transient Object[] fields; 61 | 62 | protected CassandraTupleWriteAheadSink( 63 | String insertQuery, 64 | TypeSerializer serializer, 65 | ClusterBuilder builder, 66 | CheckpointCommitter committer) 67 | throws Exception { 68 | super(committer, serializer, UUID.randomUUID().toString().replace("-", "_")); 69 | this.insertQuery = insertQuery; 70 | this.builder = builder; 71 | ClosureCleaner.clean(builder, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); 72 | } 73 | 74 | public void open() throws Exception { 75 | super.open(); 76 | if (!getRuntimeContext().isCheckpointingEnabled()) { 77 | throw new IllegalStateException( 78 | "The write-ahead log requires checkpointing to be enabled."); 79 | } 80 | cluster = builder.getCluster(); 81 | session = cluster.connect(); 82 | preparedStatement = session.prepare(insertQuery); 83 | 84 | fields = new Object[((TupleSerializer) serializer).getArity()]; 85 | } 86 | 87 | @Override 88 | public void close() throws Exception { 89 | super.close(); 90 | try { 91 | if (session != null) { 92 | session.close(); 93 | } 94 | } catch (Exception e) { 95 | LOG.error("Error while closing session.", e); 96 | } 97 | try { 98 | if (cluster != null) { 99 | cluster.close(); 100 | } 101 | } catch (Exception e) { 102 | LOG.error("Error while closing cluster.", e); 103 | } 104 | } 105 | 106 | @Override 107 | protected boolean sendValues(Iterable values, long checkpointId, long timestamp) 108 | throws Exception { 109 | final AtomicInteger updatesCount = new AtomicInteger(0); 110 | final AtomicInteger updatesConfirmed = new AtomicInteger(0); 111 | 112 | final AtomicReference exception = new AtomicReference<>(); 113 | 114 | FutureCallback callback = 115 | new FutureCallback() { 116 | @Override 117 | public void onSuccess(ResultSet resultSet) { 118 | updatesConfirmed.incrementAndGet(); 119 | if (updatesCount.get() > 0) { // only set if all updates have been sent 120 | if (updatesCount.get() == updatesConfirmed.get()) { 121 | synchronized (updatesConfirmed) { 122 | updatesConfirmed.notifyAll(); 123 | } 124 | } 125 | } 126 | } 127 | 128 | @Override 129 | public void onFailure(Throwable throwable) { 130 | if (exception.compareAndSet(null, throwable)) { 131 | LOG.error("Error while sending value.", throwable); 132 | synchronized (updatesConfirmed) { 133 | updatesConfirmed.notifyAll(); 134 | } 135 | } 136 | } 137 | }; 138 | 139 | // set values for prepared statement 140 | int updatesSent = 0; 141 | for (IN value : values) { 142 | for (int x = 0; x < value.getArity(); x++) { 143 | fields[x] = value.getField(x); 144 | } 145 | // insert values and send to cassandra 146 | BoundStatement s = preparedStatement.bind(fields); 147 | s.setDefaultTimestamp(timestamp); 148 | ResultSetFuture result = session.executeAsync(s); 149 | updatesSent++; 150 | if (result != null) { 151 | // add callback to detect errors 152 | Futures.addCallback(result, callback); 153 | } 154 | } 155 | updatesCount.set(updatesSent); 156 | 157 | synchronized (updatesConfirmed) { 158 | while (exception.get() == null && updatesSent != updatesConfirmed.get()) { 159 | 
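                // block until the callbacks confirm every statement sent for this
                // checkpoint or record a failure; both onSuccess and onFailure call
                // notifyAll() on this monitor when the condition may have changed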
updatesConfirmed.wait(); 160 | } 161 | } 162 | 163 | if (exception.get() != null) { 164 | LOG.warn("Sending a value failed.", exception.get()); 165 | return false; 166 | } else { 167 | return true; 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/ClusterBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import com.datastax.driver.core.Cluster; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * This class is used to configure a {@link com.datastax.driver.core.Cluster} after deployment. The 27 | * cluster represents the connection that will be established to Cassandra. Cassandra driver metrics 28 | * are not integrated with Flink metrics, so they are disabled. 29 | */ 30 | public abstract class ClusterBuilder implements Serializable { 31 | 32 | public Cluster getCluster() { 33 | return buildCluster(Cluster.builder().withoutMetrics()); 34 | } 35 | 36 | /** 37 | * Configures the connection to Cassandra. The configuration is done by calling methods on the 38 | * builder object and finalizing the configuration with build(). 39 | * 40 | * @param builder connection builder 41 | * @return configured connection 42 | */ 43 | protected abstract Cluster buildCluster(Cluster.Builder builder); 44 | } 45 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/MapperOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import com.datastax.driver.mapping.Mapper; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * This class is used to configure a {@link com.datastax.driver.mapping.Mapper} after deployment. 27 | */ 28 | public interface MapperOptions extends Serializable { 29 | 30 | /** 31 | * Returns an array of {@link com.datastax.driver.mapping.Mapper.Option} that are used to configure 32 | * the mapper. 33 | * 34 | * @return array of options used to configure the mapper. 35 | */ 36 | Mapper.Option[] getMapperOptions(); 37 | } 38 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/NoOpCassandraFailureHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import org.apache.flink.annotation.Internal; 21 | 22 | import java.io.IOException; 23 | 24 | /** A {@link CassandraFailureHandler} that simply fails the sink on any failures. */ 25 | @Internal 26 | public class NoOpCassandraFailureHandler implements CassandraFailureHandler { 27 | 28 | private static final long serialVersionUID = 737941343410827885L; 29 | 30 | @Override 31 | public void onFailure(Throwable failure) throws IOException { 32 | // simply fail the sink 33 | throw new IOException("Error while sending value.", failure); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/java/org/apache/flink/streaming/connectors/cassandra/SimpleMapperOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import com.datastax.driver.core.ConsistencyLevel; 22 | import com.datastax.driver.core.ProtocolVersion; 23 | import com.datastax.driver.mapping.Mapper; 24 | 25 | import java.util.ArrayList; 26 | 27 | /** A simple MapperOptions implementation. */ 28 | public class SimpleMapperOptions implements MapperOptions { 29 | 30 | private static final long serialVersionUID = 1L; 31 | 32 | private final ArrayList options; 33 | 34 | public SimpleMapperOptions() { 35 | options = new ArrayList<>(); 36 | } 37 | 38 | /** 39 | * Adds time-to-live option to a mapper operation. This is only valid for save operations. 40 | * 41 | *
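 * <p>Like the other methods on this class, it accumulates an option and returns {@code this}
 * for chaining; a usage sketch (values are illustrative):
 *
 * <pre>{@code
 * MapperOptions options = new SimpleMapperOptions()
 *         .ttl(86400) // one day, in seconds
 *         .consistencyLevel(ConsistencyLevel.QUORUM)
 *         .saveNullFields(false);
 * }</pre>
 *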
<p>
Note that this option is only available if using {@link ProtocolVersion#V2} or above. 42 | * 43 | * @param ttl the TTL (in seconds). 44 | */ 45 | public SimpleMapperOptions ttl(int ttl) { 46 | options.add(Mapper.Option.ttl(ttl)); 47 | return this; 48 | } 49 | 50 | /** 51 | * Adds a timestamp option to a mapper operation. This is only valid for save and delete 52 | * operations. 53 | * 54 | *
<p>
Note that this option is only available if using {@link ProtocolVersion#V2} or above. 55 | * 56 | * @param timestamp the timestamp (in microseconds). 57 | */ 58 | public SimpleMapperOptions timestamp(long timestamp) { 59 | options.add(Mapper.Option.timestamp(timestamp)); 60 | return this; 61 | } 62 | 63 | /** 64 | * Adds a consistency level value option to a mapper operation. This is valid for save, delete 65 | * and get operations. 66 | * 67 | *
<p>
Note that the consistency level can also be defined at the mapper level, as a parameter of 68 | * the {@link com.datastax.driver.mapping.annotations.Table} annotation (this is redundant for 69 | * backward compatibility). This option, whether defined on a specific call or as the default, 70 | * will always take precedence over the annotation. 71 | * 72 | * @param cl the {@link com.datastax.driver.core.ConsistencyLevel} to use for the operation. 73 | */ 74 | public SimpleMapperOptions consistencyLevel(ConsistencyLevel cl) { 75 | options.add(Mapper.Option.consistencyLevel(cl)); 76 | return this; 77 | } 78 | 79 | /** 80 | * Enables query tracing for a mapper operation. This is valid for save, delete and get 81 | * operations. 82 | * 83 | * @param enabled whether to enable tracing. 84 | */ 85 | public SimpleMapperOptions tracing(boolean enabled) { 86 | options.add(Mapper.Option.tracing(enabled)); 87 | return this; 88 | } 89 | 90 | /** 91 | * Specifies whether null entity fields should be included in insert queries. This option is 92 | * valid only for save operations. 93 | * 94 | *
<p>
If this option is not specified, it defaults to {@code true} (null fields are saved). 95 | * 96 | * @param enabled whether to include null fields in queries. 97 | */ 98 | public SimpleMapperOptions saveNullFields(boolean enabled) { 99 | options.add(Mapper.Option.saveNullFields(enabled)); 100 | return this; 101 | } 102 | 103 | /** 104 | * Specifies whether an IF NOT EXISTS clause should be included in insert queries. This option 105 | * is valid only for save operations. 106 | * 107 | *
<p>
If this option is not specified, it defaults to {@code false} (IF NOT EXISTS statements 108 | * are not used). 109 | * 110 | * @param enabled whether to include an IF NOT EXISTS clause in queries. 111 | */ 112 | public SimpleMapperOptions ifNotExists(boolean enabled) { 113 | options.add(Mapper.Option.ifNotExists(enabled)); 114 | return this; 115 | } 116 | 117 | @Override 118 | public Mapper.Option[] getMapperOptions() { 119 | return options.toArray(new Mapper.Option[0]); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/main/resources/META-INF/NOTICE: -------------------------------------------------------------------------------- 1 | flink-connector-cassandra 2 | Copyright 2014-2024 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | 7 | This project bundles the following dependencies under the Apache Software License 2.0. (http://www.apache.org/licenses/LICENSE-2.0.txt) 8 | 9 | - com.datastax.cassandra:cassandra-driver-core:shaded:3.11.2 10 | - com.datastax.cassandra:cassandra-driver-mapping:3.11.2 11 | - com.google.guava:guava:19.0 12 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/architecture/ProductionCodeArchitectureTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.architecture; 20 | 21 | import org.apache.flink.architecture.common.ImportOptions; 22 | 23 | import com.tngtech.archunit.core.importer.ImportOption; 24 | import com.tngtech.archunit.junit.AnalyzeClasses; 25 | import com.tngtech.archunit.junit.ArchTest; 26 | import com.tngtech.archunit.junit.ArchTests; 27 | 28 | /** product code Architecture tests. 
*/ 29 | @AnalyzeClasses( 30 | packages = "org.apache.flink.connector", 31 | importOptions = { 32 | ImportOption.DoNotIncludeTests.class, 33 | ImportOption.DoNotIncludeArchives.class, 34 | ImportOptions.ExcludeScalaImportOption.class, 35 | ImportOptions.ExcludeShadedImportOption.class 36 | }) 37 | public class ProductionCodeArchitectureTest { 38 | 39 | @ArchTest 40 | public static final ArchTests COMMON_TESTS = ArchTests.in(ProductionCodeArchitectureBase.class); 41 | } 42 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.architecture; 20 | 21 | import org.apache.flink.architecture.common.ImportOptions; 22 | 23 | import com.tngtech.archunit.core.importer.ImportOption; 24 | import com.tngtech.archunit.junit.AnalyzeClasses; 25 | import com.tngtech.archunit.junit.ArchTest; 26 | import com.tngtech.archunit.junit.ArchTests; 27 | 28 | /** Architecture tests for test code. */ 29 | @AnalyzeClasses( 30 | packages = { 31 | "org.apache.flink.batch.connectors.cassandra", 32 | "org.apache.flink.streaming.connectors.cassandra", 33 | "org.apache.flink.connector.cassandra", 34 | "org.apache.flink.connectors.cassandra" 35 | }, 36 | importOptions = { 37 | ImportOption.OnlyIncludeTests.class, 38 | ImportOptions.ExcludeScalaImportOption.class, 39 | ImportOptions.ExcludeShadedImportOption.class 40 | }) 41 | public class TestCodeArchitectureTest { 42 | 43 | @ArchTest 44 | public static final ArchTests COMMON_TESTS = ArchTests.in(TestCodeArchitectureTestBase.class); 45 | } 46 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/batch/connectors/cassandra/example/BatchExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra.example; 19 | 20 | import org.apache.flink.api.common.typeinfo.TypeHint; 21 | import org.apache.flink.api.java.DataSet; 22 | import org.apache.flink.api.java.ExecutionEnvironment; 23 | import org.apache.flink.api.java.tuple.Tuple2; 24 | import org.apache.flink.api.java.typeutils.TupleTypeInfo; 25 | import org.apache.flink.batch.connectors.cassandra.CassandraInputFormat; 26 | import org.apache.flink.batch.connectors.cassandra.CassandraTupleOutputFormat; 27 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 28 | 29 | import com.datastax.driver.core.Cluster; 30 | import com.datastax.driver.core.Cluster.Builder; 31 | 32 | import java.util.ArrayList; 33 | 34 | /** 35 | * This is an example showing how to use the Cassandra Input-/OutputFormats in the Batch API. 36 | * 37 | *
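 * <p>The DDL from the paragraph below, formatted as ready-to-run CQL:
 *
 * <pre>{@code
 * CREATE KEYSPACE IF NOT EXISTS test
 *     WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'};
 * CREATE TABLE IF NOT EXISTS test.batches (number int, strings text, PRIMARY KEY(number, strings));
 * }</pre>
 *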
<p>
The example assumes that a table exists in a local cassandra database, according to the 38 | * following queries: CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 39 | * 'SimpleStrategy', 'replication_factor': '1'}; CREATE TABLE IF NOT EXISTS test.batches (number 40 | * int, strings text, PRIMARY KEY(number, strings)); 41 | */ 42 | public class BatchExample { 43 | private static final String INSERT_QUERY = 44 | "INSERT INTO test.batches (number, strings) VALUES (?,?);"; 45 | private static final String SELECT_QUERY = "SELECT number, strings FROM test.batches;"; 46 | 47 | /* 48 | * table script: "CREATE TABLE test.batches (number int, strings text, PRIMARY KEY(number, strings));" 49 | */ 50 | public static void main(String[] args) throws Exception { 51 | 52 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 53 | env.setParallelism(1); 54 | 55 | ArrayList> collection = new ArrayList<>(20); 56 | for (int i = 0; i < 20; i++) { 57 | collection.add(new Tuple2<>(i, "string " + i)); 58 | } 59 | 60 | DataSet> dataSet = env.fromCollection(collection); 61 | 62 | dataSet.output( 63 | new CassandraTupleOutputFormat>( 64 | INSERT_QUERY, 65 | new ClusterBuilder() { 66 | @Override 67 | protected Cluster buildCluster(Builder builder) { 68 | return builder.addContactPoints("127.0.0.1").build(); 69 | } 70 | })); 71 | 72 | env.execute("Write"); 73 | 74 | DataSet> inputDS = 75 | env.createInput( 76 | new CassandraInputFormat>( 77 | SELECT_QUERY, 78 | new ClusterBuilder() { 79 | @Override 80 | protected Cluster buildCluster(Builder builder) { 81 | return builder.addContactPoints("127.0.0.1").build(); 82 | } 83 | }), 84 | TupleTypeInfo.of(new TypeHint>() {})); 85 | 86 | inputDS.print(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/batch/connectors/cassandra/example/BatchPojoExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.flink.batch.connectors.cassandra.example; 19 | 20 | import org.apache.flink.api.java.DataSet; 21 | import org.apache.flink.api.java.ExecutionEnvironment; 22 | import org.apache.flink.batch.connectors.cassandra.CassandraPojoInputFormat; 23 | import org.apache.flink.batch.connectors.cassandra.CassandraPojoOutputFormat; 24 | import org.apache.flink.connectors.cassandra.utils.Pojo; 25 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 26 | 27 | import com.datastax.driver.core.Cluster; 28 | import com.datastax.driver.core.ConsistencyLevel; 29 | import com.datastax.driver.mapping.Mapper; 30 | 31 | import java.util.List; 32 | import java.util.UUID; 33 | import java.util.stream.Collectors; 34 | import java.util.stream.IntStream; 35 | 36 | /** 37 | * This is an example showing how to use the {@link CassandraPojoInputFormat}/{@link 38 | * CassandraPojoOutputFormat} in the Batch API. 39 | * 40 | *
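 * <p>The DDL from the paragraph below, formatted as ready-to-run CQL (the javadoc's
 * "batchId" appears to be a typo for the "batch_id" column, which this snippet uses):
 *
 * <pre>{@code
 * CREATE KEYSPACE IF NOT EXISTS flink
 *     WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'};
 * CREATE TABLE IF NOT EXISTS flink.batches (id text, counter int, batch_id int, PRIMARY KEY(id, counter, batch_id));
 * }</pre>
 *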
<p>
The example assumes that a table exists in a local cassandra database, according to the 41 | * following queries: CREATE KEYSPACE IF NOT EXISTS flink WITH replication = {'class': 42 | * 'SimpleStrategy', 'replication_factor': '1'}; CREATE TABLE IF NOT EXISTS flink.batches (id text, 43 | * counter int, batch_id int, PRIMARY KEY(id, counter, batchId)); 44 | */ 45 | public class BatchPojoExample { 46 | private static final String SELECT_QUERY = "SELECT id, counter, batch_id FROM flink.batches;"; 47 | 48 | public static void main(String[] args) throws Exception { 49 | 50 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 51 | env.setParallelism(1); 52 | 53 | List customCassandraAnnotatedPojos = 54 | IntStream.range(0, 20) 55 | .mapToObj(x -> new Pojo(UUID.randomUUID().toString(), x, 0)) 56 | .collect(Collectors.toList()); 57 | 58 | DataSet dataSet = env.fromCollection(customCassandraAnnotatedPojos); 59 | 60 | ClusterBuilder clusterBuilder = 61 | new ClusterBuilder() { 62 | private static final long serialVersionUID = -1754532803757154795L; 63 | 64 | @Override 65 | protected Cluster buildCluster(Cluster.Builder builder) { 66 | return builder.addContactPoints("127.0.0.1").build(); 67 | } 68 | }; 69 | 70 | dataSet.output( 71 | new CassandraPojoOutputFormat<>( 72 | clusterBuilder, 73 | Pojo.class, 74 | () -> new Mapper.Option[] {Mapper.Option.saveNullFields(true)})); 75 | 76 | env.execute("Write"); 77 | 78 | /* 79 | * This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat. 80 | */ 81 | DataSet inputDS = 82 | env.createInput( 83 | new CassandraPojoInputFormat<>( 84 | SELECT_QUERY, 85 | clusterBuilder, 86 | Pojo.class, 87 | () -> 88 | new Mapper.Option[] { 89 | Mapper.Option.consistencyLevel(ConsistencyLevel.ANY) 90 | })); 91 | 92 | inputDS.print(); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/connector/cassandra/source/CassandraTestContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.apache.flink.connector.cassandra.source; 20 | 21 | import org.apache.flink.api.common.typeinfo.TypeInformation; 22 | import org.apache.flink.api.connector.source.Source; 23 | import org.apache.flink.connector.cassandra.CassandraTestEnvironment; 24 | import org.apache.flink.connector.testframe.external.ExternalContextFactory; 25 | import org.apache.flink.connector.testframe.external.ExternalSystemSplitDataWriter; 26 | import org.apache.flink.connector.testframe.external.source.DataStreamSourceExternalContext; 27 | import org.apache.flink.connector.testframe.external.source.TestingSourceSettings; 28 | import org.apache.flink.connectors.cassandra.utils.Pojo; 29 | import org.apache.flink.streaming.connectors.cassandra.MapperOptions; 30 | 31 | import com.datastax.driver.mapping.Mapper; 32 | import com.datastax.driver.mapping.MappingManager; 33 | 34 | import java.net.URL; 35 | import java.util.ArrayList; 36 | import java.util.Collections; 37 | import java.util.List; 38 | 39 | /** 40 | * Junit {@link DataStreamSourceExternalContext} that contains everything related to Cassandra 41 | * source test cases especially test table management. 42 | */ 43 | public class CassandraTestContext implements DataStreamSourceExternalContext { 44 | 45 | static final String TABLE_NAME = "batches"; 46 | 47 | private static final String CREATE_TABLE_QUERY = 48 | "CREATE TABLE " 49 | + CassandraTestEnvironment.KEYSPACE 50 | + "." 51 | + TABLE_NAME 52 | + " (id text PRIMARY KEY, counter int, batch_id int)" 53 | + ";"; 54 | 55 | private static final String DROP_TABLE_QUERY = 56 | "DROP TABLE " + CassandraTestEnvironment.KEYSPACE + "." + TABLE_NAME + ";"; 57 | 58 | private static final int RECORDS_PER_SPLIT = 20; 59 | 60 | private final Mapper mapper; 61 | private final MapperOptions mapperOptions; 62 | private final CassandraTestEnvironment cassandraTestEnvironment; 63 | 64 | public CassandraTestContext(CassandraTestEnvironment cassandraTestEnvironment) { 65 | this.cassandraTestEnvironment = cassandraTestEnvironment; 66 | createTable(); 67 | mapper = new MappingManager(cassandraTestEnvironment.getSession()).mapper(Pojo.class); 68 | mapperOptions = () -> new Mapper.Option[] {Mapper.Option.saveNullFields(true)}; 69 | } 70 | 71 | @Override 72 | public TypeInformation getProducedType() { 73 | return TypeInformation.of(Pojo.class); 74 | } 75 | 76 | @Override 77 | public List getConnectorJarPaths() { 78 | return Collections.emptyList(); 79 | } 80 | 81 | @Override 82 | public Source createSource(TestingSourceSettings sourceSettings) 83 | throws UnsupportedOperationException { 84 | 85 | return new CassandraSource<>( 86 | cassandraTestEnvironment.getBuilderForReading(), 87 | Pojo.class, 88 | String.format( 89 | "SELECT * FROM %s.%s;", CassandraTestEnvironment.KEYSPACE, TABLE_NAME), 90 | mapperOptions); 91 | } 92 | 93 | @Override 94 | public ExternalSystemSplitDataWriter createSourceSplitDataWriter( 95 | TestingSourceSettings sourceSettings) { 96 | return new ExternalSystemSplitDataWriter() { 97 | 98 | @Override 99 | public void writeRecords(List records) { 100 | for (Pojo pojo : records) { 101 | mapper.save(pojo, mapperOptions.getMapperOptions()); 102 | } 103 | } 104 | 105 | @Override 106 | public void close() { 107 | // nothing to do, cluster/session is shared at the CassandraTestEnvironment 108 | // level 109 | } 110 | }; 111 | } 112 | 113 | @Override 114 | public List generateTestData( 115 | TestingSourceSettings sourceSettings, int splitIndex, long seed) { 116 | List testData = new 
ArrayList<>(RECORDS_PER_SPLIT); 117 | // generate RECORDS_PER_SPLIT pojos per split and use splitId as pojo batchIndex so that 118 | // pojos are considered equal when they belong to the same split 119 | // as requested in implementation notes. 120 | for (int i = 0; i < RECORDS_PER_SPLIT; i++) { 121 | Pojo pojo = new Pojo(String.valueOf(seed + i), i, splitIndex); 122 | testData.add(pojo); 123 | } 124 | return testData; 125 | } 126 | 127 | @Override 128 | public void close() throws Exception { 129 | dropTable(); 130 | // NB: cluster/session is shared at the CassandraTestEnvironment level 131 | } 132 | 133 | private void createTable() { 134 | cassandraTestEnvironment.executeRequestWithTimeout(CREATE_TABLE_QUERY); 135 | } 136 | 137 | private void dropTable() { 138 | cassandraTestEnvironment.executeRequestWithTimeout(DROP_TABLE_QUERY); 139 | } 140 | 141 | static class CassandraTestContextFactory 142 | implements ExternalContextFactory { 143 | 144 | private final CassandraTestEnvironment cassandraTestEnvironment; 145 | 146 | public CassandraTestContextFactory(CassandraTestEnvironment cassandraTestEnvironment) { 147 | this.cassandraTestEnvironment = cassandraTestEnvironment; 148 | } 149 | 150 | @Override 151 | public CassandraTestContext createExternalContext(String testName) { 152 | return new CassandraTestContext(cassandraTestEnvironment); 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/connector/cassandra/source/enumerator/CassandraEnumeratorStateSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.connector.cassandra.source.enumerator; 20 | 21 | import org.apache.flink.connector.cassandra.source.split.CassandraSplit; 22 | 23 | import com.google.common.collect.ImmutableList; 24 | import org.junit.jupiter.api.Test; 25 | 26 | import java.math.BigInteger; 27 | import java.util.ArrayDeque; 28 | import java.util.Queue; 29 | 30 | import static org.assertj.core.api.Assertions.assertThat; 31 | 32 | /** Tests for {@link CassandraEnumeratorStateSerializer}. 
*/ 33 | class CassandraEnumeratorStateSerializerTest { 34 | 35 | @Test 36 | public void testSerdeRoundtrip() throws Exception { 37 | final Queue splitsToReassign = 38 | new ArrayDeque<>( 39 | ImmutableList.of( 40 | new CassandraSplit(BigInteger.ZERO, BigInteger.TEN), 41 | new CassandraSplit(BigInteger.TEN, BigInteger.ZERO))); 42 | 43 | final CassandraEnumeratorState cassandraEnumeratorState = 44 | new CassandraEnumeratorState( 45 | 10, BigInteger.ONE, BigInteger.ZERO, BigInteger.TEN, splitsToReassign); 46 | 47 | final byte[] serialized = 48 | CassandraEnumeratorStateSerializer.INSTANCE.serialize(cassandraEnumeratorState); 49 | final CassandraEnumeratorState deserialized = 50 | CassandraEnumeratorStateSerializer.INSTANCE.deserialize( 51 | CassandraEnumeratorStateSerializer.CURRENT_VERSION, serialized); 52 | assertThat(deserialized) 53 | .isEqualTo(cassandraEnumeratorState) 54 | .withFailMessage( 55 | "CassandraEnumeratorState is not the same as input object after serde roundtrip"); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/connector/cassandra/source/reader/CassandraQueryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.connector.cassandra.source.reader; 20 | 21 | import org.apache.flink.connector.cassandra.source.CassandraSource; 22 | 23 | import org.junit.jupiter.api.Test; 24 | 25 | import java.util.Arrays; 26 | import java.util.regex.Matcher; 27 | 28 | import static org.assertj.core.api.Assertions.assertThat; 29 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 30 | 31 | /** tests for query generation and query sanity checks. 
*/ 32 | class CassandraQueryTest { 33 | 34 | @Test 35 | public void testKeySpaceTableExtractionRegexp() { 36 | Arrays.asList( 37 | "select field FROM keyspace.table where field = value;", 38 | "select * FROM keyspace.table;", 39 | "select field1, field2 from keyspace.table;", 40 | "select field1, field2 from keyspace.table LIMIT(1000);", 41 | "select field1 from keyspace.table ;", 42 | "select field1 from keyspace.table where field1=1;") 43 | .forEach(CassandraQueryTest::assertQueryFormatCorrect); 44 | 45 | Arrays.asList( 46 | "select field1 from table;", // missing keyspace 47 | "select field1 from .table", // undefined keyspace var in a script 48 | "select field1 from keyspace.;", // undefined table var in a script 49 | "select field1 from keyspace.table" // missing ";" 50 | ) 51 | .forEach(CassandraQueryTest::assertQueryFormatIncorrect); 52 | } 53 | 54 | @Test 55 | public void testProhibitedClauses() { 56 | Arrays.asList( 57 | "SELECT COUNT(*) from flink.table;", 58 | "SELECT AVG(*) from flink.table;", 59 | "SELECT MIN(*) from flink.table;", 60 | "SELECT MAX(*) from flink.table;", 61 | "SELECT SUM(*) from flink.table;", 62 | "SELECT field1, field2 from flink.table ORDER BY field1;", 63 | "SELECT field1, field2 from flink.table GROUP BY field1;") 64 | .forEach(CassandraQueryTest::assertProhibitedClauseRejected); 65 | } 66 | 67 | @Test 68 | public void testGenerateRangeQuery() { 69 | String query; 70 | String outputQuery; 71 | 72 | // query with where clause 73 | query = "SELECT field FROM keyspace.table WHERE field = value;"; 74 | outputQuery = CassandraSplitReader.generateRangeQuery(query, "field"); 75 | assertThat(outputQuery) 76 | .isEqualTo( 77 | "SELECT field FROM keyspace.table WHERE (token(field) >= ?) AND (token(field) < ?) AND field = value;"); 78 | 79 | // query without where clause 80 | query = "SELECT * FROM keyspace.table;"; 81 | outputQuery = CassandraSplitReader.generateRangeQuery(query, "field"); 82 | assertThat(outputQuery) 83 | .isEqualTo( 84 | "SELECT * FROM keyspace.table WHERE (token(field) >= ?) AND (token(field) < ?);"); 85 | 86 | // query without where clause but with another trailing clause 87 | query = "SELECT field FROM keyspace.table LIMIT(1000);"; 88 | outputQuery = CassandraSplitReader.generateRangeQuery(query, "field"); 89 | assertThat(outputQuery) 90 | .isEqualTo( 91 | "SELECT field FROM keyspace.table WHERE (token(field) >= ?) AND (token(field) < ?) LIMIT(1000);"); 92 | 93 | // query with where clause and another trailing clause 94 | query = "SELECT field FROM keyspace.table WHERE field = value LIMIT(1000);"; 95 | outputQuery = CassandraSplitReader.generateRangeQuery(query, "field"); 96 | assertThat(outputQuery) 97 | .isEqualTo( 98 | "SELECT field FROM keyspace.table WHERE (token(field) >= ?) AND (token(field) < ?) AND field = value LIMIT(1000);"); 99 | } 100 | 101 | private static void assertQueryFormatIncorrect(String query) { 102 | assertThatThrownBy(() -> CassandraSource.checkQueryValidity(query)) 103 | .hasMessageContaining( 104 | "Query must be of the form select ... 
from keyspace.table ...;"); 105 | } 106 | 107 | private static void assertQueryFormatCorrect(String query) { 108 | Matcher matcher = CassandraSource.SELECT_REGEXP.matcher(query); 109 | assertThat(matcher.matches()).isTrue(); 110 | assertThat(matcher.group(1)).isEqualTo("keyspace"); 111 | assertThat(matcher.group(2)).isEqualTo("table"); 112 | } 113 | 114 | private static void assertProhibitedClauseRejected(String query) { 115 | assertThatThrownBy(() -> CassandraSource.checkQueryValidity(query)) 116 | .hasMessageContaining( 117 | "Aggregations/OrderBy are not supported because the query is executed on subsets/partitions of the input table"); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/connector/cassandra/source/split/CassandraSplitSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.connector.cassandra.source.split; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | import java.io.IOException; 24 | import java.math.BigInteger; 25 | 26 | import static org.assertj.core.api.Assertions.assertThat; 27 | 28 | /** Tests for {@link CassandraSplitSerializer}. */ 29 | class CassandraSplitSerializerTest { 30 | 31 | @Test 32 | public void testSerdeRoundtrip() throws IOException { 33 | final CassandraSplit testData = new CassandraSplit(BigInteger.ONE, BigInteger.TEN); 34 | final byte[] serialized = CassandraSplitSerializer.INSTANCE.serialize(testData); 35 | final CassandraSplit deserialized = 36 | CassandraSplitSerializer.INSTANCE.deserialize( 37 | CassandraSplitSerializer.CURRENT_VERSION, serialized); 38 | assertThat(deserialized) 39 | .isEqualTo(testData) 40 | .withFailMessage( 41 | "CassandraSplit is not the same as input object after serde roundtrip"); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/connectors/cassandra/utils/Pojo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.connectors.cassandra.utils; 19 | 20 | import com.datastax.driver.mapping.annotations.Column; 21 | import com.datastax.driver.mapping.annotations.Table; 22 | 23 | import java.io.Serializable; 24 | import java.util.Objects; 25 | 26 | /** Test Pojo with DataStax annotations used. */ 27 | @Table(keyspace = "flink", name = "batches") 28 | public class Pojo implements Serializable { 29 | 30 | private static final long serialVersionUID = 1038054554690916991L; 31 | 32 | @Column(name = "id") 33 | private String id; 34 | 35 | @Column(name = "counter") 36 | private int counter; 37 | 38 | @Column(name = "batch_id") 39 | private int batchID; 40 | 41 | // required for deserialization 42 | public Pojo() {} 43 | 44 | public Pojo(String id, int counter, int batchID) { 45 | this.id = id; 46 | this.counter = counter; 47 | this.batchID = batchID; 48 | } 49 | 50 | public String getId() { 51 | return id; 52 | } 53 | 54 | public void setId(String id) { 55 | this.id = id; 56 | } 57 | 58 | public int getCounter() { 59 | return counter; 60 | } 61 | 62 | public void setCounter(int counter) { 63 | this.counter = counter; 64 | } 65 | 66 | public int getBatchID() { 67 | return batchID; 68 | } 69 | 70 | public void setBatchID(int batchId) { 71 | this.batchID = batchId; 72 | } 73 | 74 | @Override 75 | public String toString() { 76 | return String.format( 77 | "{\"id\":\"%s\", \"counter\":%d, \"batchID\":%d}", id, counter, batchID); 78 | } 79 | 80 | @Override 81 | public boolean equals(Object o) { 82 | if (this == o) { 83 | return true; 84 | } 85 | if (o == null || getClass() != o.getClass()) { 86 | return false; 87 | } 88 | Pojo pojo = (Pojo) o; 89 | return counter == pojo.counter && batchID == pojo.batchID && id.equals(pojo.id); 90 | } 91 | 92 | @Override 93 | public int hashCode() { 94 | return Objects.hash(id, counter, batchID); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/connectors/cassandra/utils/ResultSetFutures.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.apache.flink.connectors.cassandra.utils; 20 | 21 | import com.datastax.driver.core.ResultSet; 22 | import com.datastax.driver.core.ResultSetFuture; 23 | 24 | import java.util.concurrent.CompletableFuture; 25 | import java.util.concurrent.ExecutionException; 26 | import java.util.concurrent.Executor; 27 | import java.util.concurrent.TimeUnit; 28 | import java.util.concurrent.TimeoutException; 29 | 30 | import static org.apache.flink.util.Preconditions.checkNotNull; 31 | 32 | /** Utility class to create {@link com.datastax.driver.core.ResultSetFuture}s. */ 33 | public class ResultSetFutures { 34 | 35 | private ResultSetFutures() {} 36 | 37 | public static ResultSetFuture fromCompletableFuture(CompletableFuture<ResultSet> future) { 38 | checkNotNull(future); 39 | return new CompletableResultSetFuture(future); 40 | } 41 | 42 | private static class CompletableResultSetFuture implements ResultSetFuture { 43 | 44 | private final CompletableFuture<ResultSet> completableFuture; 45 | 46 | CompletableResultSetFuture(CompletableFuture<ResultSet> future) { 47 | this.completableFuture = future; 48 | } 49 | 50 | @Override 51 | public ResultSet getUninterruptibly() { 52 | try { 53 | return completableFuture.get(); 54 | } catch (InterruptedException e) { 55 | return getUninterruptibly(); // ignore interrupts, per the method contract 56 | } catch (ExecutionException e) { 57 | throw new RuntimeException(e); 58 | } 59 | } 60 | 61 | @Override 62 | public ResultSet getUninterruptibly(long l, TimeUnit timeUnit) throws TimeoutException { 63 | try { 64 | return completableFuture.get(l, timeUnit); 65 | } catch (InterruptedException e) { 66 | return getUninterruptibly(l, timeUnit); // retry the timed variant, not the unbounded one 67 | } catch (ExecutionException e) { 68 | throw new RuntimeException(e); 69 | } 70 | } 71 | 72 | @Override 73 | public boolean cancel(boolean b) { 74 | return completableFuture.cancel(b); 75 | } 76 | 77 | @Override 78 | public boolean isCancelled() { 79 | return completableFuture.isCancelled(); 80 | } 81 | 82 | @Override 83 | public boolean isDone() { 84 | return completableFuture.isDone(); 85 | } 86 | 87 | @Override 88 | public ResultSet get() throws InterruptedException, ExecutionException { 89 | return completableFuture.get(); 90 | } 91 | 92 | @Override 93 | public ResultSet get(long timeout, TimeUnit unit) 94 | throws InterruptedException, ExecutionException, TimeoutException { 95 | return completableFuture.get(timeout, unit); 96 | } 97 | 98 | @Override 99 | public void addListener(Runnable listener, Executor executor) { 100 | // run the callback on the supplied executor instead of silently dropping it 101 | completableFuture.whenCompleteAsync((result, error) -> listener.run(), executor); 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/streaming/connectors/cassandra/CassandraTupleWriteAheadSinkTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra; 20 | 21 | import org.apache.flink.api.common.ExecutionConfig; 22 | import org.apache.flink.api.java.tuple.Tuple0; 23 | import org.apache.flink.api.java.typeutils.TupleTypeInfo; 24 | import org.apache.flink.streaming.runtime.operators.CheckpointCommitter; 25 | import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; 26 | 27 | import com.datastax.driver.core.BoundStatement; 28 | import com.datastax.driver.core.Cluster; 29 | import com.datastax.driver.core.PreparedStatement; 30 | import com.datastax.driver.core.ResultSetFuture; 31 | import com.datastax.driver.core.Session; 32 | import org.junit.jupiter.api.Test; 33 | import org.junit.jupiter.api.Timeout; 34 | import org.mockito.ArgumentMatchers; 35 | import org.mockito.invocation.InvocationOnMock; 36 | import org.mockito.stubbing.Answer; 37 | 38 | import java.util.Collections; 39 | import java.util.concurrent.Executor; 40 | import java.util.concurrent.TimeUnit; 41 | import java.util.concurrent.atomic.AtomicReference; 42 | 43 | import static org.assertj.core.api.Assertions.assertThat; 44 | import static org.mockito.ArgumentMatchers.any; 45 | import static org.mockito.ArgumentMatchers.anyString; 46 | import static org.mockito.Mockito.doAnswer; 47 | import static org.mockito.Mockito.mock; 48 | import static org.mockito.Mockito.when; 49 | 50 | /** Tests for the {@link CassandraTupleWriteAheadSink}. 
*/ 51 | class CassandraTupleWriteAheadSinkTest { 52 | 53 | @Test 54 | @Timeout(value = 20_000, unit = TimeUnit.MILLISECONDS) 55 | void testAckLoopExitOnException() throws Exception { 56 | final AtomicReference<Runnable> runnableFuture = new AtomicReference<>(); 57 | 58 | final ClusterBuilder clusterBuilder = 59 | new ClusterBuilder() { 60 | private static final long serialVersionUID = 4624400760492936756L; 61 | 62 | @Override 63 | protected Cluster buildCluster(Cluster.Builder builder) { 64 | try { 65 | BoundStatement boundStatement = mock(BoundStatement.class); 66 | when(boundStatement.setDefaultTimestamp(any(long.class))) 67 | .thenReturn(boundStatement); 68 | 69 | PreparedStatement preparedStatement = mock(PreparedStatement.class); 70 | when(preparedStatement.bind(ArgumentMatchers.any())) 71 | .thenReturn(boundStatement); 72 | 73 | ResultSetFuture future = mock(ResultSetFuture.class); 74 | when(future.get()) 75 | .thenThrow(new RuntimeException("Expected exception.")); 76 | 77 | doAnswer( 78 | new Answer<Void>() { 79 | @Override 80 | public Void answer( 81 | InvocationOnMock invocationOnMock) 82 | throws Throwable { 83 | synchronized (runnableFuture) { 84 | // hand the registered listener to the test thread 85 | runnableFuture.set( 86 | (Runnable) 87 | invocationOnMock 88 | .getArguments()[0]); 89 | runnableFuture.notifyAll(); 90 | } 91 | return null; 92 | } 93 | }) 94 | .when(future) 95 | .addListener(any(Runnable.class), any(Executor.class)); 96 | 97 | Session session = mock(Session.class); 98 | when(session.prepare(anyString())).thenReturn(preparedStatement); 99 | when(session.executeAsync(any(BoundStatement.class))) 100 | .thenReturn(future); 101 | 102 | Cluster cluster = mock(Cluster.class); 103 | when(cluster.connect()).thenReturn(session); 104 | return cluster; 105 | } catch (Exception e) { 106 | throw new RuntimeException(e); 107 | } 108 | } 109 | }; 110 | 111 | // Our asynchronous executor thread 112 | new Thread( 113 | new Runnable() { 114 | @Override 115 | public void run() { 116 | synchronized (runnableFuture) { 117 | while (runnableFuture.get() == null) { 118 | try { 119 | runnableFuture.wait(); 120 | } catch (InterruptedException e) { 121 | // ignore interrupts 122 | } 123 | } 124 | } 125 | runnableFuture.get().run(); 126 | } 127 | }) 128 | .start(); 129 | 130 | CheckpointCommitter cc = mock(CheckpointCommitter.class); 131 | final CassandraTupleWriteAheadSink<Tuple0> sink = 132 | new CassandraTupleWriteAheadSink<>( 133 | "abc", 134 | TupleTypeInfo.of(Tuple0.class).createSerializer(new ExecutionConfig()), 135 | clusterBuilder, 136 | cc); 137 | 138 | OneInputStreamOperatorTestHarness<Tuple0, Tuple0> harness = 139 | new OneInputStreamOperatorTestHarness<>(sink); 140 | harness.getEnvironment().getTaskConfiguration().setBoolean("checkpointing", true); 141 | 142 | harness.setup(); 143 | sink.open(); 144 | 145 | // we should leave the loop and return false since we've seen an exception 146 | assertThat(sink.sendValues(Collections.singleton(new Tuple0()), 1L, 0L)).isFalse(); 147 | 148 | sink.close(); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/streaming/connectors/cassandra/Pojo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra; 19 | 20 | import com.datastax.driver.mapping.annotations.Column; 21 | 22 | import java.io.Serializable; 23 | 24 | /** Test Pojo with DataStax column annotations; its table mapping is created dynamically (no {@code @Table} annotation). */ 25 | public class Pojo implements Serializable { 26 | 27 | private static final long serialVersionUID = 1038054554690916991L; 28 | 29 | @Column(name = "id") 30 | private String id; 31 | 32 | @Column(name = "counter") 33 | private int counter; 34 | 35 | @Column(name = "batch_id") 36 | private int batchID; 37 | 38 | // required for deserialization 39 | public Pojo() {} 40 | 41 | public Pojo(String id, int counter, int batchID) { 42 | this.id = id; 43 | this.counter = counter; 44 | this.batchID = batchID; 45 | } 46 | 47 | public String getId() { 48 | return id; 49 | } 50 | 51 | public void setId(String id) { 52 | this.id = id; 53 | } 54 | 55 | public int getCounter() { 56 | return counter; 57 | } 58 | 59 | public void setCounter(int counter) { 60 | this.counter = counter; 61 | } 62 | 63 | public int getBatchID() { 64 | return batchID; 65 | } 66 | 67 | public void setBatchID(int batchId) { 68 | this.batchID = batchId; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/streaming/connectors/cassandra/example/CassandraPojoSinkExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra.example; 19 | 20 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.streaming.connectors.cassandra.CassandraSink; 23 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 24 | 25 | import com.datastax.driver.core.Cluster; 26 | import com.datastax.driver.core.Cluster.Builder; 27 | import com.datastax.driver.mapping.Mapper; 28 | 29 | import java.util.ArrayList; 30 | 31 | /** 32 | * This is an example showing how to use the Pojo Cassandra Sink in the Streaming API. 33 | * 34 | * <p>Pojos have to be annotated with DataStax annotations to work with this sink. 35 | * 36 | * <p>The example assumes that a table exists in a local cassandra database, according to the 37 | * following queries: CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 38 | * 'SimpleStrategy', 'replication_factor': '1'}; CREATE TABLE IF NOT EXISTS test.message(body text 39 | * PRIMARY KEY) 40 | */ 41 | public class CassandraPojoSinkExample { 42 | private static final ArrayList<Message> messages = new ArrayList<>(20); 43 | 44 | static { 45 | for (long i = 0; i < 20; i++) { 46 | messages.add(new Message("cassandra-" + i)); 47 | } 48 | } 49 | 50 | public static void main(String[] args) throws Exception { 51 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 52 | 53 | DataStreamSource<Message> source = env.fromCollection(messages); 54 | 55 | CassandraSink.addSink(source) 56 | .setClusterBuilder( 57 | new ClusterBuilder() { 58 | @Override 59 | protected Cluster buildCluster(Builder builder) { 60 | return builder.addContactPoint("127.0.0.1").build(); 61 | } 62 | }) 63 | .setMapperOptions(() -> new Mapper.Option[] {Mapper.Option.saveNullFields(true)}) 64 | .build(); 65 | 66 | env.execute("Cassandra Sink example"); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/streaming/connectors/cassandra/example/CassandraTupleSinkExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra.example; 19 | 20 | import org.apache.flink.api.java.tuple.Tuple2; 21 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 22 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 23 | import org.apache.flink.streaming.connectors.cassandra.CassandraSink; 24 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 25 | 26 | import com.datastax.driver.core.Cluster; 27 | import com.datastax.driver.core.Cluster.Builder; 28 | 29 | import java.util.ArrayList; 30 | 31 | /** 32 | * This is an example showing how to use the Tuple Cassandra Sink in the Streaming API. 33 | * 34 | *
<p>The example assumes that a table exists in a local cassandra database, according to the 35 | * following queries: CREATE KEYSPACE IF NOT EXISTS test WITH replication = {'class': 36 | * 'SimpleStrategy', 'replication_factor': '1'}; CREATE TABLE IF NOT EXISTS test.writetuple(element1 37 | * text PRIMARY KEY, element2 int) 38 | */ 39 | public class CassandraTupleSinkExample { 40 | private static final String INSERT = 41 | "INSERT INTO test.writetuple (element1, element2) VALUES (?, ?)"; 42 | private static final ArrayList<Tuple2<String, Integer>> collection = new ArrayList<>(20); 43 | 44 | static { 45 | for (int i = 0; i < 20; i++) { 46 | collection.add(new Tuple2<>("cassandra-" + i, i)); 47 | } 48 | } 49 | 50 | public static void main(String[] args) throws Exception { 51 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 52 | 53 | DataStreamSource<Tuple2<String, Integer>> source = env.fromCollection(collection); 54 | 55 | CassandraSink.addSink(source) 56 | .setQuery(INSERT) 57 | .setClusterBuilder( 58 | new ClusterBuilder() { 59 | @Override 60 | protected Cluster buildCluster(Builder builder) { 61 | return builder.addContactPoint("127.0.0.1").build(); 62 | } 63 | }) 64 | .build(); 65 | 66 | env.execute("WriteTupleIntoCassandra"); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/streaming/connectors/cassandra/example/CassandraTupleWriteAheadSinkExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.apache.flink.streaming.connectors.cassandra.example; 20 | 21 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 22 | import org.apache.flink.api.java.tuple.Tuple2; 23 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 24 | import org.apache.flink.streaming.api.checkpoint.ListCheckpointed; 25 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 26 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 27 | import org.apache.flink.streaming.connectors.cassandra.CassandraSink; 28 | import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder; 29 | 30 | import com.datastax.driver.core.Cluster; 31 | 32 | import java.util.Collections; 33 | import java.util.List; 34 | import java.util.UUID; 35 | 36 | /** 37 | * This is an example showing how to use the Cassandra Sink (with write-ahead log) in the Streaming 38 | * API. 39 | * 40 | *
<p>The example assumes that a table exists in a local cassandra database, according to the 41 | * following queries: CREATE KEYSPACE IF NOT EXISTS example WITH replication = {'class': 42 | * 'SimpleStrategy', 'replication_factor': '1'}; CREATE TABLE example.values (id text, count int, 43 | * PRIMARY KEY(id)); 44 | * 45 | *
<p>Important things to note are that checkpointing is enabled, a StateBackend is set and the 46 | * enableWriteAheadLog() call when creating the CassandraSink. 47 | */ 48 | public class CassandraTupleWriteAheadSinkExample { 49 | public static void main(String[] args) throws Exception { 50 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 51 | env.setParallelism(1); 52 | env.enableCheckpointing(1000); 53 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000)); 54 | env.setStateBackend( 55 | new FsStateBackend( 56 | "file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend")); 57 | 58 | CassandraSink<Tuple2<String, Integer>> sink = 59 | CassandraSink.addSink(env.addSource(new MySource())) 60 | // column name matches the CREATE TABLE statement in the javadoc above 61 | .setQuery("INSERT INTO example.values (id, count) values (?, ?);") 62 | .enableWriteAheadLog() 63 | .setClusterBuilder( 64 | new ClusterBuilder() { 65 | 66 | private static final long serialVersionUID = 67 | 2793938419775311824L; 68 | 69 | @Override 70 | public Cluster buildCluster(Cluster.Builder builder) { 71 | return builder.addContactPoint("127.0.0.1").build(); 72 | } 73 | }) 74 | .build(); 75 | 76 | sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello"); 77 | 78 | env.execute(); 79 | } 80 | 81 | private static class MySource 82 | implements SourceFunction<Tuple2<String, Integer>>, ListCheckpointed<Integer> { 83 | private static final long serialVersionUID = 4022367939215095610L; 84 | 85 | private int counter = 0; 86 | private boolean stop = false; 87 | 88 | @Override 89 | public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception { 90 | while (!stop) { 91 | Thread.sleep(50); 92 | ctx.collect(new Tuple2<>("" + UUID.randomUUID(), 1)); 93 | counter++; 94 | if (counter == 100) { 95 | stop = true; 96 | } 97 | } 98 | } 99 | 100 | @Override 101 | public void cancel() { 102 | stop = true; 103 | } 104 | 105 | @Override 106 | public List<Integer> snapshotState(long checkpointId, long timestamp) throws Exception { 107 | return Collections.singletonList(this.counter); 108 | } 109 | 110 | @Override 111 | public void restoreState(List<Integer> state) throws Exception { 112 | if (state.isEmpty() || state.size() > 1) { 113 | throw new RuntimeException( 114 | "Test failed due to unexpected recovered state size " + state.size()); 115 | } 116 | this.counter = state.get(0); 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/java/org/apache/flink/streaming/connectors/cassandra/example/Message.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | 18 | package org.apache.flink.streaming.connectors.cassandra.example; 19 | 20 | import com.datastax.driver.mapping.annotations.Column; 21 | import com.datastax.driver.mapping.annotations.Table; 22 | 23 | import java.io.Serializable; 24 | import java.util.Objects; 25 | 26 | /** Pojo with DataStax annotations. */ 27 | @Table(keyspace = "test", name = "message") 28 | public class Message implements Serializable { 29 | 30 | private static final long serialVersionUID = 1123119384361005680L; 31 | 32 | @Column(name = "body") 33 | private String message; 34 | 35 | public Message() { 36 | this(null); 37 | } 38 | 39 | public Message(String word) { 40 | this.message = word; 41 | } 42 | 43 | public String getMessage() { 44 | return message; 45 | } 46 | 47 | public void setMessage(String word) { 48 | this.message = word; 49 | } 50 | 51 | @Override 52 | public boolean equals(Object other) { 53 | if (other instanceof Message) { 54 | Message that = (Message) other; 55 | // null-safe: the default constructor leaves message null 56 | return Objects.equals(this.message, that.message); 57 | } 58 | return false; 59 | } 60 | 61 | @Override 62 | public int hashCode() { 63 | return Objects.hashCode(message); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/resources/META-INF/services/org.junit.jupiter.api.extension.Extension: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | org.apache.flink.util.TestLoggerExtension -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/resources/archunit.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | # 18 | 19 | # This controls if the violation store is writable 20 | freeze.store.default.allowStoreUpdate=true 21 | 22 | # Enable this if a new rule was added or the description of an existing rule has changed. 23 | # It is needed to record future violations of this rule. 24 | # If omitted, future violations of the rule will not be considered as violations as the initial store was not created for this rule. 25 | #freeze.store.default.allowStoreCreation=true 26 | 27 | # Enable this to record the current state of violations. 28 | # This can make sense, because current violations are consciously accepted and should be added to the store 29 | # By default we allow removing existing violations, but fail when new violations are added 30 | # NOTE: Adding new violations should be avoided when possible. If the rule was correct to flag a new 31 | # violation, please try to avoid creating the violation. If the violation was created due to a 32 | # shortcoming of the rule, file a JIRA issue so the rule can be improved. 33 | #freeze.refreeze=true 34 | 35 | freeze.store.default.path=archunit-violations 36 | 37 | # To allow all rules to be evaluated without checking any classes you can set the following property 38 | archRule.failOnEmptyShould = false 39 | -------------------------------------------------------------------------------- /flink-connector-cassandra/src/test/resources/log4j2-test.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | # Set root logger level to OFF to not flood build logs 20 | # set manually to INFO for debugging purposes 21 | rootLogger.level = OFF 22 | rootLogger.appenderRef.test.ref = TestLogger 23 | 24 | appender.testlogger.name = TestLogger 25 | appender.testlogger.type = CONSOLE 26 | appender.testlogger.target = SYSTEM_ERR 27 | appender.testlogger.layout.type = PatternLayout 28 | appender.testlogger.layout.pattern = %-4r [%t] %-5p %c %x - %m%n 29 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!-- 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements.  See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership.  The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License.  You may obtain a copy of the License at 10 | 11 |   http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, 14 | software distributed under the License is distributed on an 15 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | KIND, either express or implied.  See the License for the 17 | specific language governing permissions and limitations 18 | under the License. 19 | --> 20 | <project xmlns="http://maven.apache.org/POM/4.0.0" 21 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 22 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 23 | <parent> 24 | <groupId>org.apache.flink</groupId> 25 | <artifactId>flink-connector-parent</artifactId> 26 | <version>1.1.0</version> 27 | </parent> 28 | 29 | <modelVersion>4.0.0</modelVersion> 30 | 31 | <artifactId>flink-connector-cassandra-parent</artifactId> 32 | <version>4.0-SNAPSHOT</version> 33 | <name>Flink : Connectors : Cassandra : Parent</name> 34 | <packaging>pom</packaging> 35 | <inceptionYear>2022</inceptionYear> 36 | 37 | <scm> 38 | <url>https://github.com/apache/flink-connector-cassandra</url> 39 | <connection>git@github.com:apache/flink-connector-cassandra.git</connection> 40 | <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/flink-connector-cassandra.git</developerConnection> 41 | </scm> 42 | 43 | <properties> 44 | <flink.version>1.18.0</flink.version> 45 | <japicmp.referenceVersion>3.1.0-1.17</japicmp.referenceVersion> 46 | <guava.version>19.0</guava.version> 47 | </properties> 48 | 49 | <modules> 50 | <module>flink-connector-cassandra</module> 51 | </modules> 52 | 53 | <dependencyManagement> 54 | <dependencies> 55 | <dependency> 56 | <groupId>com.google.guava</groupId> 57 | <artifactId>guava</artifactId> 58 | <version>${guava.version}</version> 59 | </dependency> 60 | 61 | <dependency> 62 | <groupId>org.xerial.snappy</groupId> 63 | <artifactId>snappy-java</artifactId> 64 | <version>1.1.10.4</version> 65 | </dependency> 66 | 67 | <dependency> 68 | <groupId>io.dropwizard.metrics</groupId> 69 | <artifactId>metrics-core</artifactId> 70 | <version>3.2.2</version> 71 | </dependency> 72 | </dependencies> 73 | </dependencyManagement> 74 | 75 | <build> 76 | <plugins> 77 | <plugin> 78 | <groupId>org.apache.maven.plugins</groupId> 79 | <artifactId>maven-shade-plugin</artifactId> 80 | </plugin> 81 | 82 | <plugin> 83 | <groupId>org.codehaus.mojo</groupId> 84 | <artifactId>exec-maven-plugin</artifactId> 85 | <inherited>false</inherited> 86 | <dependencies> 87 | <dependency> 88 | <groupId>org.apache.flink</groupId> 89 | <artifactId>flink-ci-tools</artifactId> 90 | <version>${flink.version}</version> 91 | </dependency> 92 | </dependencies> 93 | </plugin> 94 | 95 | <plugin> 96 | <groupId>org.apache.maven.plugins</groupId> 97 | <artifactId>maven-jar-plugin</artifactId> 98 | </plugin> 99 | 100 | <plugin> 101 | <groupId>io.github.zentol.japicmp</groupId> 102 | <artifactId>japicmp-maven-plugin</artifactId> 103 | </plugin> 104 | 105 | <plugin> 106 | <groupId>org.apache.rat</groupId> 107 | <artifactId>apache-rat-plugin</artifactId> 108 | <inherited>false</inherited> 109 | </plugin> 110 | 111 | <plugin> 112 | <groupId>org.apache.maven.plugins</groupId> 113 | <artifactId>maven-checkstyle-plugin</artifactId> 114 | </plugin> 115 | 116 | <plugin> 117 | <groupId>com.diffplug.spotless</groupId> 118 | <artifactId>spotless-maven-plugin</artifactId> 119 | </plugin> 120 | 121 | <plugin> 122 | <groupId>org.apache.maven.plugins</groupId> 123 | <artifactId>maven-compiler-plugin</artifactId> 124 | </plugin> 125 | 126 | <plugin> 127 | <groupId>org.apache.maven.plugins</groupId> 128 | <artifactId>maven-surefire-plugin</artifactId> 129 | </plugin> 130 | 131 | <plugin> 132 | <groupId>org.apache.maven.plugins</groupId> 133 | <artifactId>maven-enforcer-plugin</artifactId> 134 | </plugin> 135 | </plugins> 136 | </build> 137 | </project> 138 | -------------------------------------------------------------------------------- /tools/ci/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.out.ref = ConsoleAppender 21 | 22 | # ----------------------------------------------------------------------------- 23 | # Console (use 'console') 24 | # ----------------------------------------------------------------------------- 25 | 26 | appender.console.name = ConsoleAppender 27 | appender.console.type = CONSOLE 28 | appender.console.layout.type = PatternLayout 29 | appender.console.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n 30 | 31 | # ----------------------------------------------------------------------------- 32 | # File (use 'file') 33 | # ----------------------------------------------------------------------------- 34 | appender.file.name = FileAppender 35 | appender.file.type = FILE 36 | appender.file.fileName = ${sys:log.dir}/mvn-${sys:mvn.forkNumber:-output}.log 37 | appender.file.layout.type = PatternLayout 38 | appender.file.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n 39 | appender.file.createOnDemand = true 40 | 41 | # suppress the irrelevant (wrong) warnings from the netty channel handler 42 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 43 | logger.netty.level = ERROR 44 | -------------------------------------------------------------------------------- /tools/maven/suppressions.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <!-- 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements.  See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership.  The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License.  You may obtain a copy of the License at 10 | 11 |   http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | --> 19 | 20 | <!DOCTYPE suppressions PUBLIC 21 | "-//Puppy Crawl//DTD Suppressions 1.1//EN" 22 | "http://www.puppycrawl.com/dtds/suppressions_1_1.dtd"> 23 | 24 | <suppressions> 25 | </suppressions> 26 | --------------------------------------------------------------------------------
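The files above cover each piece of the connector in isolation; the sketch below shows how they might be composed end to end. It is illustrative only: it assumes a Cassandra instance reachable at 127.0.0.1, reuses the flink.batches table mapped by the Pojo test class, and assumes a CassandraSource(ClusterBuilder, Class, String, MapperOptions) constructor shape as documented for the connector; the class name CassandraSourceToSinkSketch is hypothetical and is not part of the repository.

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.connector.cassandra.source.CassandraSource;
import org.apache.flink.connectors.cassandra.utils.Pojo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.cassandra.CassandraSink;
import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.mapping.Mapper;

/** Hypothetical end-to-end sketch: read flink.batches with CassandraSource, write back with CassandraSink. */
public class CassandraSourceToSinkSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Assumes a local Cassandra; mirrors the ClusterBuilder used by the sink examples above.
        ClusterBuilder clusterBuilder =
                new ClusterBuilder() {
                    @Override
                    protected Cluster buildCluster(Cluster.Builder builder) {
                        return builder.addContactPoint("127.0.0.1").build();
                    }
                };

        // The query must pass CassandraSource.checkQueryValidity(): a plain
        // "select ... from keyspace.table ...;" with no aggregations, ORDER BY, or GROUP BY,
        // because it is re-scoped to token ranges at runtime (see CassandraQueryTest).
        CassandraSource<Pojo> source =
                new CassandraSource<>(
                        clusterBuilder,
                        Pojo.class,
                        "select id, counter, batch_id from flink.batches;",
                        () -> new Mapper.Option[] {Mapper.Option.saveNullFields(true)});

        DataStream<Pojo> stream =
                env.fromSource(source, WatermarkStrategy.noWatermarks(), "CassandraSource");

        // Pojo carries @Table(keyspace = "flink", name = "batches"), so the mapper-based
        // sink needs no explicit INSERT query.
        CassandraSink.addSink(stream)
                .setClusterBuilder(clusterBuilder)
                .setMapperOptions(() -> new Mapper.Option[] {Mapper.Option.saveNullFields(true)})
                .build();

        env.execute("Cassandra source-to-sink sketch");
    }
}

Because the source re-executes the query once per token-range split, any aggregation or ordering has to happen in the Flink job itself rather than in the CQL statement, which is exactly what the prohibited-clause checks in CassandraQueryTest enforce.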