├── .devcontainer ├── Dockerfile ├── devcontainer.json ├── library-scripts │ ├── azcli-debian.sh │ ├── confluent-debian.sh │ └── docker-debian.sh └── post-install.sh ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── workflows │ ├── codeql.yml │ ├── mavenCI.yml │ └── release.yml └── working_agreement.md ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.MD ├── CONTRIBUTING.md ├── LICENSE.MD ├── README.md ├── SECURITY.md ├── checkstyle-suppressions.xml ├── doc ├── Confluent_Cloud_Setup.md ├── Confluent_Platform_Setup.md ├── CosmosDB_Setup.md ├── Developer_Walkthrough.md ├── Performance_Testing.md ├── README_Sink.md ├── README_Source.md └── images │ ├── SignUpConfluentCloud.png │ ├── Topics-Partitions.png │ ├── add-topic.png │ ├── attach-pr-to-issue.PNG │ ├── click-add-cluster.png │ ├── click-default.png │ ├── codespaces-forwarded-ports.png │ ├── codespaces-menu-button.png │ ├── codespaces-new-codespace.png │ ├── codespaces-open-with.png │ ├── converter-misconfigurations.png │ ├── cosmosdb-sink-records.png │ ├── delete-connector.png │ ├── delete-source-connector.png │ ├── env-configurations.png │ ├── environment-click.png │ ├── microsoft.png │ ├── name-topic.png │ ├── perf-sink-cosmos-chart.png │ ├── perf-sink-kafka-chart.png │ ├── perf-sink-test-layout.svg │ ├── perf-source-cosmos-chart.png │ ├── perf-source-kafka-chart.png │ ├── perf-source-msgs-per-sec-chart.png │ ├── perf-source-test-layout.svg │ ├── select-azure.png │ ├── select-enable-schema.png │ ├── select-ksqlDB.png │ ├── select-name-launch.png │ ├── select-schema-region.png │ ├── upload-connector-config.png │ ├── vscode-close-forwarded-ports.png │ ├── vscode-dev-container-progress.png │ └── vscode-remote-explorer.png ├── pom.xml └── src ├── docker ├── .env ├── .gitignore ├── Dockerfile ├── Dockerfile.sinkperf ├── docker-compose.yml ├── perf-payload │ └── sink │ │ ├── large.txt │ │ ├── medium.txt │ │ └── small.txt ├── resources │ ├── sink-uuid-smt.example.json │ ├── sink.example.json │ ├── sink.example.properties │ ├── source.example.json │ ├── source.example.properties │ └── standalone.example.properties ├── startup.ps1 └── startup.sh ├── main ├── java │ └── com │ │ └── azure │ │ └── cosmos │ │ └── kafka │ │ └── connect │ │ ├── ConnectorTestConfigurations.java │ │ ├── CosmosDBConfig.java │ │ ├── TopicContainerMap.java │ │ ├── implementations │ │ ├── CosmosClientStore.java │ │ └── CosmosKafkaSchedulers.java │ │ ├── sink │ │ ├── BulkWriter.java │ │ ├── CosmosDBSinkConfig.java │ │ ├── CosmosDBSinkConnector.java │ │ ├── CosmosDBSinkTask.java │ │ ├── CosmosDBWriteException.java │ │ ├── ExceptionsHelper.java │ │ ├── IWriter.java │ │ ├── PointWriter.java │ │ ├── SinkOperationContext.java │ │ ├── SinkOperationFailedResponse.java │ │ ├── SinkWriteResponse.java │ │ ├── SinkWriterBase.java │ │ ├── StructToJsonMap.java │ │ └── id │ │ │ └── strategy │ │ │ ├── AbstractIdStrategy.java │ │ │ ├── AbstractIdStrategyConfig.java │ │ │ ├── FullKeyStrategy.java │ │ │ ├── IdStrategy.java │ │ │ ├── KafkaMetadataStrategy.java │ │ │ ├── KafkaMetadataStrategyConfig.java │ │ │ ├── ProvidedInConfig.java │ │ │ ├── ProvidedInKeyStrategy.java │ │ │ ├── ProvidedInStrategy.java │ │ │ ├── ProvidedInValueStrategy.java │ │ │ ├── TemplateStrategy.java │ │ │ └── TemplateStrategyConfig.java │ │ └── source │ │ ├── CosmosDBSourceConfig.java │ │ ├── CosmosDBSourceConnector.java │ │ ├── CosmosDBSourceTask.java │ │ ├── JsonToStruct.java │ │ └── LinkedTransferQueue.class └── resources │ └── 
log4j.properties ├── perf ├── .gitignore ├── README.md ├── cluster │ ├── charts │ │ ├── connect │ │ │ ├── .helmignore │ │ │ ├── Chart.yaml │ │ │ ├── README.md │ │ │ ├── helm-config.yaml │ │ │ ├── templates │ │ │ │ ├── NOTES.txt │ │ │ │ ├── _helpers.tpl │ │ │ │ ├── deployment.yaml │ │ │ │ ├── jmx-configmap.yaml │ │ │ │ ├── secrets.yaml │ │ │ │ └── service.yaml │ │ │ └── values.yaml │ │ └── sink-perf │ │ │ ├── Chart.yaml │ │ │ ├── templates │ │ │ ├── _helpers.tpl │ │ │ └── deployment.yaml │ │ │ └── values.yaml │ └── manifests │ │ ├── cosmos-dashboard.json │ │ ├── kafka-client.yaml │ │ ├── kafka-helm-config.yaml │ │ └── single-sink-perf-client.yaml └── perf-driver.sh └── test ├── .gitignore ├── java └── com │ └── azure │ └── cosmos │ └── kafka │ └── connect │ ├── CosmosConfigTest.java │ ├── IntegrationTest.java │ ├── TopicContainerMapTest.java │ ├── sink │ ├── BulkWriterTests.java │ ├── CosmosDBSinkConfigTest.java │ ├── CosmosDBSinkConnectorConfigTest.java │ ├── CosmosDBSinkConnectorTest.java │ ├── CosmosDBSinkTaskTest.java │ ├── CosmosDBSinkTaskTestNotFails.java │ ├── PointWriterTest.java │ ├── StructToJsonMapTest.java │ ├── id │ │ └── strategy │ │ │ ├── ProvidedInStrategyTest.java │ │ │ └── TemplateStrategyTest.java │ └── integration │ │ └── SinkConnectorIT.java │ └── source │ ├── CosmosDBSourceConfigTest.java │ ├── CosmosDBSourceConnectorTest.java │ ├── CosmosDBSourceTaskTest.java │ └── integration │ └── SourceConnectorIT.java └── resources ├── log4j.properties └── mockito-extensions └── org.mockito.plugins.MockMaker /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG VARIANT="11" 2 | FROM mcr.microsoft.com/vscode/devcontainers/java:0-${VARIANT} 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | ARG USERNAME="vscode" 7 | 8 | RUN curl https://packages.microsoft.com/config/debian/10/packages-microsoft-prod.deb --output /tmp/packages-microsoft-prod.deb && \ 9 | dpkg -i /tmp/packages-microsoft-prod.deb && \ 10 | apt-get update && \ 11 | apt-get -y install --no-install-recommends apt-utils dialog && \ 12 | apt-get -y install --no-install-recommends apt-transport-https ca-certificates software-properties-common libssl-dev libffi-dev \ 13 | build-essential gnupg-agent dnsutils httpie bash-completion curl wget git unzip jq lsb-release procps gnupg2 powershell && \ 14 | apt-get -y upgrade 15 | 16 | COPY .devcontainer/library-scripts /tmp/library-scripts/ 17 | 18 | # [Option] Install Maven 19 | ARG INSTALL_MAVEN="true" 20 | ARG MAVEN_VERSION="3.6.3" 21 | RUN if [ "${INSTALL_MAVEN}" = "true" ]; then su vscode -c "source /usr/local/sdkman/bin/sdkman-init.sh && sdk install maven \"${MAVEN_VERSION}\""; fi 22 | 23 | # [Option] Install Azure CLI 24 | ARG INSTALL_AZURE_CLI="true" 25 | RUN if [ "$INSTALL_AZURE_CLI" = "true" ]; then bash /tmp/library-scripts/azcli-debian.sh; fi 26 | 27 | # [Option] Install Confluent Platform and CLI 28 | ARG INSTALL_CONFLUENT="true" 29 | ARG CONFLUENT_VERSION="6.0" 30 | ARG CONFLUENT_CLI_VERSION="v1.16.0" 31 | ENV LOG_DIR=/home/$USERNAME/logs 32 | RUN if [ "$INSTALL_CONFLUENT" = "true" ]; then bash /tmp/library-scripts/confluent-debian.sh "${CONFLUENT_VERSION}" "${CONFLUENT_CLI_VERSION}"; fi 33 | 34 | # [Option] Install Docker CLI 35 | ARG INSTALL_DOCKER="true" 36 | RUN if [ "${INSTALL_DOCKER}" = "true" ]; then bash /tmp/library-scripts/docker-debian.sh "${USERNAME}"; fi 37 | 38 | USER $USERNAME 39 | -------------------------------------------------------------------------------- 
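The Dockerfile above exposes each optional tool (Maven, Azure CLI, Confluent Platform, Docker CLI) as a build arg, so the image can also be built on its own outside of VS Code. The following is only an illustrative sketch, not a script from this repo: the image tag is an arbitrary placeholder, and it assumes the command is run from the repository root, which is the build context that devcontainer.json points at.

```bash
# Sketch: build the dev container image directly with Docker (run from the repo root).
# The -t tag is a placeholder; flip any INSTALL_* arg to "false" to skip that feature.
docker build \
  -f .devcontainer/Dockerfile \
  --build-arg VARIANT=11 \
  --build-arg INSTALL_MAVEN=true \
  --build-arg INSTALL_AZURE_CLI=true \
  --build-arg INSTALL_CONFLUENT=true \
  --build-arg INSTALL_DOCKER=true \
  -t kafka-connect-cosmosdb-dev .
```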
/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kafka-connect-cosmosdb", 3 | 4 | "build": { 5 | "dockerfile": "Dockerfile", 6 | "context": "..", 7 | "args": { 8 | "VARIANT": "11", 9 | "INSTALL_MAVEN": "true", 10 | "MAVEN_VERSION": "3.6.3", 11 | "INSTALL_AZURE_CLI": "true", 12 | "INSTALL_CONFLUENT": "true", 13 | "CONFLUENT_VERSION": "6.0", 14 | "CONFLUENT_CLI_VERSION": "v1.16.0", 15 | "INSTALL_DOCKER": "true" 16 | } 17 | }, 18 | 19 | "mounts": [ 20 | "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" 21 | ], 22 | 23 | "settings": { 24 | "terminal.integrated.shell.linux": "/bin/bash", 25 | "java.home": "/docker-java-home" 26 | }, 27 | 28 | "remoteUser": "vscode", 29 | 30 | // Open ports for Confluent Control Center and Kafka UI Services 31 | "forwardPorts": [9000, 9001, 9004, 9021], 32 | 33 | // Install extensions 34 | "extensions": [ 35 | "vscjava.vscode-java-debug", 36 | "vscjava.vscode-java-dependency", 37 | "vscjava.vscode-java-pack", 38 | "vscjava.vscode-java-test", 39 | "redhat.java", 40 | "redhat.vscode-xml", 41 | "eamodio.gitlens", 42 | "davidanson.vscode-markdownlint", 43 | "gruntfuggly.todo-tree", 44 | "ms-azuretools.vscode-docker", 45 | "gabrielbb.vscode-lombok", 46 | "sonarsource.sonarlint-vscode", 47 | "visualstudioexptteam.vscodeintellicode", 48 | "vscjava.vscode-maven", 49 | "ms-vscode.PowerShell" 50 | ], 51 | 52 | // Run Bash script in .devcontainer directory 53 | "postCreateCommand": "/bin/bash ./.devcontainer/post-install.sh" 54 | } 55 | -------------------------------------------------------------------------------- /.devcontainer/library-scripts/azcli-debian.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #------------------------------------------------------------------------------------------------------------- 3 | # Copyright (c) Microsoft Corporation. All rights reserved. 4 | # Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. 5 | #------------------------------------------------------------------------------------------------------------- 6 | # 7 | # Docs: https://github.com/microsoft/vscode-dev-containers/blob/master/script-library/docs/azcli.md 8 | # 9 | # Syntax: ./azcli-debian.sh 10 | 11 | set -e 12 | 13 | if [ "$(id -u)" -ne 0 ]; then 14 | echo -e 'Script must be run as root. Use sudo, su, or add "USER root" to your Dockerfile before running this script.' 15 | exit 1 16 | fi 17 | 18 | export DEBIAN_FRONTEND=noninteractive 19 | 20 | # Install curl, apt-transport-https, lsb-release, or gpg if missing 21 | if ! dpkg -s apt-transport-https curl ca-certificates lsb-release > /dev/null 2>&1 || ! type gpg > /dev/null 2>&1; then 22 | if [ ! -d "/var/lib/apt/lists" ] || [ "$(ls /var/lib/apt/lists/ | wc -l)" = "0" ]; then 23 | apt-get update 24 | fi 25 | apt-get -y install --no-install-recommends apt-transport-https curl ca-certificates lsb-release gnupg2 26 | fi 27 | 28 | # Install the Azure CLI 29 | echo "deb [arch=amd64] https://packages.microsoft.com/repos/azure-cli/ $(lsb_release -cs) main" > /etc/apt/sources.list.d/azure-cli.list 30 | curl -sL https://packages.microsoft.com/keys/microsoft.asc | (OUT=$(apt-key add - 2>&1) || echo $OUT) 31 | apt-get update 32 | apt-get install -y azure-cli 33 | echo "Done!" 
34 | -------------------------------------------------------------------------------- /.devcontainer/library-scripts/confluent-debian.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #------------------------------------------------------------------------------------------------------------- 3 | # Copyright (c) Microsoft Corporation. All rights reserved. 4 | # Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. 5 | #------------------------------------------------------------------------------------------------------------- 6 | # 7 | # Docs: https://docs.confluent.io/platform/current/installation/installing_cp/deb-ubuntu.html 8 | # 9 | # Syntax: ./confluent-debian.sh [confluent platform version to install] [confluent CLI version to install] 10 | 11 | CONFLUENT_VERSION=${1:-"6.0"} 12 | CONFLUENT_CLI_VERSION=${2:-"v1.16.0"} 13 | 14 | set -e 15 | 16 | if [ "$(id -u)" -ne 0 ]; then 17 | echo -e 'Script must be run as root. Use sudo, su, or add "USER root" to your Dockerfile before running this script.' 18 | exit 1 19 | fi 20 | 21 | export DEBIAN_FRONTEND=noninteractive 22 | 23 | # Install curl, apt-transport-https, or gpg if missing 24 | if ! dpkg -s apt-transport-https curl ca-certificates > /dev/null 2>&1 || ! type gpg > /dev/null 2>&1; then 25 | if [ ! -d "/var/lib/apt/lists" ] || [ "$(ls /var/lib/apt/lists/ | wc -l)" = "0" ]; then 26 | apt-get update 27 | fi 28 | apt-get -y install --no-install-recommends apt-transport-https curl ca-certificates gnupg2 29 | fi 30 | 31 | # Install Confluent Platform 32 | echo "deb [arch=amd64] https://packages.confluent.io/deb/${CONFLUENT_VERSION} stable main" > /etc/apt/sources.list.d/confluent.list 33 | curl -sL https://packages.confluent.io/deb/${CONFLUENT_VERSION}/archive.key | (OUT=$(apt-key add - 2>&1) || echo $OUT) 34 | apt-get update 35 | apt-get install -y confluent-platform 36 | 37 | # Install the Confluent CLI 38 | wget -O confluent-cli-install.sh https://cnfl.io/cli 39 | sh confluent-cli-install.sh $CONFLUENT_CLI_VERSION 40 | rm confluent-cli-install.sh 41 | 42 | # Set Log directory for Confluent 43 | export LOG_DIR=/var/log/kafka 44 | 45 | echo "Done!" 46 | -------------------------------------------------------------------------------- /.devcontainer/library-scripts/docker-debian.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #------------------------------------------------------------------------------------------------------------- 3 | # Copyright (c) Microsoft Corporation. All rights reserved. 4 | # Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. 5 | #------------------------------------------------------------------------------------------------------------- 6 | # 7 | # To use the Docker CLI inside a container, use the bind mount option to add the docker socket to the container. 8 | # Example: "docker run -v '/var/run/docker.sock:/var/run/docker.sock' ..." 9 | 10 | USERNAME=${1:-"vscode"} 11 | 12 | set -e 13 | 14 | if [ "$(id -u)" -ne 0 ]; then 15 | echo -e 'Script must be run as root. Use sudo, su, or add "USER root" to your Dockerfile before running this script.' 
16 | exit 1 17 | fi 18 | 19 | # Determine the appropriate non-root user 20 | if [ "${USERNAME}" = "auto" ] || [ "${USERNAME}" = "automatic" ]; then 21 | USERNAME="" 22 | POSSIBLE_USERS=("vscode" "codespace" "$(awk -v val=1000 -F ":" '$3==val{print $1}' /etc/passwd)") 23 | for CURRENT_USER in ${POSSIBLE_USERS[@]}; do 24 | if id -u ${CURRENT_USER} > /dev/null 2>&1; then 25 | USERNAME=${CURRENT_USER} 26 | break 27 | fi 28 | done 29 | if [ "${USERNAME}" = "" ]; then 30 | USERNAME=root 31 | fi 32 | elif [ "${USERNAME}" = "none" ] || ! id -u ${USERNAME} > /dev/null 2>&1; then 33 | USERNAME=root 34 | fi 35 | 36 | if [ "$(id -u)" -ne 0 ]; then 37 | echo -e 'Script must be run as root. Use sudo, su, or add "USER root" to your Dockerfile before running this script.' 38 | exit 1 39 | fi 40 | 41 | # Install Docker CLI 42 | curl -fsSL https://download.docker.com/linux/$(lsb_release -is | tr '[:upper:]' '[:lower:]')/gpg | (OUT=$(apt-key add - 2>&1) || echo $OUT) 43 | echo "deb [arch=amd64] https://download.docker.com/linux/$(lsb_release -is | tr '[:upper:]' '[:lower:]') $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list 44 | apt-get update 45 | apt-get -y install --no-install-recommends docker-ce-cli 46 | 47 | # Install Docker Compose 48 | LATEST_COMPOSE_VERSION=$(curl -sSL "https://api.github.com/repos/docker/compose/releases/latest" | grep -o -P '(?<="tag_name": ").+(?=")') 49 | curl -sSL "https://github.com/docker/compose/releases/download/${LATEST_COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose 50 | chmod +x /usr/local/bin/docker-compose 51 | 52 | groupadd -g 800 docker 53 | if [ "${USERNAME}" != "root" ]; then usermod -aG docker $USERNAME; fi 54 | 55 | echo "Done!" 56 | -------------------------------------------------------------------------------- /.devcontainer/post-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Workaround to get docker working as nonroot inside a devcontainer on a local machine 4 | # NOTE: The main docker-debian.sh script https://github.com/microsoft/vscode-dev-containers/blob/master/script-library/docker-debian.sh 5 | # did not work out of the box for setting up docker as nonroot 6 | sudo chmod 660 /var/run/docker.sock 7 | sudo chgrp docker /var/run/docker.sock 8 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Kafka Connect Cosmos DB CODEOWNERS 3 | # ============================================================================== 4 | 5 | # Order is important; the last matching pattern takes the most precedence. 6 | # More info on how this file works can be found at: 7 | # https://help.github.com/articles/about-codeowners/ 8 | 9 | # This file is called CODEOWNERS because it is a magic file for GitHub to 10 | # automatically suggest reviewers. In this project's case, the names below 11 | # should be thought of as code reviewers rather than owners. Regular 12 | # contributors are free to add their names to specific directories or files 13 | # provided that they are willing to provide a review when automatically 14 | # assigned. 15 | 16 | # Absence from this list should not be interpreted as a discouragement to 17 | # review a pull request. 
Peer review is always welcome and is a critical 18 | # component of the progress of the codebase. Information on peer review 19 | # guidelines can be found in the CONTRIBUTING.md doc. 20 | 21 | 22 | # Maintainers 23 | * @microsoft/kafka-connect-cosmosdb-devs 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: For reporting an issue in code or documentation for improvement 4 | title: '' 5 | labels: bug 6 | assignees: '@microsoft/kafka-connect-cosmosdb-devs' 7 | 8 | --- 9 | 10 | ## Description 11 | 12 | - A clear and concise description of what the bug is 13 | 14 | ## Expected Behavior 15 | 16 | - A clear and concise description of what you expected to happen. 17 | 18 | ## Reproduce 19 | 20 | 1. Go to '...' 21 | 2. Click on '....' 22 | 3. Scroll down to '....' 23 | 4. See error 24 | 25 | ## Additional Context 26 | 27 | - _(If applicable: Add any other context about the problem here; for example: proposed solution, doc changes, screenshots, logs, links, etc)_ 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **## Problem Statement** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **## Proposed Solution** 14 | A clear and concise description of what you want to happen. 15 | 16 | **## Alternative Solutions** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **## Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | 22 | ## Next Steps 23 | 24 | - [ ] Team consensus to proceed 25 | - [ ] Schedule Design Session 26 | - [ ] Complete Design Review 27 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Type of PR 2 | 3 | - [ ] Documentation changes 4 | - [ ] Code changes 5 | - [ ] Test changes 6 | - [ ] CI-CD changes 7 | - [ ] GitHub Template changes 8 | 9 | ## Purpose of PR 10 | 11 | ## Observability + Testing 12 | - What changes or considerations did you make in relation to observability? 13 | 14 | - Did you add testing to account for any new or changed work? 15 | 16 | 17 | ## Review notes 18 | 19 | ## Issues Closed or Referenced 20 | 21 | - Closes # (this will automatically close the issue when the PR closes) 22 | - References # (this references the issue but does not close with PR) 23 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. 
Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | branches: [ "main" ] 19 | schedule: 20 | - cron: '35 20 * * 3' 21 | 22 | jobs: 23 | analyze: 24 | name: Analyze 25 | # Runner size impacts CodeQL analysis time. To learn more, please see: 26 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 27 | # - https://gh.io/supported-runners-and-hardware-resources 28 | # - https://gh.io/using-larger-runners 29 | # Consider using larger runners for possible analysis time improvements. 30 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 31 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 32 | permissions: 33 | # required for all workflows 34 | security-events: write 35 | 36 | # only required for workflows in private repositories 37 | actions: read 38 | contents: read 39 | 40 | strategy: 41 | fail-fast: false 42 | matrix: 43 | language: [ 'java-kotlin' ] 44 | # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] 45 | # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both 46 | # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 47 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 48 | 49 | steps: 50 | - name: Checkout repository 51 | uses: actions/checkout@v4 52 | 53 | # Initializes the CodeQL tools for scanning. 54 | - name: Initialize CodeQL 55 | uses: github/codeql-action/init@v3 56 | with: 57 | languages: ${{ matrix.language }} 58 | # If you wish to specify custom queries, you can do so here or in a config file. 59 | # By default, queries listed here will override any specified in a config file. 60 | # Prefix the list here with "+" to use these queries and those in the config file. 61 | 62 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 63 | # queries: security-extended,security-and-quality 64 | 65 | 66 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 67 | # If this step fails, then you should remove it and run the build manually (see below) 68 | - name: Autobuild 69 | uses: github/codeql-action/autobuild@v3 70 | 71 | # ℹ️ Command-line programs to run using the OS shell. 72 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 73 | 74 | # If the Autobuild fails above, remove it and uncomment the following three lines. 75 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
76 | 77 | # - run: | 78 | # echo "Run, Build Application using script" 79 | # ./location_of_script_within_repo/buildscript.sh 80 | 81 | - name: Perform CodeQL Analysis 82 | uses: github/codeql-action/analyze@v3 83 | with: 84 | category: "/language:${{matrix.language}}" 85 | -------------------------------------------------------------------------------- /.github/workflows/mavenCI.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven 3 | 4 | name: Java CI with Maven 5 | 6 | on: 7 | push: 8 | branches: [main, dev] 9 | pull_request: 10 | branches: [main, dev] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v3 20 | - name: Set up JDK 11 21 | uses: actions/setup-java@v3 22 | with: 23 | distribution: 'zulu' 24 | java-version: 11 25 | - name: Build with Maven 26 | run: mvn -B package --file pom.xml 27 | - name: Run Unit Tests 28 | run: mvn clean test 29 | - name: Run Integration Test 30 | shell: pwsh 31 | run: ./src/docker/startup.ps1 32 | if: github.ref == 'refs/heads/main' 33 | - name: Upload JaCoCo coverage report 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: jacoco-report 37 | path: target/site/jacoco/ 38 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | tags: 6 | - v* 7 | 8 | name: Build and create release 9 | permissions: 10 | actions: write 11 | checks: write 12 | contents: write 13 | deployments: write 14 | id-token: write 15 | issues: write 16 | discussions: write 17 | packages: write 18 | pages: write 19 | pull-requests: write 20 | repository-projects: write 21 | security-events: write 22 | statuses: write 23 | 24 | jobs: 25 | build: 26 | name: Build and upload release binary 27 | if: ${{ startsWith(github.ref, 'refs/tags/v')}} 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: Checkout code 31 | uses: actions/checkout@v3 32 | - name: Set up JDK 11 33 | uses: actions/setup-java@v3 34 | with: 35 | distribution: 'zulu' 36 | java-version: 11 37 | - name: Get java-version 38 | id: get-version 39 | run: | 40 | BUILD_VERSION=$( mvn help:evaluate -Dexpression=project.version -q -DforceStdout ) 41 | echo "::set-output name=VERSION::$BUILD_VERSION" 42 | - name: Build with Maven 43 | run: mvn -B package --file pom.xml 44 | - name: Run Unit Tests 45 | run: mvn clean test 46 | - name: Run Integration Test 47 | shell: pwsh 48 | run: ./src/docker/startup.ps1 49 | - name: Create Release 50 | id: create_release 51 | uses: actions/create-release@v1 52 | env: 53 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | with: 55 | tag_name: ${{ github.ref }} 56 | release_name: Release ${{ github.ref }} 57 | draft: false 58 | prerelease: false 59 | - name: Upload Release Asset 1 60 | id: upload-release-asset-1 61 | uses: actions/upload-release-asset@v1 62 | env: 63 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 64 | with: 65 | upload_url: ${{ steps.create_release.outputs.upload_url }} 66 | asset_path: ./target/kafka-connect-cosmos-${{steps.get-version.outputs.VERSION}}-jar-with-dependencies.jar 67 | asset_name: kafka-connect-cosmos-${{steps.get-version.outputs.VERSION}}-jar-with-dependencies.jar 68 | 
asset_content_type: application/java-archive 69 | - name: Upload Release Asset 2 70 | id: upload-release-asset-2 71 | uses: actions/upload-release-asset@v1 72 | env: 73 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 74 | with: 75 | upload_url: ${{ steps.create_release.outputs.upload_url }} 76 | asset_path: ./target/components/packages/microsoftcorporation-kafka-connect-cosmos-${{steps.get-version.outputs.VERSION}}.zip 77 | asset_name: microsoftcorporation-kafka-connect-cosmos-${{steps.get-version.outputs.VERSION}}.zip 78 | asset_content_type: application/zip 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | *.iml 4 | 5 | .vscode/ 6 | 7 | .classpath 8 | .factorypath 9 | .project 10 | .settings/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.MD: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing Code 2 | We highly value contributions made from the community and we thank you for stopping by to learn more about how to contribute to this connector. 3 | 4 | ### Hard Requirements 5 | - Please include a test for any code you submit via PR. 6 | 7 | ### The Basics 8 | - Create your branch from dev 9 | - Open your PR against the dev branch 10 | - Once you open your PR, it will automatically be assigned to the [correct review group](https://github.com/orgs/microsoft/teams/kafka-connect-cosmosdb-devs) 11 | - When we are ready to cut a release, we will merge dev into main. 12 | 13 | ### Issues 14 | - We do not require an issue for submitted PRs unless the fix is for a bug. If it's for a bug, please [create a bug report](https://github.com/microsoft/kafka-connect-cosmosdb/issues/new?assignees=&labels=Bug&template=bug_report.md&title=) and attach it to your PR. 15 | - If your PR solves an existing issue, please make sure you note this in your PR so that the work is attached to the issue. At the end of the PR template, you will see a place to attach your PR to an issue: 16 | 17 | ![Attach PR to issues](doc/images/attach-pr-to-issue.PNG "Attach PR to existing issue") 18 | 19 | 20 | ### Contributor License Agreement 21 | 22 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 23 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 24 | the rights to use your contribution. For details, visit . 25 | 26 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 27 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 28 | provided by the bot. You will only need to do this once across all repos using our CLA. 
29 | 30 | ### Microsoft Code of Conduct 31 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 32 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 33 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 34 | -------------------------------------------------------------------------------- /LICENSE.MD: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 
18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /checkstyle-suppressions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /doc/Confluent_Cloud_Setup.md: -------------------------------------------------------------------------------- 1 | # Confluent Cloud Setup 2 | 3 | This guide walks through setting up Confluent Cloud using Docker containers. 4 | 5 | ## Prerequisites 6 | 7 | - Bash shell 8 | - Will not work in Cloud Shell or WSL1 9 | - Java 11+ ([download](https://www.oracle.com/java/technologies/javase-jdk11-downloads.html)) 10 | - Maven ([download](https://maven.apache.org/download.cgi)) 11 | - Docker ([download](https://www.docker.com/products/docker-desktop)) 12 | - CosmosDB [Setting up an Azure Cosmos DB Instance](CosmosDB_Setup.md) 13 | 14 | ## Setup 15 | 16 | ### Create Confluent Cloud Account and Setup Cluster 17 | Go to [create account](https://www.confluent.io/get-started/) and fill out the appropriate fields. 18 | 19 | ![SignupConfluentCloud](./images/SignUpConfluentCloud.png) 20 | 21 | --- 22 | 23 | Select environments. 24 | 25 | ![EnvironmentClick](./images/environment-click.png) 26 | 27 | --- 28 | 29 | Select default which is an environment automatically setup by confluent. 30 | 31 | ![DefaultClick](./images/click-default.png) 32 | 33 | --- 34 | 35 | - Select add cluster. 36 | 37 | ![Add Cluster](./images/click-add-cluster.png) 38 | 39 | --- 40 | 41 | - Select Azure create the cluster and choose the same region as the Cosmos DB instance you created. 42 | 43 | ![Select Azure](./images/select-azure.png) 44 | 45 | --- 46 | 47 | - Name the cluster, and then select launch cluster. 48 | 49 | ![Name and Launch](./images/select-name-launch.png) 50 | 51 | 52 | ### Create ksqlDB Cluster 53 | From inside the cluster select ksqlDB. Select add cluster. Select continue, name the cluster, and then select launch. 54 | 55 | ![ksqlDB](./images/select-ksqlDB.png) 56 | 57 | ### Create Schema Registry 58 | From inside the confluent environment in the right panel select enable schema governance. 
59 | 60 | ![select schema](./images/select-enable-schema.png) 61 | 62 | --- 63 | 64 | Then select the region for azure and select enable. 65 | 66 | ![ksqlDB](./images/select-schema-region.png) 67 | 68 | ### Update Configurations 69 | Update the sink and source connectors configuration files located in `src/test/resources` by filling out the values for `connect.cosmos.connection.endpoint` and `connect.cosmos.master.key`, which you should have saved from the [Cosmos DB setup guide](CosmosDB_Setup.md). 70 | 71 | ```bash 72 | cd kafka-connect-cosmosdb 73 | 74 | # copy the template connector configs into src/test/resources 75 | # you will need to update the configs for the Cosmos values as mentioned above 76 | 77 | cp src/docker/resources/sink.example.json src/test/resources/sink.config.json 78 | cp src/docker/resources/source.example.json src/test/resources/source.config.json 79 | ``` 80 | Update the `src/main/java/com.azure.cosmos.kafka.connect/ConnectorTestConfigurations` file with the following configurations. 81 | - The cluster key and secret can be found under api keys in the cluster. Choose the one for ksqlDB. 82 | - The `BOOTSTRAP_SERVERS` endpoint can be found in the cluster under cluster settings and end endpoints. 83 | - The schema registry key and secret can be found on the bottom of the right panel inside the confluent environment under credentials. 84 | - The schema registry url can be found on the bottom of the right panel inside the confluent environment under Endpoint. 85 | 86 | Update the `src/docker/.env` file with the same configurations as above. Make sure to follow the following format for the `SASL_JAAS_CONFIG` and the `SCHEMA_REGISTRY_BASIC_AUTH_USER_INFO`. 87 | 88 | ![env-configuration](./images/env-configurations.png) 89 | ### Start Confluent Platform Docker 90 | > May take several minutes to run in order to download docker images for the Confluent platform components. 91 | 92 | ```bash 93 | cd $REPO_ROOT/src/docker/ 94 | 95 | ./startup.sh 96 | 97 | # verify the services are up and running 98 | docker-compose ps 99 | ``` 100 | ### Create Topic in Confluent Cloud UI 101 | Any topic used in the integration tests will need to be created in Confluent Cloud or through the [Confluent Cli](https://docs.confluent.io/cloud/current/client-apps/topics/manage.html#:~:text=Confluent%20CLI%20Follow%20these%20steps%20to%20create%20a,aren%E2%80%99t%20any%20topics%20created%20yet%2C%20click%20Create%20topic.) (Requires installing the Confluent Cli first). 102 | This will show how to create one through Confluent Cloud UI. 103 | 104 | Inside the Cluster Overview, scroll down and select topics and partitions. 105 | 106 | ![topic-partition](./images/Topics-Partitions.png) 107 | 108 | --- 109 | 110 | Select add topic. 111 | 112 | ![add-topic](./images/add-topic.png) 113 | 114 | --- 115 | 116 | Name the topic and select create with defaults. Afterward, a prompt will appear about creating a schema. This can be 117 | skipped as the tests will create the schemas. 118 | 119 | ![name-topic](./images/name-topic.png) 120 | 121 | ### Running Integration Tests 122 | 123 | Navigate to root project directory and execute the integration tests. Run startup.sh to rebuild the jar for the connector as necessary. 
124 | 125 | ```bash 126 | # run the integration tests 127 | mvn clean test-compile failsafe:integration-test 128 | ``` 129 | 130 | ### Cleanup 131 | 132 | Tear down the Confluent Platform setup and cleanup any unnecessary resources 133 | 134 | ```bash 135 | 136 | cd $REPO_ROOT/src/docker 137 | 138 | # bring down all docker containers 139 | docker-compose down 140 | 141 | # remove dangling volumes and networks 142 | docker system prune -f --volumes --filter "label=io.confluent.docker" 143 | 144 | ``` 145 | 146 | 147 | ## Common Errors 148 | 149 | ### Topic not found 150 | `org.apache.kafka.common.errors.TimeoutException: Topic sink-test-json-schema not present in metadata after 2000 ms.` 151 | - Create the topic used in the corresponding test in Confluent Cloud 152 | 153 | ## Resources to Improve Infrastructure 154 | - [Docker Configurations](https://docs.confluent.io/platform/current/installation/docker/config-reference.html) 155 | - [Configuration Options](https://docs.confluent.io/platform/current/installation/configuration/index.html) 156 | - [Connect Confluent Platform Components to Confluent Cloud](https://docs.confluent.io/cloud/current/cp-component/index.html) 157 | -------------------------------------------------------------------------------- /doc/Confluent_Platform_Setup.md: -------------------------------------------------------------------------------- 1 | # Confluent Platform Setup 2 | 3 | This guide walks through setting up Confluent Platform using Docker containers. 4 | 5 | ## Prerequisites 6 | 7 | - Bash shell (tested on Github Codespaces, Mac, Ubuntu, Windows with WSL2) 8 | - Will not work in Cloud Shell or WSL1 9 | - Java 11+ ([download](https://www.oracle.com/java/technologies/javase-jdk11-downloads.html)) 10 | - Maven ([download](https://maven.apache.org/download.cgi)) 11 | - Docker ([download](https://www.docker.com/products/docker-desktop)) 12 | - Powershell (optional) ([download](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell)) 13 | 14 | ## Setup 15 | 16 | > [Github Codespaces](https://github.com/features/codespaces) is the easiest way to evaluate the Cosmos DB Kafka Connectors as all of the prerequisites are automatically installed. Similarly, you can also use Visual Studio Code Dev Containers to setup locally. 17 | > 18 | > Follow the setup steps in the [developer setup](./Developer_Walkthrough.md) to setup Codespaces and/or Dev Containers. 19 | 20 | ### Initialize repo 21 | 22 | Clone the Kafka Connect Cosmos DB repo 23 | 24 | ```bash 25 | 26 | ### skip this step if using Codespaces or Dev Containers 27 | 28 | git clone https://github.com/microsoft/kafka-connect-cosmosdb.git 29 | 30 | cd kafka-connect-cosmosdb 31 | export REPO_ROOT=$(pwd) 32 | 33 | ``` 34 | 35 | ### Startup 36 | 37 | Start up the docker containers for Confluent Platform using `docker-compose` 38 | 39 | If you're using codespaces or dev containers, either option will work. Otherwise, use the script best suited to your shell environment. 40 | 41 | > NOTE: If you're using dev containers, you will need to stop ALL of the forwarded ports before you start this step. This is to prevent Visual Studio Code from occupying the ports and allowing the new docker containers to use them instead. 42 | > 43 | > You can do this from the `Remote Explorer` menu as shown below or you can open up the Command Palette (`F1` key) and search for `Stop Forwarding Port`. 
44 | 45 | ![Close Forwarded Ports](./images/vscode-close-forwarded-ports.png "Close Forwarded Ports") 46 | 47 | > Running either script for the first time may take several minutes to run in order to download docker images for the Confluent platform components. 48 | 49 | ```bash 50 | 51 | cd $REPO_ROOT/src/docker 52 | 53 | # Option 1: Use the bash script to setup 54 | ./startup.sh 55 | 56 | # Option 2: Use the powershell script to setup 57 | pwsh startup.ps1 58 | 59 | # verify the services are up and running 60 | docker-compose ps 61 | 62 | ``` 63 | 64 | > If you are **not** using Codespaces and the containers fail to come up, you may need to increase the memory allocation for Docker to 3 GB or more. 65 | > 66 | > Rerun the startup script to reinitialize the docker containers. 67 | 68 | Your Confluent Platform setup is now ready to use! 69 | 70 | ### Running Kafka Connect standalone mode 71 | 72 | The Kafka Connect container that is included with the Confluent Platform setup runs Kafka Connect in `distributed mode`. Using Kafka Connect in `distributed mode` is *recommended* since you can interact with connectors using the Control Center UI. 73 | 74 | If you instead would like to run Kafka Connect in `standalone mode`, which is useful for quick testing, continue through this section. For more information on Kafka Connect standalone and distributed modes, refer to these [Confluent docs](https://docs.confluent.io/home/connect/userguide.html#standalone-vs-distributed-mode). 75 | 76 | > NOTE: This step will only work if you're using Codespaces or Dev Containers. 77 | > 78 | > You will also need to fill out the values for `connect.cosmos.connection.endpoint` and `connect.cosmos.master.key` in the `sink.properties` and/or `source.properties` files, which you should have saved from the [Cosmos DB setup guide](./CosmosDB_Setup.md). 79 | 80 | ```bash 81 | 82 | ### skip this step if using Kafka Connect as distributed mode (recommended) 83 | 84 | cd $REPO_ROOT/src/docker/resources 85 | 86 | # create copies of template files 87 | # you will need to update the configs for the Cosmos values as mentioned above 88 | 89 | cp source.example.properties source.properties 90 | cp sink.example.properties sink.properties 91 | cp standalone.example.properties standalone.properties 92 | 93 | # Setup a Cosmos source connector 94 | connect-standalone standalone.properties source.properties 95 | 96 | # Setup a Cosmos sink connector 97 | connect-standalone standalone.properties sink.properties 98 | 99 | ``` 100 | 101 | ### Access Confluent Platform components 102 | 103 | All of the Confluent Platform services should now be accessible on `localhost`. You can also access the web interfaces for some services as shown below. 104 | 105 | > If you're using Codespaces, you need to go through the forwarded ports to view the following webpages. Navigate to the 'Forwarded Ports' section in the 'Remote Explorer' extension to access these forwarded ports. 106 | 107 | ![Access forwarded ports](./images/codespaces-forwarded-ports.png "Access forwarded ports") 108 | 109 | > Alternatively, `localhost` addresses will automatically redirect from within the Codespaces instance. For more information on accessing forwarded ports in Codespaces, refer to these [docs](https://docs.github.com/en/free-pro-team@latest/github/developing-online-with-codespaces/developing-in-a-codespace#forwarding-ports).
110 | 111 | | Name | Address | Description | 112 | | --- | --- | --- | 113 | | Control Center | | The main webpage for all Confluent services where you can create topics, configure connectors, interact with the Connect cluster (only for distributed mode) and more. | 114 | | Kafka Topics UI | | Useful to viewing Kafka topics and the messages within them. | 115 | | Schema Registry UI | | Can view and create new schemas, ideal for interacting with Avro data. | 116 | | ZooNavigator | | Web interface for Zookeeper. Refer to the [docs](https://zoonavigator.elkozmon.com/en/stable/) for more information. | 117 | 118 | ### Cleanup 119 | 120 | Tear down the Confluent Platform setup and cleanup any unneeded resources 121 | 122 | ```bash 123 | 124 | cd $REPO_ROOT/src/docker 125 | 126 | # bring down all docker containers 127 | docker-compose down 128 | 129 | # remove dangling volumes and networks 130 | docker system prune -f --volumes --filter "label=io.confluent.docker" 131 | 132 | ``` 133 | -------------------------------------------------------------------------------- /doc/CosmosDB_Setup.md: -------------------------------------------------------------------------------- 1 | # Setting up an Azure Cosmos DB Instance 2 | 3 | ## Prerequisites 4 | 5 | - Azure subscription with permissions to create: 6 | - Resource Groups, Cosmos DB 7 | - Bash shell (tested on Visual Studio Codespaces, Cloud Shell, Mac, Ubuntu, Windows with WSL2) 8 | - Will not work with WSL1 9 | - Azure CLI ([download](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest)) 10 | 11 | ## Create Azure Cosmos DB Instance, Database and Container 12 | 13 | Login to Azure and select subscription. 14 | 15 | ```bash 16 | 17 | az login 18 | 19 | # show your Azure accounts 20 | az account list -o table 21 | 22 | # select the Azure subscription if necessary 23 | az account set -s {subscription name or Id} 24 | 25 | ``` 26 | 27 | Create a new Azure Resource Group for this quickstart, then add to it a Cosmos DB Account, Database and Container using the Azure CLI. 
28 | 29 | > The `az cosmosdb sql` extension is currently in preview and is subject to change 30 | 31 | ```bash 32 | 33 | # replace with a unique name 34 | # do not use punctuation or uppercase (a-z, 0-9) 35 | export Cosmos_Name={your Cosmos DB name} 36 | 37 | ## if true, change name to avoid DNS failure on create 38 | az cosmosdb check-name-exists -n ${Cosmos_Name} 39 | 40 | # set environment variables 41 | export Cosmos_Location="centralus" 42 | export Cosmos_Database="kafkaconnect" 43 | export Cosmos_Container="kafka" 44 | 45 | # Resource Group Name 46 | export Cosmos_RG=${Cosmos_Name}-rg-cosmos 47 | 48 | # create a new resource group 49 | az group create -n $Cosmos_RG -l $Cosmos_Location 50 | 51 | # create the Cosmos DB server 52 | # this command takes several minutes to run 53 | az cosmosdb create -g $Cosmos_RG -n $Cosmos_Name 54 | 55 | # create the database 56 | # 400 is the minimum --throughput (RUs) 57 | az cosmosdb sql database create -a $Cosmos_Name -n $Cosmos_Database -g $Cosmos_RG --throughput 400 58 | 59 | # create the container 60 | # /id is the partition key (case sensitive) 61 | az cosmosdb sql container create -p /id -g $Cosmos_RG -a $Cosmos_Name -d $Cosmos_Database -n $Cosmos_Container 62 | 63 | # OPTIONAL: Enable Time to Live (TTL) on the container 64 | export Cosmos_Container_TTL=1000 65 | az cosmosdb sql container update -g $Cosmos_RG -a $Cosmos_Name -d $Cosmos_Database -n $Cosmos_Container --ttl=$Cosmos_Container_TTL 66 | 67 | ``` 68 | 69 | With the Azure Cosmos DB instance setup, you will need to get the Cosmos DB endpoint URI and primary connection key. These values will be used to setup the Cosmos DB Source and Sink connectors. 70 | 71 | ```bash 72 | 73 | # Keep note of both of the following values as they will be used later 74 | 75 | # get Cosmos DB endpoint URI 76 | echo https://${Cosmos_Name}.documents.azure.com:443/ 77 | 78 | # get Cosmos DB primary connection key 79 | az cosmosdb keys list -n $Cosmos_Name -g $Cosmos_RG --query primaryMasterKey -o tsv 80 | 81 | ``` 82 | 83 | ### Cleanup 84 | 85 | Remove the Cosmos DB instance and the associated resource group 86 | 87 | ```bash 88 | 89 | # delete Cosmos DB instance 90 | az cosmosdb delete -g $Cosmos_RG -n $Cosmos_Name 91 | 92 | # delete Cosmos DB resource group 93 | az group delete --no-wait -y -n $Cosmos_RG 94 | 95 | ``` 96 | -------------------------------------------------------------------------------- /doc/Developer_Walkthrough.md: -------------------------------------------------------------------------------- 1 | # Developer Walkthrough and Project Setup 2 | 3 | ## Setup 4 | 5 | Github Codespaces is the easiest way to evaluate this repository as all of the prerequisites are automatically installed. If you want to setup locally instead of through Codespaces, using a Dev Container with Visual Studio Code is recommended as the Java based prerequisites are automatically installed. 
6 | 7 | ### Github Codespaces 8 | 9 | - Open the Github repo in a web browser 10 | - [kafka-connect-cosmosdb](https://github.com/microsoft/kafka-connect-cosmosdb) 11 | - Click on the `Code` button 12 | 13 | ![Open with codespaces](./images/codespaces-open-with.png "Open with Codespaces") 14 | 15 | - Click on `Open with Codespaces` 16 | - Click on the `New codespace` button 17 | 18 | ![New codespace](./images/codespaces-new-codespace.png "New codespace") 19 | 20 | - Codespaces will display a Creation Log window 21 | - You may close this window once all the steps have completed 22 | - Open a terminal in Codespaces 23 | - Use the command palette or press ctrl + ` 24 | - You can also click on the Menu button in the upper left corner 25 | - choose view 26 | - choose terminal 27 | 28 | ![Codespaces Menu Button](./images/codespaces-menu-button.png "Codespaces Menu Button") 29 | 30 | To manage your codespaces, visit the [Github Codespaces page](https://github.com/codespaces). 31 | 32 | ### Dev Container using Visual Studio Code 33 | 34 | #### Prerequisites (Dev Container setup) 35 | 36 | Ensure you have the following prerequisites installed. 37 | 38 | - Docker ([download](https://www.docker.com/products/docker-desktop)) 39 | - Visual Studio Code ([download](https://code.visualstudio.com/download)) 40 | - VS Code Remote Development Extension Pack ([download](https://aka.ms/vscode-remote/download/extension)) 41 | 42 | #### Dev Container Installation 43 | 44 | Set up a Dev Container for the [kafka-connect-cosmosdb](https://github.com/microsoft/kafka-connect-cosmosdb) repo by cloning it in a Docker container. 45 | 46 | - In VS Code, open the `Remote Explorer` menu from the left sidebar and click on the `Clone Repository in Container Volume` option as shown below. Alternatively, you can open up the Command Palette (`F1` key) and search for `Remote Containers: Clone Repository` 47 | 48 | ![Open Remote Explorer](./images/vscode-remote-explorer.png "Open Remote Explorer") 49 | 50 | - Enter the URL for the kafka-connect-cosmosdb repo 51 | - https://github.com/microsoft/kafka-connect-cosmosdb 52 | - Select the `Create a new volume` option. 53 | - Use the default values for the volume name and target folder name. If the default values are not pre-filled, use `vs-remote-container` for the volume name and `kafka-connect-cosmosdb` for the target folder name. 54 | - The VS Code window will reload and start building the dev container, which can take several minutes. You only have to build a dev container the first time you open it; opening the folder after the first successful build will be much quicker. 55 | 56 | ![Dev Container Progress](./images/vscode-dev-container-progress.png "Dev Container Progress") 57 | 58 | You can manage your dev containers from the `Remote Explorer` menu from the left sidebar of Visual Studio Code (as shown previously in the first step of installation). 59 | 60 | For more information on working with Dev Containers in Visual Studio Code, refer to the [documentation](https://code.visualstudio.com/docs/remote/containers). 61 | 62 | ### Manual Setup 63 | 64 | #### Prerequisites (manual setup) 65 | 66 | Ensure you have the following prerequisites installed.
67 | 68 | - Bash shell (tested on Github Codespaces, Mac, Ubuntu, Windows with WSL2) 69 | - Will not work in Cloud Shell or WSL1 70 | - Docker ([download](https://www.docker.com/products/docker-desktop)) 71 | - Git 72 | - Java 11+ ([download](https://www.oracle.com/java/technologies/javase-jdk11-downloads.html)) 73 | - Maven ([download](https://maven.apache.org/download.cgi)) 74 | 75 | #### Manual Installation 76 | 77 | Clone the microsoft/kafka-connect-cosmosdb GitHub repository. 78 | 79 | ```bash 80 | 81 | git clone https://github.com/microsoft/kafka-connect-cosmosdb.git 82 | 83 | ``` 84 | 85 | - Configure your IDE to use a JDK in one of the following ways. 86 | - Set the value of the `JAVA_HOME` [system environment variable](https://docs.oracle.com/cd/E19182-01/821-0917/inst_jdk_javahome_t/index.html) to the install location of the JDK, for example, `C:\Program Files\Java\jdk-13.0.2`. 87 | - If you want to configure only VS Code to use the JDK, use the `java.home` setting in [VS Code's User or Workspace settings](https://code.visualstudio.com/docs/getstarted/settings). 88 | - Download the Maven dependencies. 89 | 90 | ## Testing 91 | 92 | ### Unit Tests 93 | 94 | Navigate to the root project directory and execute the unit tests 95 | 96 | ```bash 97 | 98 | cd kafka-connect-cosmosdb 99 | mvn clean test 100 | 101 | ``` 102 | 103 | ### Integration Tests 104 | 105 | To run the integration tests, you will need to first set up the following resources: 106 | 107 | - [Confluent Platform](Confluent_Platform_Setup.md) 108 | - [Setting up an Azure Cosmos DB Instance](CosmosDB_Setup.md) 109 | 110 | Update the sink and source connector configuration files located in `src/test/resources` by filling out the values for `connect.cosmos.connection.endpoint` and `connect.cosmos.master.key`, which you should have saved from the [Cosmos DB setup guide](CosmosDB_Setup.md).
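For illustration, once the template configs have been copied into `src/test/resources` (the `cp` commands appear in the block below), the two Cosmos DB values could be filled in with a small helper like this. This sketch is not part of the repo: it assumes `jq` is installed and that the JSON templates keep the connector settings under a top-level `config` object; the endpoint and key values are placeholders.

```bash
# Hypothetical helper (not in the repo): inject the Cosmos DB endpoint and key
# into the copied test configs. Assumes the connector settings sit under a
# top-level "config" object in each JSON file.
export COSMOS_ENDPOINT="https://<your-cosmos-account>.documents.azure.com:443/"  # placeholder
export COSMOS_KEY="<your-primary-master-key>"                                    # placeholder

for f in src/test/resources/sink.config.json src/test/resources/source.config.json; do
  jq --arg ep "$COSMOS_ENDPOINT" --arg key "$COSMOS_KEY" \
     '.config["connect.cosmos.connection.endpoint"] = $ep |
      .config["connect.cosmos.master.key"] = $key' "$f" > "$f.tmp" && mv "$f.tmp" "$f"
done
```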
111 | 112 | Navigate to root project directory and execute the integration tests 113 | 114 | ```bash 115 | 116 | cd kafka-connect-cosmosdb 117 | 118 | # copy the template connector configs into src/test/resources 119 | # you will need to update the configs for the Cosmos values as mentioned above 120 | 121 | cp src/docker/resources/sink.example.json src/test/resources/sink.config.json 122 | cp src/docker/resources/source.example.json src/test/resources/source.config.json 123 | 124 | # run the integration tests 125 | mvn clean test-compile failsafe:integration-test 126 | 127 | ``` 128 | 129 | ## Configure Confluent Platform, Cosmos DB and validate Kafka Connectors 130 | 131 | - [Confluent Platform Setup](Confluent_Platform_Setup.md) 132 | - [Setting up an Azure Cosmos DB Instance](CosmosDB_Setup.md) 133 | - [Kafka Connect Cosmos DB Sink Connector](README_Sink.md) 134 | - [Kafka Connect Cosmos DB Source Connector](README_Source.md) 135 | -------------------------------------------------------------------------------- /doc/images/SignUpConfluentCloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/SignUpConfluentCloud.png -------------------------------------------------------------------------------- /doc/images/Topics-Partitions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/Topics-Partitions.png -------------------------------------------------------------------------------- /doc/images/add-topic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/add-topic.png -------------------------------------------------------------------------------- /doc/images/attach-pr-to-issue.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/attach-pr-to-issue.PNG -------------------------------------------------------------------------------- /doc/images/click-add-cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/click-add-cluster.png -------------------------------------------------------------------------------- /doc/images/click-default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/click-default.png -------------------------------------------------------------------------------- /doc/images/codespaces-forwarded-ports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/codespaces-forwarded-ports.png -------------------------------------------------------------------------------- /doc/images/codespaces-menu-button.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/codespaces-menu-button.png -------------------------------------------------------------------------------- /doc/images/codespaces-new-codespace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/codespaces-new-codespace.png -------------------------------------------------------------------------------- /doc/images/codespaces-open-with.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/codespaces-open-with.png -------------------------------------------------------------------------------- /doc/images/converter-misconfigurations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/converter-misconfigurations.png -------------------------------------------------------------------------------- /doc/images/cosmosdb-sink-records.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/cosmosdb-sink-records.png -------------------------------------------------------------------------------- /doc/images/delete-connector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/delete-connector.png -------------------------------------------------------------------------------- /doc/images/delete-source-connector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/delete-source-connector.png -------------------------------------------------------------------------------- /doc/images/env-configurations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/env-configurations.png -------------------------------------------------------------------------------- /doc/images/environment-click.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/environment-click.png -------------------------------------------------------------------------------- /doc/images/microsoft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/microsoft.png -------------------------------------------------------------------------------- /doc/images/name-topic.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/name-topic.png -------------------------------------------------------------------------------- /doc/images/perf-sink-cosmos-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/perf-sink-cosmos-chart.png -------------------------------------------------------------------------------- /doc/images/perf-sink-kafka-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/perf-sink-kafka-chart.png -------------------------------------------------------------------------------- /doc/images/perf-source-cosmos-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/perf-source-cosmos-chart.png -------------------------------------------------------------------------------- /doc/images/perf-source-kafka-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/perf-source-kafka-chart.png -------------------------------------------------------------------------------- /doc/images/perf-source-msgs-per-sec-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/perf-source-msgs-per-sec-chart.png -------------------------------------------------------------------------------- /doc/images/select-azure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/select-azure.png -------------------------------------------------------------------------------- /doc/images/select-enable-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/select-enable-schema.png -------------------------------------------------------------------------------- /doc/images/select-ksqlDB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/select-ksqlDB.png -------------------------------------------------------------------------------- /doc/images/select-name-launch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/select-name-launch.png -------------------------------------------------------------------------------- /doc/images/select-schema-region.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/select-schema-region.png -------------------------------------------------------------------------------- /doc/images/upload-connector-config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/upload-connector-config.png -------------------------------------------------------------------------------- /doc/images/vscode-close-forwarded-ports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/vscode-close-forwarded-ports.png -------------------------------------------------------------------------------- /doc/images/vscode-dev-container-progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/vscode-dev-container-progress.png -------------------------------------------------------------------------------- /doc/images/vscode-remote-explorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/kafka-connect-cosmosdb/06fff0172d5dcade1a995abaf9db3acc85864783/doc/images/vscode-remote-explorer.png -------------------------------------------------------------------------------- /src/docker/.env: -------------------------------------------------------------------------------- 1 | BOOTSTRAP_SERVERS= 2 | SASL_JAAS_CONFIG=org.apache.kafka.common.security.plain.PlainLoginModule required username="ClusterKey" password="ClusterSecret"; 3 | SCHEMA_REGISTRY_URL= 4 | BASIC_AUTH_CREDENTIALS_SOURCE=USER_INFO 5 | SCHEMA_REGISTRY_BASIC_AUTH_USER_INFO=SchemaRegistryKey:SchemaRegistrySecret -------------------------------------------------------------------------------- /src/docker/.gitignore: -------------------------------------------------------------------------------- 1 | connectors/ 2 | log.txt 3 | 4 | # Exclude all temporary files in resources 5 | !resources/*example 6 | resources/sink.properties 7 | resources/source.properties 8 | resources/standalone.properties 9 | -------------------------------------------------------------------------------- /src/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build the Cosmos DB Connectors on top of the Kafka Connect image 2 | FROM confluentinc/cp-kafka-connect:6.0.0 3 | 4 | # Install datagen connector 5 | RUN confluent-hub install --no-prompt confluentinc/kafka-connect-datagen:latest 6 | 7 | COPY connectors/ /etc/kafka-connect/jars 8 | -------------------------------------------------------------------------------- /src/docker/Dockerfile.sinkperf: -------------------------------------------------------------------------------- 1 | # Use the Kafka client image as base 2 | FROM confluentinc/cp-kafka:6.0.1 3 | 4 | COPY perf-payload/sink/ /etc/payload 5 | 6 | ENTRYPOINT [ "kafka-producer-perf-test" ] 7 | -------------------------------------------------------------------------------- /src/docker/perf-payload/sink/small.txt: -------------------------------------------------------------------------------- 1 | {"HotelName": "Marriott", "Description": "Marriott 
description"} 2 | {"HotelName": "HolidayInn", "Description": "HolidayInn description"} 3 | {"HotelName": "Motel8", "Description": "Motel8 description"} 4 | {"HotelName": "Ramada", "Description": "Ramada description"} 5 | {"HotelName": "Hyatt", "Description": "Hyatt description"} 6 | {"HotelName": "Hilton", "Description": "Hilton description"} 7 | {"HotelName": "ComfortInn", "Description": "ComfortInn description"} -------------------------------------------------------------------------------- /src/docker/resources/sink-uuid-smt.example.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cosmosdb-sink-connector", 3 | "config": { 4 | "connector.class": "com.azure.cosmos.kafka.connect.sink.CosmosDBSinkConnector", 5 | "tasks.max": "1", 6 | "topics": "sink-test", 7 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 8 | "value.converter.schemas.enable": "false", 9 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 10 | "key.converter.schemas.enable": "false", 11 | "connect.cosmos.connection.endpoint": "https://.documents.azure.com:443/", 12 | "connect.cosmos.master.key": "", 13 | "connect.cosmos.databasename": "kafkaconnect", 14 | "connect.cosmos.containers.topicmap": "sink-test#kafka", 15 | "transforms": "insertID", 16 | "transforms.insertID.type": "com.github.cjmatta.kafka.connect.smt.InsertUuid$Value", 17 | "transforms.insertID.uuid.field.name": "id" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/docker/resources/sink.example.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cosmosdb-sink-connector", 3 | "config": { 4 | "connector.class": "com.azure.cosmos.kafka.connect.sink.CosmosDBSinkConnector", 5 | "tasks.max": "1", 6 | "topics": "sink-test", 7 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 8 | "value.converter.schemas.enable": "false", 9 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 10 | "key.converter.schemas.enable": "false", 11 | "connect.cosmos.connection.endpoint": "https://.documents.azure.com:443/", 12 | "connect.cosmos.master.key": "", 13 | "connect.cosmos.databasename": "kafkaconnect", 14 | "connect.cosmos.containers.topicmap": "sink-test#kafka" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/docker/resources/sink.example.properties: -------------------------------------------------------------------------------- 1 | name=cosmosdb-sink-connector 2 | connector.class=com.azure.cosmos.kafka.connect.sink.CosmosDBSinkConnector 3 | tasks.max=1 4 | format=json 5 | topics=connect-test 6 | connect.cosmos.connection.endpoint=https://.documents.azure.com:443/ 7 | connect.cosmos.master.key= 8 | connect.cosmos.databasename=kafkaconnect 9 | connect.cosmos.containers.topicmap=connect-test\#kafka 10 | value.converter=org.apache.kafka.connect.json.JsonConverter 11 | value.converter.schemas.enable=false 12 | key.converter=org.apache.kafka.connect.json.JsonConverter 13 | key.converter.schemas.enable=false 14 | -------------------------------------------------------------------------------- /src/docker/resources/source.example.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cosmosdb-source-connector", 3 | "config": { 4 | "connector.class": "com.azure.cosmos.kafka.connect.source.CosmosDBSourceConnector", 5 | "tasks.max": "1", 6 |
"value.converter": "org.apache.kafka.connect.json.JsonConverter", 7 | "value.converter.schemas.enable": "false", 8 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 9 | "key.converter.schemas.enable": "false", 10 | "connect.cosmos.task.poll.interval": "1000", 11 | "connect.cosmos.offset.useLatest": false, 12 | "connect.cosmos.connection.endpoint": "https://.documents.azure.com:443/", 13 | "connect.cosmos.master.key": "", 14 | "connect.cosmos.databasename": "kafkaconnect", 15 | "connect.cosmos.containers.topicmap": "source-test#kafka" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/docker/resources/source.example.properties: -------------------------------------------------------------------------------- 1 | name=cosmosdb-source-connector 2 | connector.class=com.azure.cosmos.kafka.connect.source.CosmosDBSourceConnector 3 | tasks.max=1 4 | format=json 5 | connect.cosmos.task.poll.interval=1000 6 | connect.cosmos.offset.useLatest=false 7 | connect.cosmos.connection.endpoint=https://.documents.azure.com:443/ 8 | connect.cosmos.master.key= 9 | connect.cosmos.databasename=kafkaconnect 10 | connect.cosmos.containers.topicmap=apparels\#kafka 11 | value.converter=org.apache.kafka.connect.json.JsonConverter 12 | value.converter.schemas.enable=true 13 | key.converter=org.apache.kafka.connect.json.JsonConverter 14 | key.converter.schemas.enable=false 15 | -------------------------------------------------------------------------------- /src/docker/resources/standalone.example.properties: -------------------------------------------------------------------------------- 1 | # These are defaults. This file just demonstrates how to override some settings. 2 | bootstrap.servers=localhost:9092 3 | 4 | # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will 5 | # need to configure these based on the format they want their data in when loaded from or stored into Kafka 6 | key.converter=org.apache.kafka.connect.storage.StringConverter 7 | value.converter=org.apache.kafka.connect.storage.StringConverter 8 | # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply 9 | # it to 10 | key.converter.schemas.enable=false 11 | value.converter.schemas.enable=false 12 | 13 | # The internal converter used for offsets and config data is configurable and must be specified, but most users will 14 | # always want to use the built-in default. Offset and config data is never visible outside of Copcyat in this format. 15 | internal.key.converter=org.apache.kafka.connect.json.JsonConverter 16 | internal.value.converter=org.apache.kafka.connect.json.JsonConverter 17 | internal.key.converter.schemas.enable=false 18 | internal.value.converter.schemas.enable=false 19 | 20 | offset.storage.file.filename=/tmp/connect.offsets 21 | # Flush much faster than normal, which is useful for testing/debugging 22 | offset.flush.interval.ms=10000 23 | 24 | plugin.path=/usr/share/java,/etc/kafka-connect/jars,/home/vscode/workspace/kafka-connect-cosmosdb/src/docker/connectors 25 | rest.port=9898 26 | -------------------------------------------------------------------------------- /src/docker/startup.ps1: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env pwsh 2 | $ErrorActionPreference='Stop' 3 | cd $PSScriptRoot 4 | Write-Host "Shutting down Docker Compose orchestration..." 
5 | docker compose down 6 | 7 | Write-Host "Deleting prior Cosmos DB connectors..." 8 | rm -rf "$PSScriptRoot/connectors" 9 | New-Item -Path "$PSScriptRoot" -ItemType "directory" -Name "connectors" -Force | Out-Null 10 | cd $PSScriptRoot/../.. 11 | 12 | Write-Host "Rebuilding Cosmos DB connectors..." 13 | mvn clean package -DskipTests=true 14 | copy target\*-jar-with-dependencies.jar $PSScriptRoot/connectors 15 | cd $PSScriptRoot 16 | 17 | Write-Host "Adding custom Insert UUID SMT" 18 | cd $PSScriptRoot/connectors 19 | git clone https://github.com/confluentinc/kafka-connect-insert-uuid.git insertuuid -q && cd insertuuid 20 | mvn clean package -DskipTests=true 21 | copy target\*.jar $PSScriptRoot/connectors 22 | rm -rf "$PSScriptRoot/connectors/insertuuid" 23 | cd $PSScriptRoot 24 | 25 | Write-Host "Building Cosmos DB Kafka Connect Docker image" 26 | docker build . -t cosmosdb-kafka-connect:latest 27 | 28 | Write-Host "Starting Docker Compose..." 29 | docker compose up -d 30 | -------------------------------------------------------------------------------- /src/docker/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Shutting down Docker Compose orchestration..." 3 | docker compose down 4 | 5 | echo "Deleting prior Cosmos DB connectors..." 6 | rm -rf connectors 7 | mkdir connectors 8 | cd ../../ 9 | 10 | echo "Rebuilding Cosmos DB connectors..." 11 | mvn clean package -DskipTests=true 12 | cp target/*-jar-with-dependencies.jar src/docker/connectors 13 | cd src/docker 14 | 15 | echo "Adding custom Insert UUID SMT" 16 | cd connectors 17 | git clone https://github.com/confluentinc/kafka-connect-insert-uuid.git insertuuid -q && cd insertuuid 18 | mvn clean package -DskipTests=true 19 | cp target/*.jar ../ 20 | cd .. && rm -rf insertuuid 21 | cd ../ 22 | 23 | echo "Building Cosmos DB Kafka Connect Docker image" 24 | docker build . -t cosmosdb-kafka-connect:latest 25 | 26 | echo "Starting Docker Compose..." 27 | docker compose up -d 28 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/ConnectorTestConfigurations.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect; 5 | 6 | public final class ConnectorTestConfigurations { 7 | 8 | private static final String KAFKA_CLUSTER_KEY = ""; 9 | private static final String KAFKA_CLUSTER_SECRET = ""; 10 | private static final String SCHEMA_REGISTRY_KEY = ""; 11 | 12 | private static final String SCHEMA_REGISTRY_SECRET = ""; 13 | public static final String SCHEMA_REGISTRY_URL = ""; 14 | public static final String BOOTSTRAP_SERVER = ""; 15 | public static final String SASL_JAAS = String.format("org.apache.kafka.common.security.plain.PlainLoginModule required username='%s' password='%s';", KAFKA_CLUSTER_KEY, KAFKA_CLUSTER_SECRET); 16 | public static final String BASIC_AUTH_USER_INFO = SCHEMA_REGISTRY_KEY + ":" + SCHEMA_REGISTRY_SECRET; 17 | 18 | 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/TopicContainerMap.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect; 5 | 6 | import org.apache.commons.collections4.BidiMap; 7 | import org.apache.commons.collections4.bidimap.DualHashBidiMap; 8 | import org.apache.commons.lang3.StringUtils; 9 | 10 | import java.util.Arrays; 11 | import java.util.ArrayList; 12 | import java.util.Collection; 13 | import java.util.List; 14 | import java.util.Objects; 15 | import java.util.Optional; 16 | import java.util.stream.Collectors; 17 | import java.util.stream.Stream; 18 | 19 | /** 20 | * Maps Kafka topics to CosmosDB Containers 21 | */ 22 | public class TopicContainerMap { 23 | 24 | 25 | private final BidiMap map; 26 | 27 | private TopicContainerMap(BidiMap map) { 28 | this.map = map; 29 | } 30 | 31 | public static TopicContainerMap deserialize(String input) { 32 | if (StringUtils.isEmpty(input)) { 33 | return TopicContainerMap.empty(); 34 | } 35 | 36 | if (StringUtils.contains(input, '#')) { // There's at least one pair 37 | String[] items = StringUtils.split(input, ','); 38 | Stream keyValuePairs = Arrays.stream(items).map(item -> { 39 | String[] pair = StringUtils.split(item, '#'); 40 | pair[0] = StringUtils.trimToNull(pair[0]); 41 | pair[1] = StringUtils.trimToNull(pair[1]); 42 | return pair; 43 | }); 44 | 45 | BidiMap map = new DualHashBidiMap<>(); 46 | keyValuePairs.forEach(pair -> map.put(pair[0], pair[1])); 47 | return new TopicContainerMap(map); 48 | } else { 49 | throw new IllegalArgumentException("Invalid topic container map."); 50 | } 51 | } 52 | 53 | /** 54 | * Creates an empty map of topic containers 55 | * 56 | * @return Returns a map of Topics assigned to containers. 57 | */ 58 | public static TopicContainerMap empty() { 59 | return new TopicContainerMap(new DualHashBidiMap<>()); 60 | } 61 | 62 | public String serialize() { 63 | return map.entrySet().stream() 64 | .map(entry -> entry.getKey() + "#" + entry.getValue()) 65 | .collect(Collectors.joining(",")); 66 | } 67 | 68 | /** 69 | * Adds topic names, generating default container names 70 | * 71 | * @param topicNames Collection of topic names 72 | */ 73 | public void addTopics(Collection topicNames) { 74 | Objects.requireNonNull(topicNames); 75 | for (String topicName : topicNames) { 76 | map.putIfAbsent(topicName, topicName); 77 | } 78 | } 79 | 80 | public Optional getContainerForTopic(String topicName) { 81 | return Optional.ofNullable(map.get(topicName)); 82 | } 83 | 84 | public Optional getTopicForContainer(String containerName) { 85 | return Optional.ofNullable(map.inverseBidiMap().get(containerName)); 86 | } 87 | 88 | public List getContainerList() { 89 | return new ArrayList(map.values()); 90 | } 91 | 92 | } 93 | 94 | 95 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/implementations/CosmosClientStore.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.implementations; 5 | 6 | import com.azure.cosmos.ConsistencyLevel; 7 | import com.azure.cosmos.CosmosAsyncClient; 8 | import com.azure.cosmos.CosmosClientBuilder; 9 | import com.azure.cosmos.kafka.connect.source.CosmosDBSourceConfig; 10 | import org.apache.commons.lang3.StringUtils; 11 | 12 | import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; 13 | 14 | public class CosmosClientStore { 15 | public static CosmosAsyncClient getCosmosClient(CosmosDBSourceConfig config, String userAgentSuffix) { 16 | checkArgument(StringUtils.isNotEmpty(userAgentSuffix), "Argument 'userAgentSuffix' can not be null"); 17 | 18 | CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() 19 | .endpoint(config.getConnEndpoint()) 20 | .key(config.getConnKey()) 21 | .consistencyLevel(ConsistencyLevel.SESSION) 22 | .contentResponseOnWriteEnabled(true) 23 | .connectionSharingAcrossClientsEnabled(config.isConnectionSharingEnabled()) 24 | .userAgentSuffix(userAgentSuffix); 25 | 26 | if (config.isGatewayModeEnabled()) { 27 | cosmosClientBuilder.gatewayMode(); 28 | } 29 | 30 | return cosmosClientBuilder.buildAsyncClient(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/implementations/CosmosKafkaSchedulers.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.implementations; 5 | 6 | import reactor.core.scheduler.Scheduler; 7 | import reactor.core.scheduler.Schedulers; 8 | 9 | public class CosmosKafkaSchedulers { 10 | private static final String COSMOS_KAFKA_CFP_THREAD_NAME = "cosmos-kafka-cfp-bounded-elastic"; 11 | private static final int TTL_FOR_SCHEDULER_WORKER_IN_SECONDS = 60; // same as BoundedElasticScheduler.DEFAULT_TTL_SECONDS 12 | 13 | // Custom bounded elastic scheduler for kafka connector 14 | public static final Scheduler COSMOS_KAFKA_CFP_BOUNDED_ELASTIC = Schedulers.newBoundedElastic( 15 | Schedulers.DEFAULT_BOUNDED_ELASTIC_SIZE, 16 | Schedulers.DEFAULT_BOUNDED_ELASTIC_QUEUESIZE, 17 | COSMOS_KAFKA_CFP_THREAD_NAME, 18 | TTL_FOR_SCHEDULER_WORKER_IN_SECONDS, 19 | true 20 | ); 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/CosmosDBSinkConfig.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.kafka.connect.sink.id.strategy.AbstractIdStrategyConfig; 7 | import com.azure.cosmos.kafka.connect.sink.id.strategy.IdStrategy; 8 | import com.azure.cosmos.kafka.connect.sink.id.strategy.ProvidedInValueStrategy; 9 | import org.apache.kafka.common.config.ConfigDef; 10 | 11 | import java.util.Map; 12 | 13 | import com.azure.cosmos.kafka.connect.CosmosDBConfig; 14 | import org.apache.kafka.common.config.ConfigDef.Importance; 15 | import org.apache.kafka.common.config.ConfigDef.Type; 16 | import org.apache.kafka.common.config.ConfigDef.Width; 17 | import org.apache.kafka.common.config.ConfigException; 18 | 19 | /** 20 | * Contains settings for the Kafka ComsosDB Sink Connector 21 | */ 22 | 23 | @SuppressWarnings ({"squid:S1854", "squid:S2160"}) // suppress unneeded int *groupOrder variables, equals method 24 | public class CosmosDBSinkConfig extends CosmosDBConfig { 25 | public static final String ID_STRATEGY_CONFIG = AbstractIdStrategyConfig.ID_STRATEGY; 26 | public static final Class ID_STRATEGY_DEFAULT = ProvidedInValueStrategy.class; 27 | public static final String ID_STRATEGY_DOC = 28 | "A strategy used to populate the document with an ``id``. Valid strategies are: " 29 | + "``TemplateStrategy``, ``FullKeyStrategy``, ``KafkaMetadataStrategy``, " 30 | + "``ProvidedInKeyStrategy``, ``ProvidedInValueStrategy``. For each strategy, the full " 31 | + "name of the strategy must be specified, e.g. " 32 | + "com.azure.cosmos.kafka.connect.sink.id.strategy.TemplateStrategy. Configuration " 33 | + "properties prefixed with``id.strategy`` are passed through to the strategy. For " 34 | + "example, when using" 35 | + "``id.strategy=com.azure.cosmos.kafka.connect.sink.id.strategy.TemplateStrategy`` , " 36 | + "the property ``id.strategy.template`` is passed through to the template strategy " 37 | + "and used to specify the template string to be used in constructing the ``id``."; 38 | public static final String TEMPLATE_CONFIG_DISPLAY = "ID Strategy"; 39 | 40 | private IdStrategy idStrategy; 41 | 42 | public CosmosDBSinkConfig(ConfigDef config, Map parsedConfig) { 43 | super(config, parsedConfig); 44 | 45 | this.idStrategy = createIdStrategy(); 46 | } 47 | 48 | public CosmosDBSinkConfig(Map parsedConfig) { 49 | this(getConfig(), parsedConfig); 50 | } 51 | 52 | public static ConfigDef getConfig() { 53 | ConfigDef result = CosmosDBConfig.getConfig(); 54 | 55 | final String groupName = "ID Strategy Parameters"; 56 | int groupOrder = 0; 57 | 58 | result 59 | .define( 60 | ID_STRATEGY_CONFIG, 61 | Type.CLASS, 62 | ID_STRATEGY_DEFAULT, 63 | Importance.HIGH, 64 | ID_STRATEGY_DOC, 65 | groupName, 66 | groupOrder++, 67 | Width.MEDIUM, 68 | TEMPLATE_CONFIG_DISPLAY 69 | ); 70 | 71 | return result; 72 | } 73 | 74 | private IdStrategy createIdStrategy() { 75 | IdStrategy idStrategy; 76 | try { 77 | idStrategy = (IdStrategy) getClass(ID_STRATEGY_CONFIG).getConstructor().newInstance(); 78 | } catch (Exception e) { 79 | throw new ConfigException("Could not instantiate IdStrategy", e); 80 | } 81 | idStrategy.configure(this.originalsWithPrefix(AbstractIdStrategyConfig.PREFIX)); 82 | return idStrategy; 83 | } 84 | 85 | public IdStrategy idStrategy() { 86 | return idStrategy; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/CosmosDBSinkConnector.java: -------------------------------------------------------------------------------- 1 | // 
Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import static com.azure.cosmos.kafka.connect.CosmosDBConfig.validateConnection; 7 | import static com.azure.cosmos.kafka.connect.CosmosDBConfig.validateTopicMap; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.function.Function; 13 | import java.util.stream.Collectors; 14 | 15 | import org.apache.kafka.common.config.Config; 16 | import org.apache.kafka.common.config.ConfigDef; 17 | import org.apache.kafka.common.config.ConfigValue; 18 | import org.apache.kafka.connect.connector.Task; 19 | import org.apache.kafka.connect.sink.SinkConnector; 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | /** 24 | * A Sink connector that publishes topic messages to CosmosDB. 25 | */ 26 | public class CosmosDBSinkConnector extends SinkConnector { 27 | private static final Logger logger = LoggerFactory.getLogger(CosmosDBSinkConnector.class); 28 | private Map configProps; 29 | 30 | @Override 31 | public void start(Map props) { 32 | logger.debug("Starting CosmosDB sink connector."); 33 | configProps = props; 34 | } 35 | 36 | @Override 37 | public Class taskClass() { 38 | return CosmosDBSinkTask.class; 39 | } 40 | 41 | @Override 42 | public List> taskConfigs(int maxTasks) { 43 | logger.info("Setting task configurations for {} workers.", maxTasks); 44 | final List> configs = new ArrayList<>(maxTasks); 45 | for (int i = 0; i < maxTasks; ++i) { 46 | configs.add(configProps); 47 | } 48 | return configs; 49 | } 50 | 51 | @Override 52 | public void stop() { 53 | logger.debug("Stopping CosmosDB Sink Connector."); 54 | } 55 | 56 | @Override 57 | public ConfigDef config() { 58 | return CosmosDBSinkConfig.getConfig(); 59 | } 60 | 61 | @Override 62 | public String version() { 63 | return this.getClass().getPackage().getImplementationVersion(); 64 | } 65 | 66 | @Override 67 | public Config validate(Map connectorConfigs) { 68 | Config config = super.validate(connectorConfigs); 69 | if (config.configValues().stream().anyMatch(cv -> !cv.errorMessages().isEmpty())) { 70 | return config; 71 | } 72 | 73 | Map configValues = config.configValues().stream().collect( 74 | Collectors.toMap(ConfigValue::name, Function.identity())); 75 | 76 | validateConnection(connectorConfigs, configValues); 77 | validateTopicMap(connectorConfigs, configValues); 78 | 79 | return config; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/CosmosDBWriteException.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import org.apache.kafka.connect.errors.ConnectException; 7 | 8 | /** 9 | * Exception thrown when an attempt to write a message to CosmosDB has failed. 
10 | */ 11 | public class CosmosDBWriteException extends ConnectException { 12 | /** 13 | * 14 | */ 15 | private static final long serialVersionUID = 1L; 16 | 17 | public CosmosDBWriteException(String message) { 18 | super(message); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/ExceptionsHelper.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.CosmosException; 7 | import com.azure.cosmos.implementation.HttpConstants; 8 | 9 | public class ExceptionsHelper { 10 | public static boolean isTransientFailure(int statusCode, int substatusCode) { 11 | return statusCode == HttpConstants.StatusCodes.GONE 12 | || statusCode == HttpConstants.StatusCodes.SERVICE_UNAVAILABLE 13 | || statusCode == HttpConstants.StatusCodes.INTERNAL_SERVER_ERROR 14 | || statusCode == HttpConstants.StatusCodes.REQUEST_TIMEOUT 15 | || (statusCode == HttpConstants.StatusCodes.NOTFOUND && substatusCode == HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE); 16 | 17 | } 18 | 19 | public static boolean isTransientFailure(Exception e) { 20 | if (e instanceof CosmosException) { 21 | return isTransientFailure(((CosmosException) e).getStatusCode(), ((CosmosException) e).getSubStatusCode()); 22 | } 23 | 24 | return false; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/IWriter.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import org.apache.kafka.connect.sink.SinkRecord; 7 | 8 | import java.util.List; 9 | 10 | public interface IWriter { 11 | SinkWriteResponse write(List sinkRecords); 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/PointWriter.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.CosmosContainer; 7 | import com.azure.cosmos.CosmosException; 8 | import org.apache.kafka.connect.sink.SinkRecord; 9 | 10 | import java.util.List; 11 | 12 | import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; 13 | 14 | public class PointWriter extends SinkWriterBase { 15 | private final CosmosContainer container; 16 | 17 | public PointWriter(CosmosContainer container, int maxRetryCount) { 18 | super(maxRetryCount); 19 | 20 | checkNotNull(container, "Argument 'container' can not be null"); 21 | this.container = container; 22 | } 23 | 24 | @Override 25 | protected SinkWriteResponse writeCore(List sinkRecords) { 26 | checkNotNull(sinkRecords, "Argument 'sinkRecords' should not be null"); 27 | SinkWriteResponse sinkWriteResponse = new SinkWriteResponse(); 28 | 29 | for (SinkRecord sinkRecord : sinkRecords) { 30 | try { 31 | container.upsertItem(sinkRecord.value()); 32 | sinkWriteResponse.getSucceededRecords().add(sinkRecord); 33 | } catch (CosmosException cosmosException) { 34 | // Generally we would want to retry for the transient exceptions, and fail-fast for non-transient exceptions 35 | // Putting the non-transient exceptions at the front of the list 36 | // so later when deciding retry behavior, only examining the first exception will be enough 37 | if (ExceptionsHelper.isTransientFailure(cosmosException)) { 38 | sinkWriteResponse 39 | .getFailedRecordResponses() 40 | .add(new SinkOperationFailedResponse(sinkRecord, cosmosException)); 41 | } else { 42 | sinkWriteResponse 43 | .getFailedRecordResponses() 44 | .add(0, new SinkOperationFailedResponse(sinkRecord, cosmosException)); 45 | } 46 | 47 | } 48 | } 49 | 50 | return sinkWriteResponse; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/SinkOperationContext.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import org.apache.kafka.connect.sink.SinkRecord; 7 | 8 | public class SinkOperationContext { 9 | private final SinkRecord sinkRecord; 10 | 11 | public SinkOperationContext(SinkRecord sinkRecord) { 12 | this.sinkRecord = sinkRecord; 13 | } 14 | 15 | public SinkRecord getSinkRecord() { 16 | return this.sinkRecord; 17 | } 18 | 19 | public long getKafkaOffset() { 20 | return this.sinkRecord.kafkaOffset(); 21 | } 22 | 23 | public Integer getKafkaPartition() { 24 | return this.sinkRecord.kafkaPartition(); 25 | } 26 | 27 | public String getTopic() { 28 | return this.sinkRecord.topic(); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/SinkOperationFailedResponse.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.CosmosException; 7 | import org.apache.kafka.connect.sink.SinkRecord; 8 | 9 | public class SinkOperationFailedResponse { 10 | private final SinkRecord sinkRecord; 11 | private final Exception exception; 12 | 13 | public SinkOperationFailedResponse(SinkRecord sinkRecord, CosmosException cosmosException) { 14 | this.sinkRecord = sinkRecord; 15 | this.exception = cosmosException; 16 | } 17 | 18 | public SinkRecord getSinkRecord() { 19 | return this.sinkRecord; 20 | } 21 | 22 | public Exception getException() { 23 | return this.exception; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/SinkWriteResponse.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import org.apache.kafka.connect.sink.SinkRecord; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | public class SinkWriteResponse { 12 | private final List succeededRecords; 13 | private List failedRecordResponses; 14 | 15 | public SinkWriteResponse() { 16 | succeededRecords = new ArrayList<>(); 17 | failedRecordResponses = new ArrayList<>(); 18 | } 19 | 20 | public List getSucceededRecords() { 21 | return succeededRecords; 22 | } 23 | 24 | public List getFailedRecordResponses() { 25 | return failedRecordResponses; 26 | } 27 | 28 | public void setFailedRecordResponses(List failedRecordResponses) { 29 | this.failedRecordResponses = failedRecordResponses; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/SinkWriterBase.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import org.apache.kafka.connect.sink.SinkRecord; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.List; 11 | import java.util.stream.Collectors; 12 | 13 | public abstract class SinkWriterBase implements IWriter { 14 | private static final Logger logger = LoggerFactory.getLogger(SinkWriterBase.class); 15 | private final int maxRetryCount; 16 | 17 | public SinkWriterBase(int maxRetryCount) { 18 | this.maxRetryCount = maxRetryCount; 19 | } 20 | 21 | protected abstract SinkWriteResponse writeCore(List sinkRecords); 22 | 23 | @Override 24 | public SinkWriteResponse write(List sinkRecords) { 25 | 26 | SinkWriteResponse sinkWriteResponse = writeCore(sinkRecords); 27 | int retryCount = 0; 28 | 29 | List toBeRetriedRecords; 30 | while (shouldRetry(retryCount, sinkWriteResponse)) { 31 | retryCount++; 32 | toBeRetriedRecords = sinkWriteResponse.getFailedRecordResponses().stream().map(SinkOperationFailedResponse::getSinkRecord).collect(Collectors.toList()); 33 | SinkWriteResponse retryResponse = writeCore(toBeRetriedRecords); 34 | sinkWriteResponse.getSucceededRecords().addAll(retryResponse.getSucceededRecords()); 35 | sinkWriteResponse.setFailedRecordResponses(retryResponse.getFailedRecordResponses()); 36 | } 37 | 38 | return sinkWriteResponse; 39 | } 40 | 41 | private boolean shouldRetry(int currentRetryCount, SinkWriteResponse response) { 42 | if (response == null || response.getFailedRecordResponses().size() == 0) { 43 | // there is no failed operation 44 | return false; 45 | } 46 | 47 | if (currentRetryCount >= this.maxRetryCount) { 48 | logger.warn("Exhausted all the retries, will not retry anymore."); 49 | return false; 50 | } 51 | 52 | // If there are any non-transient exceptions, then retry will NOT happen 53 | // for optimization purpose, the non-transient exception will be put in the front 54 | // so it will be enough to only examine the first record in the list 55 | if (!ExceptionsHelper.isTransientFailure(response.getFailedRecordResponses().get(0).getException())) { 56 | return false; 57 | } 58 | 59 | return true; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/StructToJsonMap.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import org.apache.kafka.connect.data.Date; 12 | import org.apache.kafka.connect.data.Field; 13 | import org.apache.kafka.connect.data.Schema; 14 | import org.apache.kafka.connect.data.Struct; 15 | import org.apache.kafka.connect.data.Time; 16 | import org.apache.kafka.connect.data.Timestamp; 17 | 18 | public class StructToJsonMap { 19 | 20 | public static Map toJsonMap(Struct struct) { 21 | if (struct == null) { 22 | return null; 23 | } 24 | Map jsonMap = new HashMap(0); 25 | List fields = struct.schema().fields(); 26 | for (Field field : fields) { 27 | String fieldName = field.name(); 28 | Schema.Type fieldType = field.schema().type(); 29 | String schemaName = field.schema().name(); 30 | switch (fieldType) { 31 | case STRING: 32 | jsonMap.put(fieldName, struct.getString(fieldName)); 33 | break; 34 | case INT32: 35 | if (Date.LOGICAL_NAME.equals(schemaName) || Time.LOGICAL_NAME.equals(schemaName)) { 36 | jsonMap.put(fieldName, (java.util.Date) struct.get(fieldName)); 37 | } else { 38 | jsonMap.put(fieldName, struct.getInt32(fieldName)); 39 | } 40 | break; 41 | case INT16: 42 | jsonMap.put(fieldName, struct.getInt16(fieldName)); 43 | break; 44 | case INT64: 45 | if (Timestamp.LOGICAL_NAME.equals(schemaName)) { 46 | jsonMap.put(fieldName, (java.util.Date) struct.get(fieldName)); 47 | } else { 48 | jsonMap.put(fieldName, struct.getInt64(fieldName)); 49 | } 50 | break; 51 | case FLOAT32: 52 | jsonMap.put(fieldName, struct.getFloat32(fieldName)); 53 | break; 54 | case FLOAT64: 55 | jsonMap.put(fieldName, struct.getFloat64(fieldName)); 56 | break; 57 | case BOOLEAN: 58 | jsonMap.put(fieldName, struct.getBoolean(fieldName)); 59 | break; 60 | case ARRAY: 61 | List fieldArray = struct.getArray(fieldName); 62 | if (fieldArray != null && !fieldArray.isEmpty() && fieldArray.get(0) instanceof Struct) { 63 | // If Array contains list of Structs 64 | List jsonArray = new ArrayList<>(); 65 | fieldArray.forEach(item -> { 66 | jsonArray.add(toJsonMap((Struct) item)); 67 | }); 68 | jsonMap.put(fieldName, jsonArray); 69 | } else { 70 | jsonMap.put(fieldName, fieldArray); 71 | } 72 | break; 73 | case STRUCT: 74 | jsonMap.put(fieldName, toJsonMap(struct.getStruct(fieldName))); 75 | break; 76 | case MAP: 77 | jsonMap.put(fieldName, handleMap(struct.getMap(fieldName))); 78 | break; 79 | default: 80 | jsonMap.put(fieldName, struct.get(fieldName)); 81 | break; 82 | } 83 | } 84 | return jsonMap; 85 | } 86 | 87 | public static Map handleMap(Map map) { 88 | if (map == null) { 89 | return null; 90 | } 91 | Map cacheMap = new HashMap<>(); 92 | map.forEach((key, value) -> { 93 | if (value instanceof Map) { 94 | cacheMap.put(key, handleMap((Map) value)); 95 | } else if (value instanceof Struct) { 96 | cacheMap.put(key, toJsonMap((Struct) value)); 97 | } else if (value instanceof List) { 98 | List list = (List) value; 99 | List jsonArray = new ArrayList<>(); 100 | list.forEach(item -> { 101 | if (item instanceof Struct) { 102 | jsonArray.add(toJsonMap((Struct) item)); 103 | } else if (item instanceof Map) { 104 | jsonArray.add(handleMap((Map) item)); 105 | } else { 106 | jsonArray.add(item); 107 | } 108 | }); 109 | cacheMap.put(key, jsonArray); 110 | } else { 111 | cacheMap.put(key, value); 112 | } 113 | }); 114 | return cacheMap; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- 
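The file above does the Struct-to-Map conversion for the sink. As an illustration only (this class is not part of the repository), the following minimal sketch shows how `StructToJsonMap.toJsonMap` flattens a Kafka Connect `Struct` containing a nested struct into a plain `Map`; the `Hotel`/`Address` schema, field names, and values are made up for the example.

```java
import com.azure.cosmos.kafka.connect.sink.StructToJsonMap;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;

import java.util.Map;

public class StructToJsonMapExample {
    public static void main(String[] args) {
        // Illustrative schemas; any Connect Struct produced by a converter is handled the same way.
        Schema addressSchema = SchemaBuilder.struct().name("Address")
                .field("city", Schema.STRING_SCHEMA)
                .field("zipCode", Schema.STRING_SCHEMA)
                .build();
        Schema hotelSchema = SchemaBuilder.struct().name("Hotel")
                .field("HotelName", Schema.STRING_SCHEMA)
                .field("Rooms", Schema.INT32_SCHEMA)
                .field("Address", addressSchema)
                .build();

        Struct hotel = new Struct(hotelSchema)
                .put("HotelName", "Marriott")
                .put("Rooms", 250)
                .put("Address", new Struct(addressSchema)
                        .put("city", "Seattle")
                        .put("zipCode", "98052"));

        // STRING and INT32 fields map to plain values; the nested STRUCT becomes a nested Map.
        Map<String, Object> document = StructToJsonMap.toJsonMap(hotel);
        System.out.println(document); // e.g. {HotelName=Marriott, Rooms=250, Address={city=Seattle, zipCode=98052}} (key order may vary)
    }
}
```

Array-typed fields holding structs and map-typed fields are converted recursively by the `ARRAY` branch and `handleMap` shown above, so nested payloads are preserved in the resulting map.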
/src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/AbstractIdStrategy.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import java.util.Map; 4 | import java.util.regex.Pattern; 5 | 6 | public abstract class AbstractIdStrategy implements IdStrategy { 7 | private static final String SANITIZED_CHAR = "_"; 8 | private static final Pattern SANITIZE_ID_PATTERN = Pattern.compile("[/\\\\?#]"); 9 | 10 | protected Map configs; 11 | 12 | @Override 13 | public void configure(Map configs) { 14 | this.configs = configs; 15 | } 16 | 17 | /** 18 | * Replaces all characters that cannot be part of the ID with {@value SANITIZED_CHAR}. 19 | *
The following characters are restricted and cannot be used in the Id property: '/', '\\', '?', '#' 20 | */ 21 | public static String sanitizeId(String unsanitized) { 22 | return SANITIZE_ID_PATTERN.matcher(unsanitized).replaceAll(SANITIZED_CHAR); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/AbstractIdStrategyConfig.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import org.apache.kafka.common.config.AbstractConfig; 4 | import org.apache.kafka.common.config.ConfigDef; 5 | 6 | import java.util.Map; 7 | 8 | public class AbstractIdStrategyConfig extends AbstractConfig { 9 | public static final String ID = "id"; 10 | public static final String ID_STRATEGY = ID + ".strategy"; 11 | public static final String PREFIX = ID_STRATEGY + "."; 12 | 13 | public AbstractIdStrategyConfig(ConfigDef definition, Map originals) { 14 | super(definition, originals); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/FullKeyStrategy.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | public class FullKeyStrategy extends TemplateStrategy { 7 | @Override 8 | public void configure(Map configs) { 9 | Map conf = new HashMap<>(configs); 10 | conf.put(TemplateStrategyConfig.TEMPLATE_CONFIG, "${key}"); 11 | super.configure(conf); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/IdStrategy.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import org.apache.kafka.common.Configurable; 4 | import org.apache.kafka.connect.sink.SinkRecord; 5 | 6 | public interface IdStrategy extends Configurable { 7 | String generateId(SinkRecord record); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/KafkaMetadataStrategy.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | public class KafkaMetadataStrategy extends TemplateStrategy { 7 | private KafkaMetadataStrategyConfig config; 8 | 9 | @Override 10 | public void configure(Map configs) { 11 | config = new KafkaMetadataStrategyConfig(configs); 12 | Map conf = new HashMap<>(configs); 13 | conf.put(TemplateStrategyConfig.TEMPLATE_CONFIG, 14 | "${topic}" + config.delimiter() 15 | + "${partition}" + config.delimiter() + "${offset}"); 16 | 17 | super.configure(conf); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/KafkaMetadataStrategyConfig.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import org.apache.kafka.common.config.ConfigDef; 4 | 5 | import java.util.Map; 6 | 7 | public class 
KafkaMetadataStrategyConfig extends AbstractIdStrategyConfig { 8 | public static final String DELIMITER_CONFIG = "delimiter"; 9 | public static final String DELIMITER_CONFIG_DEFAULT = "-"; 10 | public static final String DELIMITER_CONFIG_DOC = "The delimiter between metadata components"; 11 | public static final String DELIMITER_CONFIG_DISPLAY = "Kafka Metadata"; 12 | 13 | private String delimiter; 14 | 15 | public KafkaMetadataStrategyConfig(Map props) { 16 | this(getConfig(), props); 17 | } 18 | 19 | public KafkaMetadataStrategyConfig(ConfigDef definition, Map originals) { 20 | super(definition, originals); 21 | 22 | this.delimiter = getString(DELIMITER_CONFIG); 23 | } 24 | 25 | public static ConfigDef getConfig() { 26 | ConfigDef result = new ConfigDef(); 27 | 28 | final String groupName = "Kafka Metadata Parameters"; 29 | int groupOrder = 0; 30 | 31 | result.define( 32 | DELIMITER_CONFIG, 33 | ConfigDef.Type.STRING, 34 | DELIMITER_CONFIG_DEFAULT, 35 | ConfigDef.Importance.MEDIUM, 36 | DELIMITER_CONFIG_DOC, 37 | groupName, 38 | groupOrder++, 39 | ConfigDef.Width.MEDIUM, 40 | DELIMITER_CONFIG_DISPLAY 41 | ); 42 | 43 | return result; 44 | } 45 | 46 | public String delimiter() { 47 | return delimiter; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/ProvidedInConfig.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import org.apache.kafka.common.config.ConfigDef; 4 | 5 | import java.util.Map; 6 | 7 | public class ProvidedInConfig extends AbstractIdStrategyConfig { 8 | public static final String JSON_PATH_CONFIG = "jsonPath"; 9 | public static final String JSON_PATH_CONFIG_DEFAULT = "$.id"; 10 | public static final String JSON_PATH_CONFIG_DOC = "A JsonPath expression to select the desired component to use as ``id``"; 11 | public static final String JSON_PATH_CONFIG_DISPLAY = "JSON Path"; 12 | private final String jsonPath; 13 | 14 | public ProvidedInConfig(Map props) { 15 | this(getConfig(), props); 16 | } 17 | 18 | public ProvidedInConfig(ConfigDef definition, Map originals) { 19 | super(definition, originals); 20 | 21 | this.jsonPath = getString(JSON_PATH_CONFIG); 22 | } 23 | 24 | 25 | public static ConfigDef getConfig() { 26 | ConfigDef result = new ConfigDef(); 27 | 28 | final String groupName = "JsonPath Parameters"; 29 | int groupOrder = 0; 30 | 31 | result.define( 32 | JSON_PATH_CONFIG, 33 | ConfigDef.Type.STRING, 34 | JSON_PATH_CONFIG_DEFAULT, 35 | ConfigDef.Importance.MEDIUM, 36 | JSON_PATH_CONFIG_DOC, 37 | groupName, 38 | groupOrder++, 39 | ConfigDef.Width.MEDIUM, 40 | JSON_PATH_CONFIG_DISPLAY 41 | ); 42 | 43 | return result; 44 | } 45 | 46 | public String jsonPath() { 47 | return jsonPath; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/ProvidedInKeyStrategy.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | public class ProvidedInKeyStrategy extends ProvidedInStrategy { 4 | public ProvidedInKeyStrategy() { 5 | super(ProvidedIn.KEY); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/ProvidedInStrategy.java: 
-------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import com.jayway.jsonpath.JsonPath; 4 | import org.apache.kafka.connect.data.Schema; 5 | import org.apache.kafka.connect.data.Struct; 6 | import org.apache.kafka.connect.data.Values; 7 | import org.apache.kafka.connect.errors.ConnectException; 8 | import org.apache.kafka.connect.sink.SinkRecord; 9 | 10 | import java.util.Map; 11 | 12 | class ProvidedInStrategy extends AbstractIdStrategy { 13 | protected enum ProvidedIn { 14 | KEY, 15 | VALUE 16 | } 17 | 18 | private final ProvidedIn where; 19 | 20 | private ProvidedInConfig config; 21 | 22 | ProvidedInStrategy(ProvidedIn where) { 23 | this.where = where; 24 | } 25 | 26 | @Override 27 | public String generateId(SinkRecord record) { 28 | String value = where == ProvidedIn.KEY 29 | ? Values.convertToString(record.keySchema(), record.key()) 30 | : Values.convertToString(record.valueSchema(), record.value()); 31 | try { 32 | Object object = JsonPath.parse(value).read(config.jsonPath()); 33 | return sanitizeId(Values.convertToString(null, object)); 34 | } catch (Exception e) { 35 | throw new ConnectException("Could not evaluate JsonPath " + config.jsonPath(), e); 36 | } 37 | } 38 | 39 | @Override 40 | public void configure(Map configs) { 41 | config = new ProvidedInConfig(configs); 42 | super.configure(configs); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/ProvidedInValueStrategy.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | public class ProvidedInValueStrategy extends ProvidedInStrategy { 4 | public ProvidedInValueStrategy() { 5 | super(ProvidedIn.VALUE); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/TemplateStrategy.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import com.azure.cosmos.implementation.guava25.collect.ImmutableMap; 4 | import org.apache.kafka.connect.data.Values; 5 | import org.apache.kafka.connect.sink.SinkRecord; 6 | 7 | import java.util.Map; 8 | import java.util.function.Function; 9 | import java.util.regex.Matcher; 10 | import java.util.regex.Pattern; 11 | import java.util.stream.Collectors; 12 | 13 | public class TemplateStrategy extends AbstractIdStrategy { 14 | private static final String KEY = "key"; 15 | private static final String TOPIC = "topic"; 16 | private static final String PARTITION = "partition"; 17 | private static final String OFFSET = "offset"; 18 | 19 | private static final String PATTERN_TEMPLATE = "\\$\\{(%s)\\}"; 20 | private static final Pattern PATTERN; 21 | 22 | private TemplateStrategyConfig config; 23 | 24 | private static final Map<String, Function<SinkRecord, String>> METHODS_BY_VARIABLE; 25 | 26 | static { 27 | ImmutableMap.Builder<String, Function<SinkRecord, String>> builder = ImmutableMap.builder(); 28 | builder.put(KEY, (r) -> Values.convertToString(r.keySchema(), r.key())); 29 | builder.put(TOPIC, SinkRecord::topic); 30 | builder.put(PARTITION, (r) -> r.kafkaPartition().toString()); 31 | builder.put(OFFSET, (r) -> Long.toString(r.kafkaOffset())); 32 | METHODS_BY_VARIABLE = builder.build(); 33 | 34 | String pattern = String.format(PATTERN_TEMPLATE, 35 | 
METHODS_BY_VARIABLE.keySet().stream().collect(Collectors.joining("|"))); 36 | PATTERN = Pattern.compile(pattern); 37 | } 38 | 39 | @Override 40 | public String generateId(SinkRecord record) { 41 | String template = config.template(); 42 | return sanitizeId(resolveAll(template, record)); 43 | } 44 | 45 | @Override 46 | public void configure(Map configs) { 47 | config = new TemplateStrategyConfig(configs); 48 | 49 | super.configure(configs); 50 | } 51 | 52 | private String resolveAll(String template, SinkRecord record) { 53 | int lastIndex = 0; 54 | StringBuilder output = new StringBuilder(); 55 | Matcher matcher = PATTERN.matcher(template); 56 | while (matcher.find()) { 57 | output.append(template, lastIndex, matcher.start()) 58 | .append(METHODS_BY_VARIABLE.get(matcher.group(1)).apply(record)); 59 | 60 | lastIndex = matcher.end(); 61 | } 62 | if (lastIndex < template.length()) { 63 | output.append(template, lastIndex, template.length()); 64 | } 65 | return output.toString(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/sink/id/strategy/TemplateStrategyConfig.java: -------------------------------------------------------------------------------- 1 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 2 | 3 | import org.apache.kafka.common.config.ConfigDef; 4 | 5 | import java.util.Map; 6 | 7 | public class TemplateStrategyConfig extends AbstractIdStrategyConfig { 8 | public static final String TEMPLATE_CONFIG = "template"; 9 | public static final String TEMPLATE_CONFIG_DEFAULT = ""; 10 | public static final String TEMPLATE_CONFIG_DOC = 11 | "The template string to use for determining the ``id``. The template can contain the " 12 | + "following variables that are bound to their values on the Kafka record:" 13 | + "${topic}, ${partition}, ${offset}, ${key}. For example, the template " 14 | + "``${topic}-${key}`` would use the topic name and the entire key in the ``id``, " 15 | + "separated by '-'"; 16 | public static final String TEMPLATE_CONFIG_DISPLAY = "Template"; 17 | private final String template; 18 | 19 | public TemplateStrategyConfig(Map props) { 20 | this(getConfig(), props); 21 | } 22 | 23 | public TemplateStrategyConfig(ConfigDef definition, Map originals) { 24 | super(definition, originals); 25 | 26 | this.template = getString(TEMPLATE_CONFIG); 27 | } 28 | 29 | 30 | public static ConfigDef getConfig() { 31 | ConfigDef result = new ConfigDef(); 32 | 33 | final String groupName = "Template Parameters"; 34 | int groupOrder = 0; 35 | 36 | result.define( 37 | TEMPLATE_CONFIG, 38 | ConfigDef.Type.STRING, 39 | TEMPLATE_CONFIG_DEFAULT, 40 | ConfigDef.Importance.MEDIUM, 41 | TEMPLATE_CONFIG_DOC, 42 | groupName, 43 | groupOrder++, 44 | ConfigDef.Width.MEDIUM, 45 | TEMPLATE_CONFIG_DISPLAY 46 | ); 47 | 48 | return result; 49 | } 50 | 51 | public String template() { 52 | return template; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/source/CosmosDBSourceConnector.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.source; 5 | 6 | import static com.azure.cosmos.kafka.connect.CosmosDBConfig.validateConnection; 7 | import static com.azure.cosmos.kafka.connect.CosmosDBConfig.validateTopicMap; 8 | 9 | import java.util.function.Function; 10 | import java.util.stream.Collectors; 11 | 12 | import com.azure.cosmos.CosmosAsyncClient; 13 | import com.azure.cosmos.CosmosAsyncContainer; 14 | import com.azure.cosmos.CosmosAsyncDatabase; 15 | import com.azure.cosmos.CosmosException; 16 | import com.azure.cosmos.kafka.connect.CosmosDBConfig; 17 | import com.azure.cosmos.kafka.connect.implementations.CosmosClientStore; 18 | import com.azure.cosmos.models.CosmosContainerProperties; 19 | import com.azure.cosmos.models.CosmosContainerRequestOptions; 20 | import com.azure.cosmos.models.CosmosContainerResponse; 21 | import com.azure.cosmos.models.ThroughputProperties; 22 | import org.apache.commons.lang3.RandomUtils; 23 | import org.apache.kafka.common.config.Config; 24 | import org.apache.kafka.common.config.ConfigDef; 25 | import org.apache.kafka.common.config.ConfigException; 26 | import org.apache.kafka.common.config.ConfigValue; 27 | import org.apache.kafka.connect.connector.Task; 28 | import org.apache.kafka.connect.errors.ConnectException; 29 | import org.apache.kafka.connect.source.SourceConnector; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | import java.util.*; 34 | 35 | /** 36 | * The CosmosDB Source Connector 37 | */ 38 | public class CosmosDBSourceConnector extends SourceConnector { 39 | 40 | private static final Logger logger = LoggerFactory.getLogger(CosmosDBSourceConnector.class); 41 | private CosmosDBSourceConfig config = null; 42 | private CosmosAsyncClient cosmosClient = null; 43 | 44 | @Override 45 | public void start(Map props) { 46 | logger.info("Starting the Source Connector"); 47 | try { 48 | config = new CosmosDBSourceConfig(props); 49 | this.cosmosClient = CosmosClientStore.getCosmosClient(this.config, this.getUserAgentSuffix()); 50 | 51 | List containerList = config.getTopicContainerMap().getContainerList(); 52 | for (String containerId : containerList) { 53 | createLeaseContainerIfNotExists(cosmosClient, this.config.getDatabaseName(), this.getAssignedLeaseContainer(containerId)); 54 | } 55 | 56 | } catch (ConfigException e) { 57 | throw new ConnectException( 58 | "Couldn't start CosmosDBSourceConnector due to configuration error", e); 59 | } 60 | } 61 | 62 | @Override 63 | public Class<? extends Task> taskClass() { 64 | return CosmosDBSourceTask.class; 65 | } 66 | 67 | @Override 68 | public List<Map<String, String>> taskConfigs(int maxTasks) { 69 | logger.info("Creating the task Configs"); 70 | List containerList = config.getTopicContainerMap().getContainerList(); 71 | List<Map<String, String>> taskConfigs = new ArrayList<>(maxTasks); 72 | 73 | if (containerList.size() == 0) { 74 | logger.debug("Container list is not specified"); 75 | return taskConfigs; 76 | } 77 | 78 | for (int i = 0; i < maxTasks; i++) { 79 | // Equally distribute workers by assigning workers to containers in round-robin fashion. 
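// Illustrative note (not in the original source): with containers [c1, c2, c3] and maxTasks = 5, the loop below assigns tasks 0..4 to c1, c2, c3, c1, c2 respectively.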
80 | Map taskProps = config.originalsStrings(); 81 | String assignedContainer = containerList.get(i % containerList.size()); 82 | 83 | taskProps.put(CosmosDBSourceConfig.COSMOS_ASSIGNED_CONTAINER_CONF, assignedContainer); 84 | taskProps.put(CosmosDBSourceConfig.COSMOS_ASSIGNED_LEASE_CONTAINER_CONF, this.getAssignedLeaseContainer(assignedContainer)); 85 | taskProps.put(CosmosDBSourceConfig.COSMOS_WORKER_NAME_CONF, 86 | String.format("%s-%d-%d", 87 | CosmosDBSourceConfig.COSMOS_WORKER_NAME_DEFAULT, 88 | RandomUtils.nextLong(1L, 9999999L), i)); 89 | taskConfigs.add(taskProps); 90 | } 91 | 92 | return taskConfigs; 93 | } 94 | 95 | @Override 96 | public void stop() { 97 | logger.info("Stopping CosmosDB Source Connector"); 98 | if (this.cosmosClient != null) { 99 | this.cosmosClient.close(); 100 | } 101 | } 102 | 103 | @Override 104 | public ConfigDef config() { 105 | return CosmosDBSourceConfig.getConfig(); 106 | } 107 | 108 | @Override 109 | public String version() { 110 | return this.getClass().getPackage().getImplementationVersion(); 111 | } 112 | 113 | @Override 114 | public Config validate(Map connectorConfigs) { 115 | Config config = super.validate(connectorConfigs); 116 | if (config.configValues().stream().anyMatch(cv -> !cv.errorMessages().isEmpty())) { 117 | return config; 118 | } 119 | 120 | Map configValues = config.configValues().stream().collect( 121 | Collectors.toMap(ConfigValue::name, Function.identity())); 122 | 123 | validateConnection(connectorConfigs, configValues); 124 | validateTopicMap(connectorConfigs, configValues); 125 | 126 | return config; 127 | } 128 | 129 | private String getAssignedLeaseContainer(String containerName) { 130 | return containerName + "-leases"; 131 | } 132 | 133 | private String getUserAgentSuffix() { 134 | return CosmosDBConfig.COSMOS_CLIENT_USER_AGENT_SUFFIX + version(); 135 | } 136 | 137 | private CosmosAsyncContainer createLeaseContainerIfNotExists(CosmosAsyncClient client, String databaseName, String leaseCollectionName) { 138 | CosmosAsyncDatabase database = client.getDatabase(databaseName); 139 | CosmosAsyncContainer leaseCollection = database.getContainer(leaseCollectionName); 140 | CosmosContainerResponse leaseContainerResponse = null; 141 | 142 | logger.info("Checking whether the lease container exists."); 143 | try { 144 | leaseContainerResponse = leaseCollection.read().block(); 145 | } catch (CosmosException ex) { 146 | // Swallowing exceptions when the type is CosmosException and statusCode is 404 147 | if (ex.getStatusCode() != 404) { 148 | throw ex; 149 | } 150 | logger.info("Lease container does not exist {}", ex.getMessage()); 151 | } 152 | 153 | if (leaseContainerResponse == null) { 154 | logger.info("Creating the Lease container : {}", leaseCollectionName); 155 | CosmosContainerProperties containerSettings = new CosmosContainerProperties(leaseCollectionName, "/id"); 156 | ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(400); 157 | CosmosContainerRequestOptions requestOptions = new CosmosContainerRequestOptions(); 158 | 159 | try { 160 | database.createContainer(containerSettings, throughputProperties, requestOptions).block(); 161 | } catch (Exception e) { 162 | logger.error("Failed to create container {} in database {}", leaseCollectionName, databaseName); 163 | throw e; 164 | } 165 | logger.info("Successfully created new lease container."); 166 | } 167 | 168 | return database.getContainer(leaseCollectionName); 169 | } 170 | } 171 | 
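The snippet below is an illustrative sketch and is not part of the repository: it shows how the sink-side id strategies defined earlier in this package resolve an `id` from a Kafka record. The topic, key, value, and expected outputs are made-up examples; only the classes and methods shown above are assumed.

```java
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;

import java.util.Map;

import com.azure.cosmos.kafka.connect.sink.id.strategy.KafkaMetadataStrategy;
import com.azure.cosmos.kafka.connect.sink.id.strategy.TemplateStrategy;
import com.azure.cosmos.kafka.connect.sink.id.strategy.TemplateStrategyConfig;

public class IdStrategySketch {
    public static void main(String[] args) {
        // A hand-built record: topic "hotels", partition 0, offset 42, key "1234".
        SinkRecord record = new SinkRecord("hotels", 0,
                Schema.STRING_SCHEMA, "1234",
                Schema.STRING_SCHEMA, "{\"id\": \"1234\"}", 42L);

        // KafkaMetadataStrategy joins topic, partition and offset with the default "-" delimiter.
        KafkaMetadataStrategy metadata = new KafkaMetadataStrategy();
        metadata.configure(Map.of());
        System.out.println(metadata.generateId(record));   // expected: hotels-0-42

        // TemplateStrategy substitutes ${...} variables from the record into the configured template.
        TemplateStrategy template = new TemplateStrategy();
        template.configure(Map.of(TemplateStrategyConfig.TEMPLATE_CONFIG, "${topic}-${key}"));
        System.out.println(template.generateId(record));   // expected: hotels-1234
    }
}
```

In the connector itself these strategies are typically not instantiated by hand; the `id.strategy` setting (see AbstractIdStrategyConfig above) selects the implementation and the `id.strategy.*` properties are handed to `configure`.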
-------------------------------------------------------------------------------- /src/main/java/com/azure/cosmos/kafka/connect/source/JsonToStruct.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.source; 5 | 6 | import static java.lang.String.format; 7 | import static org.apache.kafka.connect.data.Values.convertToByte; 8 | import static org.apache.kafka.connect.data.Values.convertToDouble; 9 | import static org.apache.kafka.connect.data.Values.convertToFloat; 10 | import static org.apache.kafka.connect.data.Values.convertToInteger; 11 | import static org.apache.kafka.connect.data.Values.convertToLong; 12 | import static org.apache.kafka.connect.data.Values.convertToShort; 13 | 14 | import java.util.ArrayList; 15 | import java.util.Iterator; 16 | import java.util.List; 17 | import java.util.Map; 18 | import java.util.Objects; 19 | 20 | import com.fasterxml.jackson.databind.JsonNode; 21 | 22 | import org.apache.kafka.connect.data.Schema; 23 | import org.apache.kafka.connect.data.SchemaAndValue; 24 | import org.apache.kafka.connect.data.SchemaBuilder; 25 | import org.apache.kafka.connect.data.Struct; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | public class JsonToStruct { 30 | private static final Logger logger = LoggerFactory.getLogger(JsonToStruct.class); 31 | private static final String SCHEMA_NAME_TEMPLATE = "inferred_name_%s"; 32 | 33 | public SchemaAndValue recordToSchemaAndValue(final JsonNode node) { 34 | Schema nodeSchema = inferSchema(node); 35 | Struct struct = new Struct(nodeSchema); 36 | 37 | if (nodeSchema != null) { 38 | nodeSchema.fields().forEach(field -> { 39 | JsonNode fieldValue = node.get(field.name()); 40 | if (fieldValue != null) { 41 | SchemaAndValue schemaAndValue = toSchemaAndValue(field.schema(), fieldValue); 42 | struct.put(field, schemaAndValue.value()); 43 | } else { 44 | boolean optionalField = field.schema().isOptional(); 45 | Object defaultValue = field.schema().defaultValue(); 46 | if (optionalField || defaultValue != null) { 47 | struct.put(field, defaultValue); 48 | } else { 49 | logger.error("Missing value for field {}", field.name()); 50 | } 51 | } 52 | }); 53 | } 54 | return new SchemaAndValue(nodeSchema, struct); 55 | } 56 | 57 | private Schema inferSchema(JsonNode jsonNode) { 58 | switch (jsonNode.getNodeType()) { 59 | case NULL: 60 | return Schema.OPTIONAL_STRING_SCHEMA; 61 | case BOOLEAN: 62 | return Schema.BOOLEAN_SCHEMA; 63 | case NUMBER: 64 | if (jsonNode.isIntegralNumber()) { 65 | return Schema.INT64_SCHEMA; 66 | } else { 67 | return Schema.FLOAT64_SCHEMA; 68 | } 69 | case ARRAY: 70 | List jsonValues = new ArrayList<>(); 71 | SchemaBuilder arrayBuilder; 72 | jsonNode.forEach(jn -> jsonValues.add(jn)); 73 | 74 | Schema firstItemSchema = jsonValues.isEmpty() ? 
Schema.OPTIONAL_STRING_SCHEMA 75 | : inferSchema(jsonValues.get(0)); 76 | if (jsonValues.isEmpty() || jsonValues.stream() 77 | .anyMatch(jv -> !Objects.equals(inferSchema(jv), firstItemSchema))) { 78 | // If array is empty or it contains elements with different schema types 79 | arrayBuilder = SchemaBuilder.array(Schema.OPTIONAL_STRING_SCHEMA); 80 | arrayBuilder.name(generateName(arrayBuilder)); 81 | return arrayBuilder.optional().build(); 82 | } 83 | arrayBuilder = SchemaBuilder.array(inferSchema(jsonValues.get(0))); 84 | arrayBuilder.name(generateName(arrayBuilder)); 85 | return arrayBuilder.optional().build(); 86 | case OBJECT: 87 | SchemaBuilder structBuilder = SchemaBuilder.struct(); 88 | Iterator<Map.Entry<String, JsonNode>> it = jsonNode.fields(); 89 | while (it.hasNext()) { 90 | Map.Entry<String, JsonNode> entry = it.next(); 91 | structBuilder.field(entry.getKey(), inferSchema(entry.getValue())); 92 | } 93 | structBuilder.name(generateName(structBuilder)); 94 | return structBuilder.build(); 95 | case STRING: 96 | return Schema.STRING_SCHEMA; 97 | case BINARY: 98 | case MISSING: 99 | case POJO: 100 | default: 101 | return null; 102 | } 103 | } 104 | 105 | // Generate Unique Schema Name 106 | private static String generateName(final SchemaBuilder builder) { 107 | return format(SCHEMA_NAME_TEMPLATE, Objects.hashCode(builder.build())).replace("-", "_"); 108 | } 109 | 110 | private SchemaAndValue toSchemaAndValue(final Schema schema, final JsonNode node) { 111 | SchemaAndValue schemaAndValue = new SchemaAndValue(schema, node); 112 | if (schema.isOptional() && node.isNull()) { 113 | return new SchemaAndValue(schema, null); 114 | } 115 | switch (schema.type()) { 116 | case INT8: 117 | case INT16: 118 | case INT32: 119 | case INT64: 120 | case FLOAT32: 121 | case FLOAT64: 122 | schemaAndValue = numberToSchemaAndValue(schema, node); 123 | break; 124 | case BOOLEAN: 125 | schemaAndValue = new SchemaAndValue(schema, node.asBoolean()); 126 | break; 127 | case STRING: 128 | schemaAndValue = stringToSchemaAndValue(schema, node); 129 | break; 130 | case BYTES: 131 | schemaAndValue = new SchemaAndValue(schema, node); 132 | break; 133 | case ARRAY: 134 | schemaAndValue = arrayToSchemaAndValue(schema, node); 135 | break; 136 | case MAP: 137 | schemaAndValue = new SchemaAndValue(schema, node); 138 | break; 139 | case STRUCT: 140 | schemaAndValue = recordToSchemaAndValue(node); 141 | break; 142 | default: 143 | logger.error("Unsupported Schema type: {}", schema.type()); 144 | } 145 | return schemaAndValue; 146 | } 147 | 148 | private SchemaAndValue stringToSchemaAndValue(final Schema schema, final JsonNode nodeValue) { 149 | String value; 150 | if (nodeValue.isTextual()) { 151 | value = nodeValue.asText(); 152 | } else { 153 | value = nodeValue.toString(); 154 | } 155 | return new SchemaAndValue(schema, value); 156 | } 157 | 158 | private SchemaAndValue arrayToSchemaAndValue(final Schema schema, final JsonNode nodeValue) { 159 | if (!nodeValue.isArray()) { 160 | logger.error("Unexpected array value for schema {}", schema); 161 | } 162 | List values = new ArrayList<>(); 163 | nodeValue.forEach(v -> 164 | values.add(toSchemaAndValue(schema.valueSchema(), v).value()) 165 | ); 166 | return new SchemaAndValue(schema, values); 167 | } 168 | 169 | private SchemaAndValue numberToSchemaAndValue(final Schema schema, final JsonNode nodeValue) { 170 | Object value = null; 171 | if (nodeValue.isNumber()) { 172 | if (nodeValue.isInt()) { 173 | value = nodeValue.intValue(); 174 | } else if (nodeValue.isDouble()) { 175 | value = nodeValue.doubleValue(); 176 | } 
else if (nodeValue.isLong()) { 177 | value = nodeValue.longValue(); 178 | } 179 | } else { 180 | logger.error("Unexpected value for schema {}", schema); 181 | } 182 | 183 | switch (schema.type()) { 184 | case INT8: 185 | value = convertToByte(schema, value); 186 | break; 187 | case INT16: 188 | value = convertToShort(schema, value); 189 | break; 190 | case INT32: 191 | value = convertToInteger(schema, value); 192 | break; 193 | case INT64: 194 | value = convertToLong(schema, value); 195 | break; 196 | case FLOAT32: 197 | value = convertToFloat(schema, value); 198 | break; 199 | case FLOAT64: 200 | value = convertToDouble(schema, value); 201 | break; 202 | default: 203 | logger.error("Unsupported Schema type: {}", schema.type()); 204 | } 205 | return new SchemaAndValue(schema, value); 206 | } 207 | 208 | } 209 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | #Define console appender 2 | log4j.appender.console=org.apache.log4j.ConsoleAppender 3 | log4j.appender.console.Target=System.err 4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.console.layout.ConversionPattern=%-5p %c{1} - %m%n 6 | #Loggers 7 | log4j.rootLogger=ERROR,console 8 | log4j.logger.com.microsoft=INFO 9 | log4j.logger.com.azure.cosmos.kafka=DEBUG 10 | -------------------------------------------------------------------------------- /src/perf/.gitignore: -------------------------------------------------------------------------------- 1 | cluster/manifests/sink.json 2 | cluster/manifests/source.json 3 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: A Helm chart for Cosmos DB Kafka Connect on Kubernetes 4 | name: cp-kafka-connect 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/README.md: -------------------------------------------------------------------------------- 1 | # CP-Kafka Connect Helm Chart 2 | 3 | This chart bootstraps a deployment of Confluent Kafka Connect. 4 | 5 | ## Prerequisites 6 | 7 | * Kubernetes 1.9.2+ 8 | * Helm 2.8.2+ 9 | * A healthy and accessible Kafka Cluster 10 | 11 | ## Development Environment: 12 | 13 | * [Pivotal Container Service (PKS)](https://pivotal.io/platform/pivotal-container-service) 14 | * [Google Kubernetes Engine (GKE)](https://cloud.google.com/kubernetes-engine/) 15 | 16 | ## Docker Image Source: 17 | 18 | * [DockerHub -> ConfluentInc](https://hub.docker.com/u/confluentinc/) 19 | 20 | ## Installing the Chart 21 | 22 | ### Install along with cp-helm-charts 23 | 24 | ```console 25 | git clone https://github.com/confluentinc/cp-helm-charts.git 26 | helm install cp-helm-charts 27 | ``` 28 | 29 | To install with a specific name, you can do: 30 | 31 | ```console 32 | helm install --name my-confluent cp-helm-charts 33 | ``` 34 | 35 | ### Install with an existing CP-Kafka release 36 | 37 | ```console 38 | helm install --set kafka.bootstrapServers="PLAINTEXT://lolling-chinchilla-cp-kafka-headless:9092",cp-schema-registry.url="lolling-chinchilla-cp-schema-registry:8081" cp-helm-charts/charts/cp-kafka-connect 39 | ``` 40 | 41 | ### Installed Components 42 | 43 | You can use `helm status <release name>` to view all of the installed components. 44 | 45 | For example: 46 | 47 | ```console 48 | $ helm status kissing-macaw 49 | 50 | NAMESPACE: default 51 | STATUS: DEPLOYED 52 | 53 | RESOURCES: 54 | ==> v1/Service 55 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 56 | kissing-macaw-cp-kafka-connect ClusterIP 10.19.253.217 8083/TCP 34m 57 | 58 | ==> v1beta2/Deployment 59 | NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE 60 | kissing-macaw-cp-kafka-connect 1 1 1 1 34m 61 | 62 | ==> v1/Pod(related) 63 | NAME READY STATUS RESTARTS AGE 64 | kissing-macaw-cp-kafka-connect-6c77b8f5fd-cqlzq 1/1 Running 0 34m 65 | ``` 66 | 67 | There are two components: 68 | 1. A [Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) `kissing-macaw-cp-kafka-connect` which contains 1 Kafka Connect [Pod](https://kubernetes.io/docs/concepts/workloads/pods/pod-overview/): `kissing-macaw-cp-kafka-connect-6c77b8f5fd-cqlzq`. 69 | 2. A [Service](https://kubernetes.io/docs/concepts/services-networking/service/) `kissing-macaw-cp-kafka-connect` for clients to connect to the Kafka Connect REST endpoint. 70 | 71 | ## Configuration 72 | 73 | You can specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. 74 | 75 | Alternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. 
For example, 76 | 77 | ```console 78 | helm install --name my-kafka-connect -f my-values.yaml ./cp-kafka-connect 79 | ``` 80 | 81 | > **Tip**: A default [values.yaml](values.yaml) is provided. 82 | 83 | ### Kafka Connect Deployment 84 | 85 | The configuration parameters in this section control the resources requested and utilized by the `cp-kafka-connect` chart. 86 | 87 | | Parameter | Description | Default | 88 | | ----------------- | ------------------------------------- | --------- | 89 | | `replicaCount` | The number of Kafka Connect Servers. | `1` | 90 | 91 | ### Image 92 | 93 | | Parameter | Description | Default | 94 | | --------- | ----------- | ------- | 95 | | `image` | Docker Image of Confluent Kafka Connect. | `confluentinc/cp-kafka-connect` | 96 | | `imageTag` | Docker Image Tag of Confluent Kafka Connect. | `6.0.1` | 97 | | `imagePullPolicy` | Docker Image Pull Policy of Confluent Kafka Connect. | `IfNotPresent` | 98 | | `imagePullSecrets` | Secrets to be used for private registries. | see [values.yaml](values.yaml) for details | 99 | 100 | ### Port 101 | 102 | | Parameter | Description | Default | 103 | | --------- | ----------- | ------- | 104 | | `servicePort` | The port on which Kafka Connect will be available and serving requests. | `8083` | 105 | 106 | ### Kafka Connect Worker Configurations 107 | 108 | | Parameter | Description | Default | 109 | | --------- | ----------- | ------- | 110 | | `configurationOverrides` | Kafka Connect [configuration](https://docs.confluent.io/current/connect/references/allconfigs.html) overrides in the dictionary format. | `{}` | 111 | | `customEnv` | Custom environment variables | `{}` | 112 | 113 | ### Volumes 114 | 115 | | Parameter | Description | Default | 116 | | --------- | ----------- | ------- | 117 | | `volumes` | Volumes for connect-server container | see [values.yaml](values.yaml) for details | 118 | | `volumeMounts` | Volume mounts for connect-server container | see [values.yaml](values.yaml) for details | 119 | 120 | ### Secrets 121 | 122 | | Parameter | Description | Default | 123 | | --------- | ----------- | ------- | 124 | | `secrets` | Secret with one or more `key:value` pairs | see [values.yaml](values.yaml) for details | 125 | 126 | ### Kafka Connect JVM Heap Options 127 | 128 | | Parameter | Description | Default | 129 | | --------- | ----------- | ------- | 130 | | `heapOptions` | The JVM Heap Options for Kafka Connect | `"-Xms512M -Xmx512M"` | 131 | 132 | ### Resources 133 | 134 | | Parameter | Description | Default | 135 | | --------- | ----------- | ------- | 136 | | `resources.requests.cpu` | The amount of CPU to request. | see [values.yaml](values.yaml) for details | 137 | | `resources.requests.memory` | The amount of memory to request. | see [values.yaml](values.yaml) for details | 138 | | `resources.limits.cpu` | The upper limit of CPU usage for a Kafka Connect Pod. | see [values.yaml](values.yaml) for details | 139 | | `resources.limits.memory` | The upper limit of memory usage for a Kafka Connect Pod. | see [values.yaml](values.yaml) for details | 140 | 141 | ### Annotations 142 | 143 | | Parameter | Description | Default | 144 | | --------- | ----------- | ------- | 145 | | `podAnnotations` | Map of custom annotations to attach to the pod spec. | `{}` | 146 | 147 | ### JMX Configuration 148 | 149 | | Parameter | Description | Default | 150 | | --------- | ----------- | ------- | 151 | | `jmx.port` | The JMX port on which JMX-style metrics are exposed. 
| `5555` | 152 | 153 | ### Prometheus JMX Exporter Configuration 154 | 155 | | Parameter | Description | Default | 156 | | --------- | ----------- | ------- | 157 | | `prometheus.jmx.enabled` | Whether or not to install Prometheus JMX Exporter as a sidecar container and expose JMX metrics to Prometheus. | `true` | 158 | | `prometheus.jmx.image` | Docker Image for Prometheus JMX Exporter container. | `solsson/kafka-prometheus-jmx-exporter@sha256` | 159 | | `prometheus.jmx.imageTag` | Docker Image Tag for Prometheus JMX Exporter container. | `6f82e2b0464f50da8104acd7363fb9b995001ddff77d248379f8788e78946143` | 160 | | `prometheus.jmx.imagePullPolicy` | Docker Image Pull Policy for Prometheus JMX Exporter container. | `IfNotPresent` | 161 | | `prometheus.jmx.port` | JMX Exporter Port which exposes metrics in Prometheus format for scraping. | `5556` | 162 | | `prometheus.jmx.resources` | JMX Exporter resources configuration. | see [values.yaml](values.yaml) for details | 163 | 164 | ### Running Custom Scripts 165 | 166 | | Parameter | Description | Default | 167 | | --------- | ----------- | ------- | 168 | | `customEnv.CUSTOM_SCRIPT_PATH` | Path to external bash script to run inside the container | see [values.yaml](values.yaml) for details | 169 | | `livenessProbe` | Requirement of `livenessProbe` depends on the custom script to be run | see [values.yaml](values.yaml) for details | 170 | 171 | ### Deployment Topology 172 | 173 | | Parameter | Description | Default | 174 | | --------- | ----------- | ------- | 175 | | `nodeSelector` | Dictionary containing key-value-pairs to match labels on nodes. When defined pods will only be scheduled on nodes, that have each of the indicated key-value pairs as labels. Further information can be found in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) | `{}` 176 | | `tolerations`| Array containing taint references. When defined, pods can run on nodes, which would otherwise deny scheduling. Further information can be found in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) | `{}` 177 | 178 | ## Dependencies 179 | 180 | ### Kafka 181 | 182 | | Parameter | Description | Default | 183 | | --------- | ----------- | ------- | 184 | | `kafka.bootstrapServers` | Bootstrap Servers for Kafka Connect | `""` | 185 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/helm-config.yaml: -------------------------------------------------------------------------------- 1 | # Default values for Kafka Connect. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
4 | 5 | image: mssivamu/kafka-connect-cosmosdb 6 | imageTag: perf 7 | 8 | heapOptions: "-Xms512M -Xmx2g" 9 | 10 | resources: 11 | # Limits based on the Connect Performance Verification Guide defined in Page 19: 12 | # https://assets.confluent.io/m/28c7ffcc359a13c0/original/20200325-VIP_Connect-Verification_Guide.pdf 13 | limits: 14 | cpu: 2666m 15 | memory: 2Gi 16 | requests: 17 | cpu: 2666m 18 | memory: 2Gi 19 | 20 | prometheus: 21 | jmx: 22 | enabled: true 23 | 24 | kafka: 25 | bootstrapServers: "PLAINTEXT://kafka-cp-kafka-headless.kafka:9092" 26 | 27 | cp-schema-registry: 28 | url: "kafka-cp-schema-registry.kafka:8081" 29 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | This chart installs a Confluent Kafka Connect 2 | 3 | https://docs.confluent.io/current/connect/index.html -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "cp-kafka-connect.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "cp-kafka-connect.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "cp-kafka-connect.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Create a default fully qualified kafka headless name. 36 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 37 | */}} 38 | {{- define "cp-kafka-connect.cp-kafka-headless.fullname" -}} 39 | {{- $name := "cp-kafka-headless" -}} 40 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 41 | {{- end -}} 42 | 43 | {{/* 44 | Form the Kafka URL. If Kafka is installed as part of this chart, use k8s service discovery, 45 | else use user-provided URL 46 | */}} 47 | {{- define "cp-kafka-connect.kafka.bootstrapServers" -}} 48 | {{- if .Values.kafka.bootstrapServers -}} 49 | {{- .Values.kafka.bootstrapServers -}} 50 | {{- else -}} 51 | {{- printf "PLAINTEXT://%s:9092" (include "cp-kafka-connect.cp-kafka-headless.fullname" .) -}} 52 | {{- end -}} 53 | {{- end -}} 54 | 55 | {{/* 56 | Create a default fully qualified schema registry name. 57 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
58 | */}} 59 | {{- define "cp-kafka-connect.cp-schema-registry.fullname" -}} 60 | {{- $name := default "cp-schema-registry" (index .Values "cp-schema-registry" "nameOverride") -}} 61 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 62 | {{- end -}} 63 | 64 | {{- define "cp-kafka-connect.cp-schema-registry.service-name" -}} 65 | {{- if (index .Values "cp-schema-registry" "url") -}} 66 | {{- printf "%s" (index .Values "cp-schema-registry" "url") -}} 67 | {{- else -}} 68 | {{- printf "http://%s:8081" (include "cp-kafka-connect.cp-schema-registry.fullname" .) -}} 69 | {{- end -}} 70 | {{- end -}} 71 | 72 | {{/* 73 | Default GroupId to Release Name but allow it to be overridden 74 | */}} 75 | {{- define "cp-kafka-connect.groupId" -}} 76 | {{- if .Values.overrideGroupId -}} 77 | {{- .Values.overrideGroupId -}} 78 | {{- else -}} 79 | {{- .Release.Name -}} 80 | {{- end -}} 81 | {{- end -}} 82 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Capabilities.APIVersions.Has "apps/v1" }} 2 | apiVersion: apps/v1 3 | {{- else }} 4 | apiVersion: apps/v1beta2 5 | {{- end }} 6 | kind: Deployment 7 | metadata: 8 | name: {{ template "cp-kafka-connect.fullname" . }} 9 | labels: 10 | app: {{ template "cp-kafka-connect.name" . }} 11 | chart: {{ template "cp-kafka-connect.chart" . }} 12 | release: {{ .Release.Name }} 13 | heritage: {{ .Release.Service }} 14 | spec: 15 | replicas: {{ .Values.replicaCount }} 16 | selector: 17 | matchLabels: 18 | app: {{ template "cp-kafka-connect.name" . }} 19 | release: {{ .Release.Name }} 20 | template: 21 | metadata: 22 | labels: 23 | app: {{ template "cp-kafka-connect.name" . }} 24 | release: {{ .Release.Name }} 25 | {{- if or .Values.podAnnotations .Values.prometheus.jmx.enabled }} 26 | annotations: 27 | {{- range $key, $value := .Values.podAnnotations }} 28 | {{ $key }}: {{ $value | quote }} 29 | {{- end }} 30 | {{- if .Values.prometheus.jmx.enabled }} 31 | prometheus.io/scrape: "true" 32 | prometheus.io/port: {{ .Values.prometheus.jmx.port | quote }} 33 | {{- end }} 34 | {{- end }} 35 | spec: 36 | containers: 37 | {{- if .Values.prometheus.jmx.enabled }} 38 | - name: prometheus-jmx-exporter 39 | image: "{{ .Values.prometheus.jmx.image }}:{{ .Values.prometheus.jmx.imageTag }}" 40 | imagePullPolicy: "{{ .Values.prometheus.jmx.imagePullPolicy }}" 41 | command: 42 | - java 43 | - -XX:+UnlockExperimentalVMOptions 44 | - -XX:+UseCGroupMemoryLimitForHeap 45 | - -XX:MaxRAMFraction=1 46 | - -XshowSettings:vm 47 | - -jar 48 | - jmx_prometheus_httpserver.jar 49 | - {{ .Values.prometheus.jmx.port | quote }} 50 | - /etc/jmx-kafka-connect/jmx-kafka-connect-prometheus.yml 51 | ports: 52 | - containerPort: {{ .Values.prometheus.jmx.port }} 53 | resources: 54 | {{ toYaml .Values.prometheus.jmx.resources | indent 12 }} 55 | volumeMounts: 56 | - name: jmx-config 57 | mountPath: /etc/jmx-kafka-connect 58 | {{- end }} 59 | - name: {{ template "cp-kafka-connect.name" . 
}}-server 60 | image: "{{ .Values.image }}:{{ .Values.imageTag }}" 61 | imagePullPolicy: "{{ .Values.imagePullPolicy }}" 62 | ports: 63 | - name: kafka-connect 64 | containerPort: {{ .Values.servicePort}} 65 | protocol: TCP 66 | {{- if .Values.prometheus.jmx.enabled }} 67 | - containerPort: {{ .Values.jmx.port }} 68 | name: jmx 69 | {{- end }} 70 | resources: 71 | {{ toYaml .Values.resources | indent 12 }} 72 | env: 73 | - name: CONNECT_REST_ADVERTISED_HOST_NAME 74 | valueFrom: 75 | fieldRef: 76 | fieldPath: status.podIP 77 | - name: CONNECT_BOOTSTRAP_SERVERS 78 | value: {{ template "cp-kafka-connect.kafka.bootstrapServers" . }} 79 | - name: CONNECT_GROUP_ID 80 | value: {{ template "cp-kafka-connect.groupId" . }} 81 | - name: CONNECT_CONFIG_STORAGE_TOPIC 82 | value: {{ template "cp-kafka-connect.fullname" . }}-config 83 | - name: CONNECT_OFFSET_STORAGE_TOPIC 84 | value: {{ template "cp-kafka-connect.fullname" . }}-offset 85 | - name: CONNECT_STATUS_STORAGE_TOPIC 86 | value: {{ template "cp-kafka-connect.fullname" . }}-status 87 | - name: CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL 88 | value: {{ template "cp-kafka-connect.cp-schema-registry.service-name" .}} 89 | - name: CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL 90 | value: {{ template "cp-kafka-connect.cp-schema-registry.service-name" .}} 91 | - name: KAFKA_HEAP_OPTS 92 | value: "{{ .Values.heapOptions }}" 93 | {{- range $key, $value := .Values.configurationOverrides }} 94 | - name: {{ printf "CONNECT_%s" $key | replace "." "_" | upper | quote }} 95 | value: {{ $value | quote }} 96 | {{- end }} 97 | {{- range $key, $value := .Values.customEnv }} 98 | - name: {{ $key | quote }} 99 | value: {{ $value | quote }} 100 | {{- end }} 101 | {{- if .Values.jmx.port }} 102 | - name: KAFKA_JMX_PORT 103 | value: "{{ .Values.jmx.port }}" 104 | {{- end }} 105 | {{- if .Values.customEnv.CUSTOM_SCRIPT_PATH }} 106 | command: 107 | - /bin/bash 108 | - -c 109 | - | 110 | /etc/confluent/docker/run & 111 | $CUSTOM_SCRIPT_PATH 112 | sleep infinity 113 | {{- if .Values.livenessProbe }} 114 | livenessProbe: 115 | {{ toYaml .Values.livenessProbe | trim | indent 12 }} 116 | {{- end }} 117 | {{- end }} 118 | {{- if .Values.volumeMounts }} 119 | volumeMounts: 120 | {{ toYaml .Values.volumeMounts | indent 10 }} 121 | {{- end}} 122 | {{- if .Values.imagePullSecrets }} 123 | imagePullSecrets: 124 | {{ toYaml .Values.imagePullSecrets | indent 8 }} 125 | {{- end }} 126 | volumes: 127 | {{- if .Values.volumes }} 128 | {{ toYaml .Values.volumes | trim | indent 6 }} 129 | {{- end}} 130 | {{- if .Values.prometheus.jmx.enabled }} 131 | - name: jmx-config 132 | configMap: 133 | name: {{ template "cp-kafka-connect.fullname" . }}-jmx-configmap 134 | {{- end }} 135 | {{- if .Values.nodeSelector }} 136 | nodeSelector: 137 | {{ toYaml .Values.nodeSelector | indent 8 }} 138 | {{- end }} 139 | {{- if .Values.tolerations }} 140 | tolerations: 141 | {{ toYaml .Values.tolerations | indent 8 }} 142 | {{- end }} 143 | {{- if .Values.affinity }} 144 | affinity: 145 | {{ toYaml .Values.affinity | indent 8 }} 146 | {{- end }} 147 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/templates/jmx-configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.prometheus.jmx.enabled }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: {{ template "cp-kafka-connect.fullname" . }}-jmx-configmap 6 | labels: 7 | app: {{ template "cp-kafka-connect.name" . 
}} 8 | chart: {{ template "cp-kafka-connect.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | data: 12 | jmx-kafka-connect-prometheus.yml: |+ 13 | jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:{{ .Values.jmx.port }}/jmxrmi 14 | lowercaseOutputName: true 15 | lowercaseOutputLabelNames: true 16 | ssl: false 17 | whitelistObjectNames: 18 | - kafka.connect:type=connect-worker-metrics 19 | - kafka.connect:type=connect-metrics,client-id=* 20 | - kafka.connect:type=connector-task-metrics,connector=*,task=* 21 | - kafka.connect:type=source-task-metrics,connector=*,task=* 22 | - kafka.connect:type=sink-task-metrics,connector=*,task=* 23 | - cosmos.kafka.connect:type=*,connector=*,task=* 24 | rules: 25 | - pattern : "kafka.connect([^:]+):" 26 | name: "cp_kafka_connect_connect_worker_metrics_$1" 27 | - pattern : "kafka.connect<>([^:]+)" 28 | name: "cp_kafka_connect_connect_metrics_$1_$2" 29 | - pattern : "kafka.connect<>status: ([^:]+)" 30 | name: "cp_kafka_connect_connect_connector_metrics" 31 | value: 1 32 | labels: 33 | connector: $1 34 | task: $2 35 | status: $3 36 | - pattern: kafka.connect<>([^:]+) 37 | name: cp_kafka_connect_$1_$4 38 | labels: 39 | connector: "$2" 40 | task: "$3" 41 | - pattern: cosmos.kafka.connect<>([^:]+) 42 | name: cosmos_kafka_connect_$1_$4 43 | labels: 44 | connector: "$2" 45 | task: "$3" 46 | {{- end }} 47 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/templates/secrets.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.secrets }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ template "cp-kafka-connect.fullname" . }} 6 | labels: 7 | app: {{ template "cp-kafka-connect.name" . }} 8 | chart: {{ template "cp-kafka-connect.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | type: Opaque 12 | data: 13 | {{- range $key, $value := .Values.secrets }} 14 | {{ $key }}: {{ $value | b64enc }} 15 | {{- end }} 16 | {{- end }} 17 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ template "cp-kafka-connect.fullname" . }} 5 | labels: 6 | app: {{ template "cp-kafka-connect.name" . }} 7 | chart: {{ template "cp-kafka-connect.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | type: LoadBalancer 12 | ports: 13 | - name: kafka-connect 14 | port: {{ .Values.servicePort }} 15 | targetPort: 8083 16 | {{- if .Values.prometheus.jmx.enabled }} 17 | - name: metrics 18 | port: {{ .Values.prometheus.jmx.port }} 19 | {{- end }} 20 | selector: 21 | app: {{ template "cp-kafka-connect.name" . }} 22 | release: {{ .Release.Name }} 23 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/connect/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for cp-kafka-connect. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
4 | 5 | replicaCount: 1 6 | 7 | ## Image Info 8 | ## ref: https://hub.docker.com/r/confluentinc/cp-kafka/ 9 | image: mssivamu/kafka-connect-cosmosdb 10 | imageTag: perf 11 | 12 | ## Specify a imagePullPolicy 13 | ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images 14 | imagePullPolicy: Always 15 | 16 | ## Specify an array of imagePullSecrets. 17 | ## Secrets must be manually created in the namespace. 18 | ## ref: https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod 19 | imagePullSecrets: 20 | 21 | servicePort: 8083 22 | 23 | ## Kafka Connect properties 24 | ## ref: https://docs.confluent.io/current/connect/userguide.html#configuring-workers 25 | configurationOverrides: 26 | "plugin.path": "/usr/share/java,/usr/share/confluent-hub-components,/etc/kafka-connect/jars" 27 | "key.converter": "org.apache.kafka.connect.storage.StringConverter" 28 | "value.converter": "org.apache.kafka.connect.json.JsonConverter" 29 | "key.converter.schemas.enable": "false" 30 | "value.converter.schemas.enable": "false" 31 | "internal.key.converter": "org.apache.kafka.connect.storage.StringConverter" 32 | "internal.value.converter": "org.apache.kafka.connect.json.JsonConverter" 33 | "internal.value.converter.schemas.enable": "false" 34 | "internal.key.converter.schemas.enable": "false" 35 | "config.storage.replication.factor": "3" 36 | "offset.storage.replication.factor": "3" 37 | "status.storage.replication.factor": "3" 38 | "group.id": "cosmos-connect-cluster" 39 | "log4j.loggers": "org.apache.kafka=INFO,org.reflections=ERROR,com.azure.cosmos.kafka=DEBUG" 40 | "config.storage.topic": "cosmos-connect-configs" 41 | "offset.storage.topic": "cosmos-connect-offsets" 42 | 43 | ## Kafka Connect JVM Heap Option 44 | heapOptions: "-Xms512M -Xmx512M" 45 | 46 | ## Additional env variables 47 | customEnv: 48 | CLASSPATH: /usr/share/java/monitoring-interceptors/monitoring-interceptors-6.0.0.jar 49 | CONNECT_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor" 50 | CONNECT_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor" 51 | 52 | resources: {} 53 | # We usually recommend not to specify default resources and to leave this as a conscious 54 | # choice for the user. This also increases chances charts run on environments with little 55 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 56 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
57 | # limits: 58 | # cpu: 100m 59 | # memory: 128Mi 60 | # requests: 61 | # cpu: 100m 62 | # memory: 128Mi 63 | 64 | ## Custom pod annotations 65 | podAnnotations: {} 66 | 67 | ## Node labels for pod assignment 68 | ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ 69 | nodeSelector: {} 70 | 71 | ## Taints to tolerate on node assignment: 72 | ## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ 73 | tolerations: [] 74 | 75 | ## Monitoring 76 | ## Kafka Connect JMX Settings 77 | ## ref: https://kafka.apache.org/documentation/#connect_monitoring 78 | jmx: 79 | port: 5555 80 | 81 | ## Prometheus Exporter Configuration 82 | ## ref: https://prometheus.io/docs/instrumenting/exporters/ 83 | prometheus: 84 | ## JMX Exporter Configuration 85 | ## ref: https://github.com/prometheus/jmx_exporter 86 | jmx: 87 | enabled: true 88 | image: solsson/kafka-prometheus-jmx-exporter@sha256 89 | imageTag: 6f82e2b0464f50da8104acd7363fb9b995001ddff77d248379f8788e78946143 90 | imagePullPolicy: IfNotPresent 91 | port: 5556 92 | 93 | ## Resources configuration for the JMX exporter container. 94 | ## See the `resources` documentation above for details. 95 | resources: {} 96 | 97 | ## You can list a load-balanced service endpoint, or a list of all brokers (which is hard in K8s), e.g.: 98 | ## bootstrapServers: "PLAINTEXT://dozing-prawn-kafka-headless:9092" 99 | kafka: 100 | bootstrapServers: "" 101 | 102 | ## If the Kafka Chart is disabled, a URL and port are required to connect 103 | ## e.g. gnoble-panther-cp-schema-registry:8081 104 | cp-schema-registry: 105 | url: "" 106 | 107 | ## List of volumeMounts for connect server container 108 | ## ref: https://kubernetes.io/docs/concepts/storage/volumes/ 109 | volumeMounts: 110 | # - name: credentials 111 | # mountPath: /etc/creds-volume 112 | 113 | ## List of volumes for connect server container 114 | ## ref: https://kubernetes.io/docs/concepts/storage/volumes/ 115 | volumes: 116 | # - name: credentials 117 | # secret: 118 | # secretName: creds 119 | 120 | ## Secret with multiple keys to serve the purpose of multiple secrets 121 | ## Values for all the keys will be base64 encoded when the Secret is created or updated 122 | ## ref: https://kubernetes.io/docs/concepts/configuration/secret/ 123 | secrets: 124 | # username: kafka123 125 | # password: connect321 126 | 127 | ## These values are used only when "customEnv.CUSTOM_SCRIPT_PATH" is defined. 128 | ## "livenessProbe" is required only for the edge cases where the custom script to be run takes too much time 129 | ## and errors by the ENTRYPOINT are ignored by the container 130 | ## As an example, a similar script is provided at "cp-helm-charts/examples/create-connectors.sh" 131 | ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ 132 | livenessProbe: 133 | # httpGet: 134 | # path: /connectors 135 | # port: 8083 136 | # initialDelaySeconds: 30 137 | # periodSeconds: 5 138 | # failureThreshold: 10 139 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/sink-perf/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: sink-perf 3 | description: A Helm chart for Kubernetes 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 
9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 0.1.0 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | appVersion: 1.16.0 24 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/sink-perf/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "sink-perf.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "sink-perf.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "sink-perf.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "sink-perf.labels" -}} 37 | helm.sh/chart: {{ include "sink-perf.chart" . }} 38 | {{ include "sink-perf.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "sink-perf.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "sink-perf.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "sink-perf.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "sink-perf.fullname" .) 
.Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/sink-perf/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "sink-perf.name" . }} 5 | labels: 6 | app: {{ template "sink-perf.name" . }} 7 | chart: {{ template "sink-perf.chart" . }} 8 | release: {{ .Release.Name }} 9 | heritage: {{ .Release.Service }} 10 | spec: 11 | replicas: {{ .Values.replicaCount }} 12 | selector: 13 | matchLabels: 14 | app: {{ template "sink-perf.name" . }} 15 | release: {{ .Release.Name }} 16 | template: 17 | metadata: 18 | labels: 19 | app: {{ template "sink-perf.name" . }} 20 | release: {{ .Release.Name }} 21 | spec: 22 | containers: 23 | - name: sink-perf-client 24 | image: mssivamu/kafka-sink-perf-cosmosdb:latest 25 | imagePullPolicy: Always 26 | env: 27 | - name: PAYLOAD_SIZE 28 | value: {{ .Values.params.payload }} 29 | args: 30 | - --topic 31 | - {{ .Values.params.topic }} 32 | - --num-records 33 | - {{ .Values.params.totalRecords | quote }} 34 | - --payload-file 35 | - "/etc/payload/{{ .Values.params.payload }}.txt" 36 | - --throughput 37 | - {{ .Values.params.throughput | quote }} 38 | - --producer-props 39 | - bootstrap.servers={{ .Values.params.brokerEndpoint }} 40 | - buffer.memory=67108864 41 | - batch.size=8196 42 | -------------------------------------------------------------------------------- /src/perf/cluster/charts/sink-perf/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | params: 4 | topic: connect-test 5 | totalRecords: '10000000' 6 | throughput: 10 7 | # Valid values are: small (100b), medium (1kb) or large (50kb) 8 | payload: small 9 | brokerEndpoint: kafka-cp-kafka-headless.kafka:9092 10 | -------------------------------------------------------------------------------- /src/perf/cluster/manifests/kafka-client.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: kafka-client 5 | spec: 6 | containers: 7 | - name: kafka-client 8 | image: confluentinc/cp-kafka:6.0.1 9 | command: 10 | - sh 11 | - -c 12 | - "exec tail -f /dev/null" 13 | -------------------------------------------------------------------------------- /src/perf/cluster/manifests/kafka-helm-config.yaml: -------------------------------------------------------------------------------- 1 | # Default values for Kafka. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
4 | 5 | cp-zookeeper: 6 | enabled: true 7 | servers: 3 8 | persistence: 9 | enabled: false 10 | 11 | cp-kafka: 12 | enabled: true 13 | servers: 3 14 | persistence: 15 | enabled: false 16 | 17 | cp-schema-registry: 18 | enabled: true 19 | 20 | cp-kafka-rest: 21 | enabled: true 22 | 23 | cp-kafka-connect: 24 | enabled: false 25 | 26 | cp-ksql-server: 27 | enabled: false 28 | 29 | cp-control-center: 30 | enabled: false 31 | -------------------------------------------------------------------------------- /src/perf/cluster/manifests/single-sink-perf-client.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: kafka-perf-client 5 | spec: 6 | containers: 7 | - name: kafka-perf-client 8 | image: mssivamu/kafka-sink-perf-cosmosdb:latest 9 | args: 10 | - --topic 11 | - sink 12 | - --num-records 13 | - '4500' 14 | - --payload-file 15 | - /etc/payload/small.txt 16 | - --throughput 17 | - '15' 18 | - --producer-props 19 | - bootstrap.servers=kafka-cp-kafka-headless:9092 20 | - buffer.memory=67108864 21 | - batch.size=8196 22 | restartPolicy: Never 23 | -------------------------------------------------------------------------------- /src/perf/perf-driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | while getopts :n:g:t:s:p:d:l: option 3 | do 4 | case "${option}" in 5 | n) DB_NAME=${OPTARG};; 6 | g) DB_RG=${OPTARG};; 7 | t) DB_THROUGHPUT=${OPTARG};; 8 | s) SOURCE_SINK=${OPTARG};; 9 | p) NUM_PARTITIONS=${OPTARG};; 10 | d) TEST_DURATION=${OPTARG};; 11 | l) LOAD_THROUGHPUT=${OPTARG};; 12 | *) echo "Please refer to usage guide in the README." >&2 13 | exit 1 ;; 14 | esac 15 | done 16 | 17 | setup_cosmos_db() { 18 | SQL_DB_NAME="kafkaconnect" 19 | SQL_CTR_NAME="kafka" 20 | 21 | az cosmosdb sql database delete -a $DB_NAME -n $SQL_DB_NAME -g $DB_RG -y > /dev/null 2>&1 22 | az cosmosdb sql database create -a $DB_NAME -n $SQL_DB_NAME -g $DB_RG --throughput $DB_THROUGHPUT > /dev/null 2>&1 23 | az cosmosdb sql container create -p /id -g $DB_RG -a $DB_NAME -d $SQL_DB_NAME -n $SQL_CTR_NAME > /dev/null 2>&1 24 | 25 | # Create lease container if running source test 26 | if [[ "$SOURCE_SINK" == "source" ]]; then 27 | az cosmosdb sql container create -p /id -g $DB_RG -a $DB_NAME -d $SQL_DB_NAME -n $SQL_CTR_NAME-leases > /dev/null 2>&1 28 | fi 29 | } 30 | 31 | cleanup() { 32 | kubectl delete pod kafka-client -n kafka > /dev/null 2>&1 33 | helm delete sink-perf -n perf > /dev/null 2>&1 34 | helm delete connect -n connect > /dev/null 2>&1 35 | helm delete kafka -n kafka > /dev/null 2>&1 36 | } 37 | 38 | # Validation 39 | if ! az cosmosdb show -n $DB_NAME -g $DB_RG > /dev/null 2>&1; then 40 | echo "ERROR: cannot find Cosmos DB instance. Make sure you're signed in and/or the Cosmos DB instance exists." 41 | exit 1 42 | fi 43 | 44 | if [[ "$SOURCE_SINK" != "sink" && "$SOURCE_SINK" != "source" ]]; then 45 | echo "Please provide 'sink' or 'source' as the argument for -s". 46 | exit 1 47 | fi 48 | 49 | if [[ "$SOURCE_SINK" == "sink" && ! -f "cluster/manifests/sink.json" ]]; then 50 | echo "Sink Connector config file (sink.json) does not exist in cluster/manifests." 51 | exit 1 52 | elif [[ "$SOURCE_SINK" == "source" && ! -f "cluster/manifests/source.json" ]]; then 53 | echo "Source Connector config file (source.json) does not exist in cluster/manifests." 
54 | exit 1 55 | fi 56 | 57 | case $TEST_DURATION in 58 | ''|*[!0-9]*) echo "Please provide a number for test duration (arg -d)"; exit 1; ;; 59 | esac 60 | case $LOAD_THROUGHPUT in 61 | ''|*[!0-9]*) echo "Please provide a number for load throughput (arg -l)"; exit 1; ;; 62 | esac 63 | 64 | # Clean up any existing helm releases 65 | echo "Cleaning up any existing kafka helm releases" 66 | cleanup 67 | while [[ ! -z $(kubectl get pods -n kafka | grep kafka) ]]; 68 | do 69 | echo "Sleeping for 10 seconds. Waiting for kafka pods to go down.." 70 | sleep 10s 71 | done 72 | 73 | # Setup Kafka Server, Connect Cluster, Cosmos DB resources, Kafka Topic 74 | echo "Setting up Kafka Server and Client" 75 | cd cluster/manifests 76 | helm install kafka confluentinc/cp-helm-charts -f kafka-helm-config.yaml -n kafka > /dev/null 2>&1 77 | kubectl apply -f kafka-client.yaml -n kafka > /dev/null 2>&1 78 | sleep 5s 79 | 80 | echo "Setting up Kafka Connect Cluster" 81 | cd ../charts/ 82 | helm install connect ./connect -f ./connect/helm-config.yaml -n connect --set replicaCount=3 > /dev/null 2>&1 83 | 84 | echo "Creating new Cosmos DB SQL Database and Container" 85 | setup_cosmos_db 86 | 87 | echo "Creating new kafka topic for sink connector" 88 | SINK_KAFKA_TOPIC="sink-test" 89 | kubectl exec -it kafka-client -n kafka -- kafka-topics --zookeeper kafka-cp-zookeeper:2181 --create --topic $SINK_KAFKA_TOPIC --partitions $NUM_PARTITIONS --replication-factor 3 90 | 91 | # Setup connectors, sink and source if source type is specified 92 | echo "Creating new Cosmos DB Connectors" 93 | cd ../manifests/ 94 | CONNECT_PIP=$(kubectl get svc -n connect -l app=cp-kafka-connect -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') 95 | echo "Creating new Sink Connector" 96 | curl -H "Content-Type: application/json" -X POST -d @sink.json http://$CONNECT_PIP:8083/connectors > /dev/null 2>&1 97 | if [[ "$SOURCE_SINK" == "source" ]]; then 98 | echo "Creating new Source Connector" 99 | curl -H "Content-Type: application/json" -X POST -d @source.json http://$CONNECT_PIP:8083/connectors > /dev/null 2>&1 100 | fi 101 | sleep 20s 102 | cd ../charts/ 103 | 104 | # Begin driving the load client and start the test 105 | echo "Starting performance test."; date 106 | 107 | TOTAL_RECORDS=$(($LOAD_THROUGHPUT*$TEST_DURATION)) 108 | helm install sink-perf ./sink-perf -n perf --set params.topic=$SINK_KAFKA_TOPIC --set params.throughput="$LOAD_THROUGHPUT" --set params.totalRecords="$TOTAL_RECORDS" > /dev/null 2>&1 109 | sleep $TEST_DURATION 110 | 111 | echo "Stopping performance test."; date 112 | 113 | # Cleanup Cosmos, Kafka pods 114 | echo -e "Cleaning up resources...\n" 115 | az cosmosdb sql database delete -a $DB_NAME -n $SQL_DB_NAME -g $DB_RG -y > /dev/null 2>&1 116 | cleanup 117 | -------------------------------------------------------------------------------- /src/test/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore test resource configs 2 | resources/*.config.json 3 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/CosmosConfigTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect; 5 | 6 | import org.junit.Test; 7 | 8 | import java.util.HashMap; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertNull; 12 | 13 | public class CosmosConfigTest { 14 | private static final String COSMOS_URL = "https://.documents.azure.com:443/"; 15 | 16 | public static HashMap setupConfigsWithProvider() { 17 | HashMap configs = new HashMap<>(); 18 | configs.put(CosmosDBConfig.COSMOS_CONN_ENDPOINT_CONF, COSMOS_URL); 19 | configs.put(CosmosDBConfig.COSMOS_CONN_KEY_CONF, "mykey"); 20 | configs.put(CosmosDBConfig.COSMOS_DATABASE_NAME_CONF, "mydb"); 21 | configs.put(CosmosDBConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, "mytopic5#mycontainer6"); 22 | configs.put(CosmosDBConfig.COSMOS_PROVIDER_NAME_CONF, "myprovider"); 23 | 24 | return configs; 25 | } 26 | 27 | public static HashMap setupConfigs() { 28 | HashMap configs = new HashMap<>(); 29 | configs.put(CosmosDBConfig.COSMOS_CONN_ENDPOINT_CONF, COSMOS_URL); 30 | configs.put(CosmosDBConfig.COSMOS_CONN_KEY_CONF, "mykey"); 31 | configs.put(CosmosDBConfig.COSMOS_DATABASE_NAME_CONF, "mydb"); 32 | configs.put(CosmosDBConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, "mytopic5#mycontainer6"); 33 | 34 | return configs; 35 | } 36 | 37 | @Test 38 | public void shouldHaveDefaultValues() { 39 | // Adding required Configuration with no default value. 40 | CosmosDBConfig config = new CosmosDBConfig(setupConfigs()); 41 | assertNull("Provider Name should be null unless set", config.getProviderName()); 42 | } 43 | 44 | @Test 45 | public void shouldAcceptValidConfig() { 46 | // Adding required Configuration with no default value. 47 | CosmosDBConfig config = new CosmosDBConfig(setupConfigsWithProvider()); 48 | assertEquals("myprovider", config.getProviderName()); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/IntegrationTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect; 5 | 6 | /** 7 | * To be used with JUnit's {@link org.junit.experimental.categories.Category} to identify integration tests. 8 | */ 9 | public interface IntegrationTest { 10 | } 11 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/TopicContainerMapTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect; 5 | 6 | import org.junit.Test; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | import static org.junit.Assert.assertNotNull; 10 | 11 | public class TopicContainerMapTest { 12 | 13 | @Test 14 | public void testPopulateOneItem() { 15 | final String topic = "topic6325"; 16 | final String container = "container61616"; 17 | TopicContainerMap map = TopicContainerMap.deserialize(topic + "#" + container); 18 | assertEquals(topic, map.getTopicForContainer(container).get()); 19 | assertEquals(container, map.getContainerForTopic(topic).get()); 20 | } 21 | 22 | @Test 23 | public void testSerializeEmpty() { 24 | String result = TopicContainerMap.empty().serialize(); 25 | assertNotNull(result); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/sink/CosmosDBSinkConfigTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.kafka.connect.sink.id.strategy.AbstractIdStrategyConfig; 7 | import com.azure.cosmos.kafka.connect.sink.id.strategy.KafkaMetadataStrategy; 8 | import org.apache.kafka.common.config.ConfigException; 9 | import org.junit.Test; 10 | 11 | import java.util.HashMap; 12 | 13 | import static org.junit.Assert.*; 14 | 15 | public class CosmosDBSinkConfigTest { 16 | 17 | private static final String COSMOS_URL = "https://.documents.azure.com:443/"; 18 | private static final String COSMOS_KEY = "mykey"; 19 | private static final String COSMOS_DATABASE_NAME = "mydb"; 20 | private static final String COSMOS_CONTAINER_NAME = "mycontainer"; 21 | private static final String TOPIC_NAME = "mytopic"; 22 | private static final int DEFAULT_MAX_RETRY = 10; 23 | private static final boolean DEFAULT_GATEWAY_MODE_ENABLED = false; 24 | private static final boolean DEFAULT_CONNECTION_SHARING_ENABLED = false; 25 | 26 | public static HashMap<String, String> setupConfigs() { 27 | HashMap<String, String> configs = new HashMap<>(); 28 | configs.put(CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF, COSMOS_URL); 29 | configs.put(CosmosDBSinkConfig.COSMOS_CONN_KEY_CONF, COSMOS_KEY); 30 | configs.put(CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF, COSMOS_DATABASE_NAME); 31 | configs.put(CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, TOPIC_NAME + "#" + COSMOS_CONTAINER_NAME); 32 | configs.put(AbstractIdStrategyConfig.ID_STRATEGY, KafkaMetadataStrategy.class.getName()); 33 | return configs; 34 | } 35 | 36 | @Test 37 | public void shouldAcceptValidConfig() { 38 | // Adding required Configuration with no default value. 39 | CosmosDBSinkConfig config = new CosmosDBSinkConfig(setupConfigs()); 40 | assertNotNull(config); 41 | assertEquals(COSMOS_URL, config.getConnEndpoint()); 42 | assertEquals(COSMOS_KEY, config.getConnKey()); 43 | assertEquals(COSMOS_DATABASE_NAME, config.getDatabaseName()); 44 | assertEquals(COSMOS_CONTAINER_NAME, config.getTopicContainerMap().getContainerForTopic(TOPIC_NAME).get()); 45 | } 46 | 47 | @Test 48 | public void shouldThrowExceptionWhenCosmosEndpointNotGiven() { 49 | // Adding required Configuration with no default value.
50 | HashMap<String, String> settings = setupConfigs(); 51 | settings.remove(CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF); 52 | assertThrows(ConfigException.class, () -> { 53 | new CosmosDBSinkConfig(settings); 54 | }); 55 | } 56 | 57 | @Test 58 | public void shouldThrowExceptionWhenRequiredFieldsEmpty() { 59 | HashMap<String, String> settings = new HashMap<>(); 60 | settings.put(CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF, ""); 61 | settings.put(CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF, ""); 62 | settings.put(CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, ""); 63 | 64 | assertThrows(ConfigException.class, () -> { 65 | new CosmosDBSinkConfig(settings); 66 | }); 67 | } 68 | 69 | @Test 70 | public void bulkModeTest() { 71 | HashMap<String, String> settings = setupConfigs(); 72 | 73 | // validate by default bulk mode is enabled 74 | CosmosDBSinkConfig config = new CosmosDBSinkConfig(settings); 75 | assertTrue(config.isBulkModeEnabled()); 76 | 77 | // validate bulk mode is false 78 | settings.put(CosmosDBSinkConfig.COSMOS_SINK_BULK_ENABLED_CONF, "false"); 79 | config = new CosmosDBSinkConfig(settings); 80 | assertFalse(config.isBulkModeEnabled()); 81 | } 82 | 83 | @Test 84 | public void maxRetryCountTest() { 85 | HashMap<String, String> settings = setupConfigs(); 86 | 87 | // validate default max retry count 88 | CosmosDBSinkConfig config = new CosmosDBSinkConfig(settings); 89 | assertEquals(config.getMaxRetryCount(), DEFAULT_MAX_RETRY); 90 | 91 | // validate configured max retry count 92 | settings.put(CosmosDBSinkConfig.COSMOS_SINK_MAX_RETRY_COUNT, "3"); 93 | config = new CosmosDBSinkConfig(settings); 94 | assertEquals(config.getMaxRetryCount(), 3); 95 | } 96 | 97 | @Test 98 | public void gatewayModeEnabledTest() { 99 | HashMap<String, String> settings = setupConfigs(); 100 | 101 | // validate default gateway mode setting 102 | CosmosDBSinkConfig config = new CosmosDBSinkConfig(settings); 103 | assertEquals(config.isGatewayModeEnabled(), DEFAULT_GATEWAY_MODE_ENABLED); 104 | 105 | // validate configured gateway mode setting 106 | settings.put(CosmosDBSinkConfig.COSMOS_GATEWAY_MODE_ENABLED, "true"); 107 | config = new CosmosDBSinkConfig(settings); 108 | assertEquals(config.isGatewayModeEnabled(), true); 109 | } 110 | 111 | @Test 112 | public void connectionSharingEnabledTest() { 113 | HashMap<String, String> settings = setupConfigs(); 114 | 115 | // validate default connection sharing setting 116 | CosmosDBSinkConfig config = new CosmosDBSinkConfig(settings); 117 | assertEquals(config.isConnectionSharingEnabled(), DEFAULT_CONNECTION_SHARING_ENABLED); 118 | 119 | // validate configured connection sharing setting 120 | settings.put(CosmosDBSinkConfig.COSMOS_CONNECTION_SHARING_ENABLED, "true"); 121 | config = new CosmosDBSinkConfig(settings); 122 | assertEquals(config.isConnectionSharingEnabled(), true); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/sink/CosmosDBSinkConnectorConfigTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License.
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import org.apache.kafka.common.config.ConfigDef; 7 | import org.junit.Test; 8 | 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.Set; 12 | 13 | import static org.junit.Assert.*; 14 | 15 | /** 16 | * Tests the configuration of Sink Connector 17 | */ 18 | public class CosmosDBSinkConnectorConfigTest { 19 | 20 | @Test 21 | public void testConfig() { 22 | ConfigDef configDef = new CosmosDBSinkConnector().config(); 23 | assertNotNull(configDef); 24 | 25 | //Ensure all settings are represented 26 | CosmosDBSinkConfig config = new CosmosDBSinkConfig(CosmosDBSinkConfigTest.setupConfigs()); 27 | Set<String> allSettingsNames = config.values().keySet(); 28 | assertEquals("Not all settings are represented", allSettingsNames, configDef.names()); 29 | } 30 | 31 | @Test 32 | public void testAbsentDefaults() { 33 | //Database name does not have a default setting. Let's see if the configdef does 34 | assertNull(new CosmosDBSinkConnector().config().defaultValues() 35 | .get(CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF)); 36 | } 37 | 38 | @Test 39 | public void testTaskConfigs(){ 40 | Map<String, String> settingAssignment = CosmosDBSinkConfigTest.setupConfigs(); 41 | CosmosDBSinkConnector sinkConnector = new CosmosDBSinkConnector(); 42 | sinkConnector.start(settingAssignment); 43 | List<Map<String, String>> taskConfigs = sinkConnector.taskConfigs(3); 44 | assertEquals(3, taskConfigs.size()); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/sink/CosmosDBSinkConnectorTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License.
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.kafka.connect.CosmosDBConfig.CosmosClientBuilder; 7 | import org.apache.kafka.common.config.Config; 8 | import org.apache.kafka.common.config.ConfigValue; 9 | import org.junit.Test; 10 | import org.mockito.MockedStatic; 11 | 12 | import java.util.Collections; 13 | import java.util.List; 14 | import java.util.Map; 15 | import java.util.stream.Collectors; 16 | 17 | import static org.hamcrest.CoreMatchers.not; 18 | import static org.hamcrest.MatcherAssert.assertThat; 19 | import static org.hamcrest.collection.IsEmptyCollection.empty; 20 | import static org.mockito.AdditionalAnswers.answerVoid; 21 | import static org.mockito.ArgumentMatchers.anyString; 22 | import static org.mockito.Mockito.mockStatic; 23 | 24 | public class CosmosDBSinkConnectorTest { 25 | 26 | @Test 27 | public void testValidateEmptyConfigFailsRequiredFields() { 28 | Config config = new CosmosDBSinkConnector().validate(Collections.emptyMap()); 29 | 30 | Map<String, List<String>> errorMessages = config.configValues().stream() 31 | .collect(Collectors.toMap(ConfigValue::name, ConfigValue::errorMessages)); 32 | assertThat(errorMessages.get(CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF), not(empty())); 33 | assertThat(errorMessages.get(CosmosDBSinkConfig.COSMOS_CONN_KEY_CONF), not(empty())); 34 | assertThat(errorMessages.get(CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF), not(empty())); 35 | assertThat(errorMessages.get(CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF), not(empty())); 36 | } 37 | 38 | @Test 39 | public void testValidateCannotConnectToCosmos() { 40 | CosmosDBSinkConnector connector = new CosmosDBSinkConnector(); 41 | 42 | try (MockedStatic<CosmosClientBuilder> cosmosDBClient 43 | = mockStatic(CosmosClientBuilder.class)) { 44 | 45 | cosmosDBClient 46 | .when(() -> CosmosClientBuilder.createClient(anyString(), anyString())) 47 | .thenThrow(IllegalArgumentException.class); 48 | 49 | Config config = connector.validate(Map.of( 50 | CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF, "https://endpoint:port/", 51 | CosmosDBSinkConfig.COSMOS_CONN_KEY_CONF, "superSecretPassword", 52 | CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF, "superAwesomeDatabase", 53 | CosmosDBSinkConfig.COSMOS_PROVIDER_NAME_CONF, "superAwesomeProvider", 54 | CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, "topic#container" 55 | )); 56 | Map<String, List<String>> errorMessages = config.configValues().stream() 57 | .collect(Collectors.toMap(ConfigValue::name, ConfigValue::errorMessages)); 58 | assertThat(errorMessages.get(CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF), not(empty())); 59 | assertThat(errorMessages.get(CosmosDBSinkConfig.COSMOS_CONN_KEY_CONF), not(empty())); 60 | } 61 | } 62 | 63 | @Test 64 | public void testValidateHappyPath() { 65 | CosmosDBSinkConnector connector = new CosmosDBSinkConnector(); 66 | 67 | try (MockedStatic<CosmosClientBuilder> cosmosDBClient 68 | = mockStatic(CosmosClientBuilder.class)) { 69 | cosmosDBClient 70 | .when(() -> CosmosClientBuilder.createClient(anyString(), anyString())) 71 | .then(answerVoid((s1, s2) -> { 72 | })); 73 | 74 | Config config = connector.validate(Map.of( 75 | CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF, 76 | "https://cosmos-instance.documents.azure.com:443/", 77 | CosmosDBSinkConfig.COSMOS_CONN_KEY_CONF, "superSecretPassword", 78 | CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF, "superAwesomeDatabase", 79 | CosmosDBSinkConfig.COSMOS_PROVIDER_NAME_CONF, "superAwesomeProvider", 80 | CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, "topic#container" 81 | )); 82 | for (ConfigValue value : 
config.configValues()) { 83 | assertThat("Expecting empty error message for config " + value.name(), 84 | value.errorMessages(), empty()); 85 | } 86 | } 87 | } 88 | 89 | @Test 90 | public void testValidateTopicMapValidFormat() { 91 | try (MockedStatic<CosmosClientBuilder> cosmosDBConfig 92 | = mockStatic(CosmosClientBuilder.class)) { 93 | 94 | cosmosDBConfig 95 | .when(() -> CosmosClientBuilder.createClient(anyString(), anyString())) 96 | .then(answerVoid((s1, s2) -> {})); 97 | 98 | CosmosDBSinkConnector connector = new CosmosDBSinkConnector(); 99 | 100 | invalidTopicMapString(connector, "topicOnly"); 101 | invalidTopicMapString(connector, "#containerOnly"); 102 | invalidTopicMapString(connector, ",,,,,"); 103 | invalidTopicMapString(connector, "###"); 104 | invalidTopicMapString(connector, "partially#correct,but,not#entirely"); 105 | } 106 | } 107 | 108 | private void invalidTopicMapString(CosmosDBSinkConnector connector, String topicMapConfig) { 109 | Config config = connector.validate(Map.of( 110 | CosmosDBSinkConfig.COSMOS_CONN_ENDPOINT_CONF, "https://endpoint:port/", 111 | CosmosDBSinkConfig.COSMOS_CONN_KEY_CONF, "superSecretPassword", 112 | CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF, "superAwesomeDatabase", 113 | CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, topicMapConfig 114 | )); 115 | Map<String, List<String>> errorMessages = config.configValues().stream() 116 | .collect(Collectors.toMap(ConfigValue::name, ConfigValue::errorMessages)); 117 | assertThat(errorMessages.get(CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF), not(empty())); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/sink/CosmosDBSinkTaskTestNotFails.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License.
3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.CosmosClient; 7 | import com.azure.cosmos.CosmosContainer; 8 | import com.azure.cosmos.CosmosDatabase; 9 | import com.azure.cosmos.implementation.BadRequestException; 10 | import com.fasterxml.jackson.core.JsonProcessingException; 11 | import org.apache.commons.lang3.reflect.FieldUtils; 12 | import org.apache.kafka.connect.data.ConnectSchema; 13 | import org.apache.kafka.connect.data.Schema; 14 | import org.apache.kafka.connect.sink.ErrantRecordReporter; 15 | import org.apache.kafka.connect.sink.SinkRecord; 16 | import org.apache.kafka.connect.sink.SinkTaskContext; 17 | import org.junit.After; 18 | import org.junit.Before; 19 | import org.junit.Test; 20 | import org.mockito.Mockito; 21 | 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | import static org.mockito.Mockito.any; 26 | import static org.mockito.Mockito.anyString; 27 | import static org.mockito.Mockito.times; 28 | import static org.mockito.Mockito.verify; 29 | import static org.mockito.Mockito.when; 30 | 31 | public class CosmosDBSinkTaskTestNotFails { 32 | private final String topicName = "testtopic"; 33 | private final String containerName = "container666"; 34 | private final String databaseName = "fakeDatabase312"; 35 | private CosmosDBSinkTask testTask; 36 | private CosmosClient mockCosmosClient; 37 | private CosmosContainer mockContainer; 38 | private SinkTaskContext mockContext = Mockito.mock(SinkTaskContext.class); 39 | private ErrantRecordReporter mockErrantReporter = Mockito.mock(ErrantRecordReporter.class); 40 | 41 | @Before 42 | public void setup() throws IllegalAccessException { 43 | testTask = new CosmosDBSinkTask(); 44 | 45 | //Configure settings 46 | Map settingAssignment = CosmosDBSinkConfigTest.setupConfigs(); 47 | settingAssignment.put(CosmosDBSinkConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, topicName + "#" + containerName); 48 | settingAssignment.put(CosmosDBSinkConfig.COSMOS_DATABASE_NAME_CONF, databaseName); 49 | settingAssignment.put(CosmosDBSinkConfig.TOLERANCE_ON_ERROR_CONFIG, "all"); 50 | settingAssignment.put(CosmosDBSinkConfig.COSMOS_SINK_BULK_ENABLED_CONF, "false"); 51 | CosmosDBSinkConfig config = new CosmosDBSinkConfig(settingAssignment); 52 | FieldUtils.writeField(testTask, "config", config, true); 53 | 54 | //Mock the Cosmos SDK 55 | mockCosmosClient = Mockito.mock(CosmosClient.class); 56 | CosmosDatabase mockDatabase = Mockito.mock(CosmosDatabase.class); 57 | when(mockCosmosClient.getDatabase(anyString())).thenReturn(mockDatabase); 58 | mockContainer = Mockito.mock(CosmosContainer.class); 59 | when(mockDatabase.getContainer(any())).thenReturn(mockContainer); 60 | when(mockContext.errantRecordReporter()).thenReturn(mockErrantReporter); 61 | 62 | FieldUtils.writeField(testTask, "client", mockCosmosClient, true); 63 | } 64 | 65 | @After() 66 | public void resetContext() throws IllegalAccessException { 67 | FieldUtils.writeField(testTask, "context", null, true); 68 | } 69 | 70 | @Test 71 | public void testPutMapThatFailsDoesNotStopTask() throws JsonProcessingException, IllegalAccessException { 72 | 73 | Schema stringSchema = new ConnectSchema(Schema.Type.STRING); 74 | Schema mapSchema = new ConnectSchema(Schema.Type.MAP); 75 | when(mockContainer.upsertItem(any())).thenThrow(new BadRequestException("Something")); 76 | SinkRecord record = new SinkRecord(topicName, 1, stringSchema, "nokey", mapSchema, "{", 0L); 77 | testTask.put(List.of(record)); 78 | } 79 | 80 | @Test 81 | public void 
testPutMapThatFailsDoesNotStopTaskWithdlq() throws JsonProcessingException, IllegalAccessException { 82 | FieldUtils.writeField(testTask, "context", mockContext, true); 83 | Schema stringSchema = new ConnectSchema(Schema.Type.STRING); 84 | Schema mapSchema = new ConnectSchema(Schema.Type.MAP); 85 | when(mockContainer.upsertItem(any())).thenThrow(new BadRequestException("Something")); 86 | SinkRecord record = new SinkRecord(topicName, 1, stringSchema, "nokey", mapSchema, "{", 0L); 87 | testTask.put(List.of(record)); 88 | verify(mockContext.errantRecordReporter(), times(1)).report(any(), any()); 89 | } 90 | } 91 | 92 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/sink/PointWriterTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink; 5 | 6 | import com.azure.cosmos.CosmosContainer; 7 | import com.azure.cosmos.CosmosException; 8 | import com.azure.cosmos.implementation.BadRequestException; 9 | import com.azure.cosmos.implementation.HttpConstants; 10 | import com.azure.cosmos.implementation.RequestTimeoutException; 11 | import com.azure.cosmos.models.CosmosItemResponse; 12 | import org.apache.kafka.connect.data.ConnectSchema; 13 | import org.apache.kafka.connect.data.Schema; 14 | import org.apache.kafka.connect.sink.SinkRecord; 15 | import org.junit.Before; 16 | import org.junit.Test; 17 | import org.mockito.Mockito; 18 | 19 | import java.util.Arrays; 20 | import java.util.HashMap; 21 | import java.util.Map; 22 | import java.util.UUID; 23 | 24 | import static junit.framework.Assert.assertTrue; 25 | import static junit.framework.TestCase.assertEquals; 26 | import static org.mockito.Mockito.times; 27 | import static org.mockito.Mockito.verify; 28 | 29 | public class PointWriterTest { 30 | private final int MAX_RETRY_COUNT = 2; 31 | private final String TOPIC_NAME = "testtopic"; 32 | 33 | private CosmosContainer container; 34 | private PointWriter pointWriter; 35 | 36 | @Before 37 | public void setup(){ 38 | container = Mockito.mock(CosmosContainer.class); 39 | pointWriter = new PointWriter(container, MAX_RETRY_COUNT); 40 | } 41 | 42 | @Test 43 | public void testPointWriteSuccess() { 44 | SinkRecord record1 = createSinkRecord(); 45 | SinkRecord record2 = createSinkRecord(); 46 | CosmosItemResponse itemResponse = Mockito.mock(CosmosItemResponse.class); 47 | 48 | Mockito.when(container.upsertItem(record1.value())).thenReturn(itemResponse); 49 | Mockito.when(container.upsertItem(record2.value())).thenReturn(itemResponse); 50 | 51 | SinkWriteResponse response = pointWriter.write(Arrays.asList(record1, record2)); 52 | assertEquals(2, response.getSucceededRecords().size()); 53 | assertEquals(record1, response.getSucceededRecords().get(0)); 54 | assertEquals(record2, response.getSucceededRecords().get(1)); 55 | assertEquals(0, response.getFailedRecordResponses().size()); 56 | } 57 | 58 | @Test 59 | public void testPointWriteWithNonTransientException() { 60 | SinkRecord record1 = createSinkRecord(); 61 | SinkRecord record2 = createSinkRecord(); 62 | 63 | CosmosItemResponse itemResponse = Mockito.mock(CosmosItemResponse.class); 64 | Mockito.when(container.upsertItem(record1.value())).thenReturn(itemResponse); 65 | Mockito.when(container.upsertItem(record2.value())).thenThrow(new BadRequestException("Test")); 66 | 67 | SinkWriteResponse 
response = pointWriter.write(Arrays.asList(record1, record2)); 68 | // Validate record 1 succeeded 69 | assertEquals(1, response.getSucceededRecords().size()); 70 | assertEquals(record1, response.getSucceededRecords().get(0)); 71 | assertEquals(1, response.getFailedRecordResponses().size()); 72 | verify(container, times(1)).upsertItem(record1.value()); 73 | 74 | // Validate record2 failed 75 | assertEquals(record2, response.getFailedRecordResponses().get(0).getSinkRecord()); 76 | assertTrue(response.getFailedRecordResponses().get(0).getException() instanceof CosmosException); 77 | assertEquals(HttpConstants.StatusCodes.BADREQUEST, ((CosmosException)response.getFailedRecordResponses().get(0).getException()).getStatusCode()); 78 | verify(container, times(1)).upsertItem(record2.value()); 79 | } 80 | 81 | @Test 82 | public void testPointWriteSucceededWithTransientException() { 83 | SinkRecord record1 = createSinkRecord(); 84 | SinkRecord record2 = createSinkRecord(); 85 | 86 | CosmosItemResponse itemResponse = Mockito.mock(CosmosItemResponse.class); 87 | Mockito.when(container.upsertItem(record1.value())).thenReturn(itemResponse); 88 | Mockito 89 | .when(container.upsertItem(record2.value())) 90 | .thenThrow(new RequestTimeoutException()) 91 | .thenThrow(new RequestTimeoutException()) 92 | .thenReturn(itemResponse); 93 | 94 | SinkWriteResponse response = pointWriter.write(Arrays.asList(record1, record2)); 95 | 96 | assertEquals(2, response.getSucceededRecords().size()); 97 | assertEquals(record1, response.getSucceededRecords().get(0)); 98 | assertEquals(record2, response.getSucceededRecords().get(1)); 99 | assertEquals(0, response.getFailedRecordResponses().size()); 100 | 101 | verify(container, times(1)).upsertItem(record1.value()); 102 | verify(container, times(3)).upsertItem(record2.value()); 103 | } 104 | 105 | @Test 106 | public void testPointWriteFailedWithTransientException() { 107 | SinkRecord record1 = createSinkRecord(); 108 | SinkRecord record2 = createSinkRecord(); 109 | 110 | CosmosItemResponse itemResponse = Mockito.mock(CosmosItemResponse.class); 111 | Mockito.when(container.upsertItem(record1.value())).thenReturn(itemResponse); 112 | Mockito 113 | .when(container.upsertItem(record2.value())) 114 | .thenThrow(new RequestTimeoutException()) 115 | .thenThrow(new RequestTimeoutException()) 116 | .thenThrow(new RequestTimeoutException()); 117 | 118 | SinkWriteResponse response = pointWriter.write(Arrays.asList(record1, record2)); 119 | 120 | assertEquals(1, response.getSucceededRecords().size()); 121 | assertEquals(record1, response.getSucceededRecords().get(0)); 122 | assertEquals(record2, response.getFailedRecordResponses().get(0).getSinkRecord()); 123 | assertTrue(response.getFailedRecordResponses().get(0).getException() instanceof CosmosException); 124 | assertEquals(HttpConstants.StatusCodes.REQUEST_TIMEOUT, ((CosmosException)response.getFailedRecordResponses().get(0).getException()).getStatusCode()); 125 | 126 | verify(container, times(1)).upsertItem(record1.value()); 127 | verify(container, times(3)).upsertItem(record2.value()); 128 | } 129 | 130 | private SinkRecord createSinkRecord() { 131 | Schema stringSchema = new ConnectSchema(Schema.Type.STRING); 132 | Schema mapSchema = new ConnectSchema(Schema.Type.MAP); 133 | Map map = new HashMap<>(); 134 | map.put("foo", "baaarrrrrgh"); 135 | map.put("id", UUID.randomUUID().toString()); 136 | 137 | return new SinkRecord(TOPIC_NAME, 1, stringSchema, "nokey", mapSchema, map, 0L); 138 | } 139 | } 140 | 
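For orientation, here is a minimal sketch of how a caller might consume a PointWriter result, using only the surface exercised by PointWriterTest above (the two-argument constructor, write(), and the succeeded/failed accessors). The wrapper class and its method name are hypothetical, and the sketch assumes getFailedRecordResponses() yields the SinkOperationFailedResponse elements listed in the source tree; it is not part of the repository.

    // Hypothetical helper, placed in the same package as PointWriter so package-level visibility is not an issue.
    package com.azure.cosmos.kafka.connect.sink;

    import com.azure.cosmos.CosmosContainer;
    import org.apache.kafka.connect.sink.SinkRecord;

    import java.util.List;

    public final class PointWriterUsageSketch {
        // Write a batch, retrying transient failures up to 2 times per record, then report what still failed.
        static void writeBatch(CosmosContainer container, List<SinkRecord> records) {
            PointWriter writer = new PointWriter(container, 2);
            SinkWriteResponse response = writer.write(records);
            for (SinkOperationFailedResponse failed : response.getFailedRecordResponses()) {
                System.err.printf("Record at offset %d was not written: %s%n",
                        failed.getSinkRecord().kafkaOffset(), failed.getException());
            }
        }
    }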
-------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/sink/id/strategy/ProvidedInStrategyTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 5 | 6 | import org.apache.kafka.connect.data.Schema; 7 | import org.apache.kafka.connect.data.SchemaBuilder; 8 | import org.apache.kafka.connect.data.Struct; 9 | import org.apache.kafka.connect.errors.ConnectException; 10 | import org.apache.kafka.connect.sink.SinkRecord; 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | import org.junit.runner.RunWith; 14 | import org.junit.runners.Parameterized; 15 | import org.mockito.Mock; 16 | 17 | import java.util.LinkedHashMap; 18 | import java.util.List; 19 | import java.util.Map; 20 | 21 | import static org.junit.Assert.assertEquals; 22 | import static org.mockito.Mockito.when; 23 | import static org.mockito.MockitoAnnotations.initMocks; 24 | 25 | @RunWith(Parameterized.class) 26 | public class ProvidedInStrategyTest { 27 | @Parameterized.Parameters(name = "{0}") 28 | public static Iterable parameters() { 29 | return List.of( 30 | new Object[]{ProvidedInValueStrategy.class, new ProvidedInValueStrategy()}, 31 | new Object[]{ProvidedInKeyStrategy.class, new ProvidedInKeyStrategy()} 32 | ); 33 | } 34 | 35 | @Parameterized.Parameter(0) 36 | public Class clazz; 37 | 38 | @Parameterized.Parameter(1) 39 | public IdStrategy strategy; 40 | 41 | @Mock 42 | SinkRecord record; 43 | 44 | @Before 45 | public void setUp() { 46 | initMocks(this); 47 | 48 | strategy.configure(Map.of()); 49 | } 50 | 51 | private void returnOnKeyOrValue(Schema schema, Object ret) { 52 | if (clazz == ProvidedInKeyStrategy.class) { 53 | when(record.keySchema()).thenReturn(schema); 54 | when(record.key()).thenReturn(ret); 55 | } else { 56 | when(record.valueSchema()).thenReturn(schema); 57 | when(record.value()).thenReturn(ret); 58 | } 59 | } 60 | 61 | @Test(expected = ConnectException.class) 62 | public void valueNotStructOrMapShouldFail() { 63 | returnOnKeyOrValue(Schema.STRING_SCHEMA, "a string"); 64 | strategy.generateId(record); 65 | } 66 | 67 | @Test(expected = ConnectException.class) 68 | public void noIdInValueShouldFail() { 69 | returnOnKeyOrValue(null, Map.of()); 70 | strategy.generateId(record); 71 | } 72 | 73 | @Test 74 | public void stringIdOnMapShouldReturn() { 75 | returnOnKeyOrValue(null, Map.of( 76 | "id", "1234567" 77 | )); 78 | assertEquals("1234567", strategy.generateId(record)); 79 | } 80 | 81 | @Test 82 | public void nonStringIdOnMapShouldReturn() { 83 | returnOnKeyOrValue(null, Map.of( 84 | "id", 1234567 85 | )); 86 | assertEquals("1234567", strategy.generateId(record)); 87 | } 88 | 89 | @Test 90 | public void stringIdOnStructShouldReturn() { 91 | Schema schema = SchemaBuilder.struct() 92 | .field("id", Schema.STRING_SCHEMA) 93 | .build(); 94 | Struct struct = new Struct(schema) 95 | .put("id", "1234567"); 96 | returnOnKeyOrValue(struct.schema(), struct); 97 | 98 | assertEquals("1234567", strategy.generateId(record)); 99 | } 100 | 101 | @Test 102 | public void structIdOnStructShouldReturn() { 103 | Schema idSchema = SchemaBuilder.struct() 104 | .field("name", Schema.STRING_SCHEMA) 105 | .build(); 106 | Schema schema = SchemaBuilder.struct() 107 | .field("id", idSchema) 108 | .build(); 109 | Struct struct = new Struct(schema) 110 | 
.put("id", new Struct(idSchema).put("name", "cosmos kramer")); 111 | returnOnKeyOrValue(struct.schema(), struct); 112 | 113 | assertEquals("{\"name\":\"cosmos kramer\"}", strategy.generateId(record)); 114 | } 115 | 116 | @Test 117 | public void jsonPathOnStruct() { 118 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "$.id.name")); 119 | 120 | Schema idSchema = SchemaBuilder.struct() 121 | .field("name", Schema.STRING_SCHEMA) 122 | .build(); 123 | Schema schema = SchemaBuilder.struct() 124 | .field("id", idSchema) 125 | .build(); 126 | Struct struct = new Struct(schema) 127 | .put("id", new Struct(idSchema).put("name", "franz kafka")); 128 | returnOnKeyOrValue(struct.schema(), struct); 129 | 130 | assertEquals("franz kafka", strategy.generateId(record)); 131 | } 132 | 133 | @Test 134 | public void jsonPathOnMap() { 135 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "$.id.name")); 136 | returnOnKeyOrValue(null, 137 | Map.of("id", Map.of("name", "franz kafka"))); 138 | 139 | assertEquals("franz kafka", strategy.generateId(record)); 140 | } 141 | 142 | @Test(expected = ConnectException.class) 143 | public void invalidJsonPathThrows() { 144 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "invalid.path")); 145 | returnOnKeyOrValue(null, 146 | Map.of("id", Map.of("name", "franz kafka"))); 147 | 148 | strategy.generateId(record); 149 | } 150 | 151 | @Test(expected = ConnectException.class) 152 | public void jsonPathNotExistThrows() { 153 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "$.id.not.exist")); 154 | returnOnKeyOrValue(null, 155 | Map.of("id", Map.of("name", "franz kafka"))); 156 | 157 | strategy.generateId(record); 158 | } 159 | 160 | @Test 161 | public void complexJsonPath() { 162 | Map map1 = new LinkedHashMap<>(); 163 | map1.put("id", 0); 164 | map1.put("name", "cosmos kramer"); 165 | map1.put("occupation", "unknown"); 166 | Map map2 = new LinkedHashMap<>(); 167 | map2.put("id", 1); 168 | map2.put("name", "franz kafka"); 169 | map2.put("occupation", "writer"); 170 | returnOnKeyOrValue(null, Map.of("id", List.of(map1, map2))); 171 | 172 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "$.id[0].name")); 173 | assertEquals("cosmos kramer", strategy.generateId(record)); 174 | 175 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "$.id[1].name")); 176 | assertEquals("franz kafka", strategy.generateId(record)); 177 | 178 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "$.id[*].id")); 179 | assertEquals("[0,1]", strategy.generateId(record)); 180 | 181 | strategy.configure(Map.of(ProvidedInConfig.JSON_PATH_CONFIG, "$.id")); 182 | assertEquals( 183 | "[{\"id\":0,\"name\":\"cosmos kramer\",\"occupation\":\"unknown\"},{\"id\":1,\"name\":\"franz kafka\",\"occupation\":\"writer\"}]", 184 | strategy.generateId(record)); 185 | } 186 | 187 | @Test 188 | public void generatedIdSanitized() { 189 | returnOnKeyOrValue(null, Map.of("id", "#my/special\\id?")); 190 | 191 | String id = strategy.generateId(record); 192 | assertEquals("_my_special_id_", id); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/sink/id/strategy/TemplateStrategyTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | package com.azure.cosmos.kafka.connect.sink.id.strategy; 5 | 6 | import org.apache.kafka.connect.data.Schema; 7 | import org.apache.kafka.connect.data.SchemaBuilder; 8 | import org.apache.kafka.connect.data.Struct; 9 | import org.apache.kafka.connect.sink.SinkRecord; 10 | import org.junit.Test; 11 | 12 | import java.util.Map; 13 | 14 | import static org.junit.Assert.assertEquals; 15 | import static org.mockito.Mockito.mock; 16 | import static org.mockito.Mockito.when; 17 | 18 | public class TemplateStrategyTest { 19 | IdStrategy strategy = new TemplateStrategy(); 20 | 21 | @Test 22 | public void simpleKey() { 23 | strategy.configure(Map.of(TemplateStrategyConfig.TEMPLATE_CONFIG, "${key}")); 24 | SinkRecord record = mock(SinkRecord.class); 25 | when(record.keySchema()).thenReturn(Schema.STRING_SCHEMA); 26 | when(record.key()).thenReturn("test"); 27 | 28 | String id = strategy.generateId(record); 29 | assertEquals("test", id); 30 | } 31 | 32 | @Test 33 | public void kafkaMetadata() { 34 | strategy.configure(Map.of(TemplateStrategyConfig.TEMPLATE_CONFIG, "${topic}-${partition}-${offset}")); 35 | SinkRecord record = mock(SinkRecord.class); 36 | when(record.topic()).thenReturn("mytopic"); 37 | when(record.kafkaPartition()).thenReturn(0); 38 | when(record.kafkaOffset()).thenReturn(1L); 39 | 40 | String id = strategy.generateId(record); 41 | assertEquals("mytopic-0-1", id); 42 | } 43 | 44 | @Test 45 | public void unknownVariablePreserved() { 46 | strategy.configure(Map.of(TemplateStrategyConfig.TEMPLATE_CONFIG, "${unknown}")); 47 | String id = strategy.generateId(mock(SinkRecord.class)); 48 | assertEquals("${unknown}", id); 49 | } 50 | 51 | @Test 52 | public void nestedStruct() { 53 | strategy.configure(Map.of(TemplateStrategyConfig.TEMPLATE_CONFIG, "${key}")); 54 | SinkRecord record = mock(SinkRecord.class); 55 | Schema nestedSchema = SchemaBuilder.struct() 56 | .field("nested_field", Schema.STRING_SCHEMA) 57 | .build(); 58 | Schema schema = SchemaBuilder.struct() 59 | .field("string_field", Schema.STRING_SCHEMA) 60 | .field("struct_field", nestedSchema) 61 | .build(); 62 | Struct value = new Struct(schema) 63 | .put("string_field", "value") 64 | .put("struct_field", 65 | new Struct(nestedSchema).put("nested_field", "a nest")); 66 | when(record.keySchema()).thenReturn(schema); 67 | when(record.key()).thenReturn(value); 68 | 69 | String id = strategy.generateId(record); 70 | assertEquals( 71 | "{\"string_field\":\"value\",\"struct_field\":{\"nested_field\":\"a nest\"}}", 72 | id); 73 | } 74 | 75 | @Test 76 | public void fullKeyStrategyUsesFullKey() { 77 | strategy = new FullKeyStrategy(); 78 | strategy.configure(Map.of()); 79 | SinkRecord record = mock(SinkRecord.class); 80 | Schema schema = SchemaBuilder.struct() 81 | .field("string_field", Schema.STRING_SCHEMA) 82 | .field("int64_field", Schema.INT64_SCHEMA) 83 | .build(); 84 | Struct value = new Struct(schema) 85 | .put("string_field", "value") 86 | .put("int64_field", 0L); 87 | 88 | when(record.keySchema()).thenReturn(schema); 89 | when(record.key()).thenReturn(value); 90 | 91 | String id = strategy.generateId(record); 92 | assertEquals( 93 | "{\"string_field\":\"value\",\"int64_field\":0}", 94 | id); 95 | } 96 | 97 | @Test 98 | public void metadataStrategyUsesMetadataWithDeliminator() { 99 | strategy = new KafkaMetadataStrategy(); 100 | strategy.configure(Map.of(KafkaMetadataStrategyConfig.DELIMITER_CONFIG, "_")); 101 | SinkRecord record = mock(SinkRecord.class); 102 | when(record.topic()).thenReturn("topic"); 103 | 
when(record.kafkaPartition()).thenReturn(0); 104 | when(record.kafkaOffset()).thenReturn(1L); 105 | 106 | String id = strategy.generateId(record); 107 | assertEquals("topic_0_1", id); 108 | } 109 | 110 | @Test 111 | public void generatedIdSanitized() { 112 | strategy = new TemplateStrategy(); 113 | strategy.configure( 114 | Map.of(TemplateStrategyConfig.TEMPLATE_CONFIG, "#my/special\\id?")); 115 | SinkRecord record = mock(SinkRecord.class); 116 | 117 | String id = strategy.generateId(record); 118 | assertEquals("_my_special_id_", id); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/source/CosmosDBSourceConfigTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.source; 5 | 6 | import org.apache.kafka.common.config.ConfigException; 7 | import org.junit.Test; 8 | 9 | import java.util.HashMap; 10 | 11 | import static org.junit.Assert.assertEquals; 12 | import static org.junit.Assert.assertFalse; 13 | import static org.junit.Assert.assertNotNull; 14 | import static org.junit.Assert.assertThrows; 15 | 16 | public class CosmosDBSourceConfigTest { 17 | private static final String COSMOS_URL = "https://.documents.azure.com:443/"; 18 | private static final boolean DEFAULT_GATEWAY_MODE_ENABLED = false; 19 | private static final boolean DEFAULT_CONNECTION_SHARING_ENABLED = false; 20 | 21 | public static HashMap setupConfigs() { 22 | HashMap configs = new HashMap<>(); 23 | configs.put(CosmosDBSourceConfig.COSMOS_CONN_ENDPOINT_CONF, COSMOS_URL); 24 | configs.put(CosmosDBSourceConfig.COSMOS_CONN_KEY_CONF, "mykey"); 25 | configs.put(CosmosDBSourceConfig.COSMOS_DATABASE_NAME_CONF, "mydb"); 26 | configs.put(CosmosDBSourceConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, "mytopic5#mycontainer6"); 27 | return configs; 28 | } 29 | 30 | @Test 31 | public void shouldAcceptValidConfig() { 32 | // Adding required Configuration with no default value. 33 | CosmosDBSourceConfig config = new CosmosDBSourceConfig(setupConfigs()); 34 | assertNotNull(config); 35 | assertEquals(COSMOS_URL, config.getConnEndpoint()); 36 | assertEquals("mykey", config.getConnKey()); 37 | assertEquals("mydb", config.getDatabaseName()); 38 | assertEquals("mycontainer6", config.getTopicContainerMap().getContainerForTopic("mytopic5").get()); 39 | } 40 | 41 | @Test 42 | public void shouldHaveDefaultValues() { 43 | // Adding required Configuration with no default value. 44 | CosmosDBSourceConfig config = new CosmosDBSourceConfig(setupConfigs()); 45 | assertEquals(5000L, config.getTaskTimeout().longValue()); 46 | assertEquals(10000L, config.getTaskBufferSize().longValue()); 47 | assertEquals(100L, config.getTaskBatchSize().longValue()); 48 | assertEquals(1000L, config.getTaskPollInterval().longValue()); 49 | assertFalse(config.useLatestOffset()); 50 | } 51 | 52 | @Test 53 | public void shouldThrowExceptionWhenCosmosEndpointNotGiven() { 54 | // Adding required Configuration with no default value. 
55 | HashMap settings = setupConfigs(); 56 | settings.remove(CosmosDBSourceConfig.COSMOS_CONN_ENDPOINT_CONF); 57 | assertThrows(ConfigException.class, () -> { 58 | new CosmosDBSourceConfig(settings); 59 | }); 60 | } 61 | 62 | @Test 63 | public void gatewayModeEnabledTest() { 64 | HashMap settings = setupConfigs(); 65 | 66 | // validate default gateway mode setting 67 | CosmosDBSourceConfig config = new CosmosDBSourceConfig(settings); 68 | assertEquals(config.isGatewayModeEnabled(), DEFAULT_GATEWAY_MODE_ENABLED); 69 | 70 | // validate configured gateway mode setting 71 | settings.put(CosmosDBSourceConfig.COSMOS_GATEWAY_MODE_ENABLED, "true"); 72 | config = new CosmosDBSourceConfig(settings); 73 | assertEquals(config.isGatewayModeEnabled(), true); 74 | } 75 | 76 | @Test 77 | public void connectionSharingEnabledTest() { 78 | HashMap settings = setupConfigs(); 79 | 80 | // validate default connection sharing setting 81 | CosmosDBSourceConfig config = new CosmosDBSourceConfig(settings); 82 | assertEquals(config.isConnectionSharingEnabled(), DEFAULT_CONNECTION_SHARING_ENABLED); 83 | 84 | // validate configured connection sharing setting 85 | settings.put(CosmosDBSourceConfig.COSMOS_CONNECTION_SHARING_ENABLED, "true"); 86 | config = new CosmosDBSourceConfig(settings); 87 | assertEquals(config.isConnectionSharingEnabled(), true); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/test/java/com/azure/cosmos/kafka/connect/source/CosmosDBSourceConnectorTest.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | package com.azure.cosmos.kafka.connect.source; 5 | 6 | import com.azure.cosmos.CosmosAsyncClient; 7 | import com.azure.cosmos.CosmosAsyncContainer; 8 | import com.azure.cosmos.CosmosAsyncDatabase; 9 | import com.azure.cosmos.kafka.connect.implementations.CosmosClientStore; 10 | import com.azure.cosmos.models.CosmosContainerResponse; 11 | import org.apache.kafka.common.config.ConfigDef; 12 | import org.apache.kafka.common.config.ConfigValue; 13 | import org.junit.BeforeClass; 14 | import org.junit.Test; 15 | import org.mockito.MockedStatic; 16 | import org.mockito.Mockito; 17 | import reactor.core.publisher.Mono; 18 | 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.Set; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | import static org.junit.Assert.assertNotNull; 25 | import static org.junit.Assert.assertNull; 26 | import static org.mockito.ArgumentMatchers.any; 27 | import static org.mockito.ArgumentMatchers.anyString; 28 | 29 | /** 30 | * Tests the configuration of Source Connector 31 | */ 32 | public class CosmosDBSourceConnectorTest { 33 | 34 | private static final String ASSIGNED_CONTAINER = CosmosDBSourceConfig.COSMOS_ASSIGNED_CONTAINER_CONF; 35 | private static final String BATCH_SETTING_NAME = CosmosDBSourceConfig.COSMOS_SOURCE_TASK_BATCH_SIZE_CONF; 36 | private static final Long BATCH_SETTING = new CosmosDBSourceConfig(CosmosDBSourceConfigTest.setupConfigs()).getTaskBatchSize(); 37 | 38 | @BeforeClass 39 | public static void setup() { 40 | MockedStatic<CosmosClientStore> clientStoreMock = Mockito.mockStatic(CosmosClientStore.class); 41 | CosmosAsyncClient clientMock = Mockito.mock(CosmosAsyncClient.class); 42 | clientStoreMock.when(() -> CosmosClientStore.getCosmosClient(any(), any())).thenReturn(clientMock); 43 | 44 | CosmosAsyncDatabase databaseMock = Mockito.mock(CosmosAsyncDatabase.class); 45 | 
Mockito.when(clientMock.getDatabase(anyString())).thenReturn(databaseMock); 46 | 47 | CosmosAsyncContainer containerMock = Mockito.mock(CosmosAsyncContainer.class); 48 | Mockito.when(databaseMock.getContainer(anyString())).thenReturn(containerMock); 49 | 50 | CosmosContainerResponse containerResponseMock = Mockito.mock(CosmosContainerResponse.class); 51 | Mockito.when(containerMock.read()).thenReturn(Mono.just(containerResponseMock)); 52 | } 53 | 54 | @Test 55 | public void testConfig(){ 56 | ConfigDef configDef = new CosmosDBSourceConnector().config(); 57 | assertNotNull(configDef); 58 | 59 | //Ensure all settings are represented 60 | CosmosDBSourceConfig config = new CosmosDBSourceConfig(CosmosDBSourceConfigTest.setupConfigs()); 61 | Set<String> allSettingsNames = config.values().keySet(); 62 | assertEquals("Not all settings are represented", allSettingsNames, configDef.names()); 63 | } 64 | 65 | 66 | @Test 67 | public void testAbsentDefaults(){ 68 | //Containers list is set in connector and does not have a default setting. Let's see if the configdef does 69 | assertNull(new CosmosDBSourceConnector().config().defaultValues() 70 | .get(ASSIGNED_CONTAINER)); 71 | } 72 | 73 | @Test 74 | public void testPresentDefaults(){ 75 | //The task batch size has a default setting. Let's see if the configdef does 76 | assertNotNull(BATCH_SETTING); 77 | assertEquals(BATCH_SETTING, new CosmosDBSourceConnector().config().defaultValues() 78 | .get(BATCH_SETTING_NAME)); 79 | } 80 | 81 | @Test 82 | public void testNumericValidation(){ 83 | Map<String, String> settingAssignment = CosmosDBSourceConfigTest.setupConfigs(); 84 | settingAssignment.put(BATCH_SETTING_NAME, "definitely not a number"); 85 | ConfigDef config = new CosmosDBSourceConnector().config(); 86 | 87 | List<ConfigValue> postValidation = config.validate(settingAssignment); 88 | ConfigValue batchSizeConfigValue = postValidation.stream().filter(item -> item.name().equals(BATCH_SETTING_NAME)).findFirst().get(); 89 | assertEquals("Expected error message when assigning non-numeric value to task batch size", 1, batchSizeConfigValue.errorMessages().size()); 90 | } 91 | 92 | @Test 93 | public void testTaskConfigs() { 94 | Map<String, String> settingAssignment = CosmosDBSourceConfigTest.setupConfigs(); 95 | CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); 96 | sourceConnector.start(settingAssignment); 97 | List<Map<String, String>> taskConfigs = sourceConnector.taskConfigs(3); 98 | assertEquals(3, taskConfigs.size()); 99 | } 100 | 101 | @Test 102 | public void testValidTaskConfigContainerAssignment(){ 103 | Map<String, String> settingAssignment = CosmosDBSourceConfigTest.setupConfigs(); 104 | settingAssignment.put(CosmosDBSourceConfig.COSMOS_CONTAINER_TOPIC_MAP_CONF, "T1#C1,T2#C2,T3#C3,T4#C4"); 105 | CosmosDBSourceConnector sourceConnector = new CosmosDBSourceConnector(); 106 | sourceConnector.start(settingAssignment); 107 | List<Map<String, String>> taskConfigs = sourceConnector.taskConfigs(6); 108 | 109 | assertEquals(6, taskConfigs.size()); 110 | assertEquals("C4", taskConfigs.get(0).get(ASSIGNED_CONTAINER)); 111 | assertEquals("C1", taskConfigs.get(1).get(ASSIGNED_CONTAINER)); 112 | assertEquals("C2", taskConfigs.get(2).get(ASSIGNED_CONTAINER)); 113 | assertEquals("C3", taskConfigs.get(3).get(ASSIGNED_CONTAINER)); 114 | assertEquals("C4", taskConfigs.get(4).get(ASSIGNED_CONTAINER)); 115 | assertEquals("C1", taskConfigs.get(5).get(ASSIGNED_CONTAINER)); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: 
-------------------------------------------------------------------------------- 1 | #Define console appender 2 | log4j.appender.console=org.apache.log4j.ConsoleAppender 3 | log4j.appender.console.Target=System.err 4 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.console.layout.ConversionPattern=%-5p %c{1} - %m%n 6 | #Loggers 7 | log4j.rootLogger=ERROR,console 8 | log4j.logger.com.microsoft=INFO 9 | log4j.logger.com.azure.cosmos.kafka=DEBUG 10 | -------------------------------------------------------------------------------- /src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker: -------------------------------------------------------------------------------- 1 | mock-maker-inline --------------------------------------------------------------------------------
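A closing note on the one-line mockito-extensions resource above: declaring mock-maker-inline switches Mockito to its inline mock maker, which can mock final classes and final methods that the default subclassing mock maker rejects; that is presumably why it is enabled for these tests, which mock concrete Cosmos SDK types directly. A minimal, self-contained sketch (the FinalGreeter class is purely illustrative, not part of the repository):

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    public class InlineMockMakerSketch {
        // A final class: mockable only while the inline mock maker is active.
        static final class FinalGreeter {
            String greet() { return "hello"; }
        }

        public static void main(String[] args) {
            FinalGreeter greeter = mock(FinalGreeter.class);
            when(greeter.greet()).thenReturn("mocked");
            System.out.println(greeter.greet());   // prints "mocked"
        }
    }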