├── .github ├── ISSUE_TEMPLATE │ ├── 01_question.md │ ├── 02_bug.md │ └── 03_feature.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci.yml │ ├── clean.yml │ ├── dependency-graph.yml │ ├── format.yml │ └── scala-steward.yml ├── .gitignore ├── .scala-steward.conf ├── .scalafix.conf ├── .scalafmt.conf ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── backup-gcs └── src │ └── main │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ └── gcs │ └── BackupClient.scala ├── backup-s3 └── src │ ├── main │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── backup │ │ └── s3 │ │ └── BackupClient.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ └── s3 │ ├── BackupClientChunkState.scala │ ├── BackupClientSpec.scala │ ├── KafkaConsumerWithKillSwitch.scala │ ├── MinioBackupClientSpec.scala │ ├── MockedKafkaClientBackupConsumerSpec.scala │ ├── MockedS3BackupClientInterface.scala │ ├── RealS3BackupClientSpec.scala │ ├── RealS3BackupClientTest.scala │ └── RealS3GzipCompressionBackupClientSpec.scala ├── build.sbt ├── cli-backup └── src │ ├── main │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── backup │ │ ├── App.scala │ │ ├── BackupApp.scala │ │ ├── Main.scala │ │ └── S3App.scala │ └── test │ ├── resources │ └── logback.xml │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ └── CliSpec.scala ├── cli-compaction └── .gitkeep ├── cli-restore └── src │ ├── main │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── restore │ │ ├── App.scala │ │ ├── Main.scala │ │ ├── RestoreApp.scala │ │ └── S3App.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── restore │ └── CliSpec.scala ├── compaction-gcs └── src │ └── main │ ├── resources │ └── reference.conf │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── compaction │ └── gcs │ ├── Config.scala │ ├── StorageClient.scala │ └── models │ └── StorageConfig.scala ├── compaction-s3 └── src │ └── main │ ├── resources │ └── reference.conf │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── compaction │ └── s3 │ ├── Config.scala │ ├── StorageClient.scala │ └── models │ └── StorageConfig.scala ├── core-backup └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── backup │ │ ├── BackupClientInterface.scala │ │ ├── Config.scala │ │ ├── KafkaConsumer.scala │ │ ├── KafkaConsumerInterface.scala │ │ └── configs │ │ ├── Backup.scala │ │ ├── Compression.scala │ │ └── TimeConfiguration.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ ├── BackupClientControlWrapper.scala │ ├── BackupClientInterfaceSpec.scala │ ├── BackupClientInterfaceTest.scala │ ├── CompressionSpec.scala │ ├── ConfigSpec.scala │ ├── ConfigurationChangeRestartSpec.scala │ ├── GzipCompressionBackupClientInterfaceSpec.scala │ ├── MockedBackupClientInterface.scala │ └── MockedKafkaConsumerInterface.scala ├── core-cli └── src │ └── main │ ├── resources │ ├── application.conf │ └── logback.xml │ └── scala │ └── io │ └── aiven │ └── guardian │ └── cli │ ├── MainUtils.scala │ ├── PekkoSettings.scala │ ├── arguments │ ├── PropertiesOpt.scala │ └── StorageOpt.scala │ └── options │ └── Options.scala ├── core-compaction └── src │ └── main │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── compaction │ ├── DatabaseInterface.scala │ ├── 
PostgresJDBCDatabase.scala │ └── StorageInterface.scala ├── core-gcs └── src │ └── main │ ├── resources │ └── reference.conf │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── gcs │ ├── Config.scala │ ├── configs │ └── GCS.scala │ └── errors │ └── GCSErrors.scala ├── core-restore └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── restore │ │ ├── Config.scala │ │ ├── KafkaProducer.scala │ │ ├── KafkaProducerInterface.scala │ │ ├── RestoreClientInterface.scala │ │ └── configs │ │ └── Restore.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── restore │ ├── ConfigSpec.scala │ ├── GzipCompressionRestoreClientInterfaceSpec.scala │ ├── MockedKafkaProducerInterface.scala │ ├── MockedRestoreClientInterface.scala │ ├── RestoreClientInterfaceSpec.scala │ └── RestoreClientInterfaceTest.scala ├── core-s3 └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── s3 │ │ ├── Config.scala │ │ ├── configs │ │ └── S3.scala │ │ └── errors │ │ └── S3Errors.scala │ └── test │ ├── resources │ └── logback.xml │ └── scala │ ├── io │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── s3 │ │ ├── Generators.scala │ │ ├── Main.scala │ │ ├── MinioContainer.scala │ │ ├── MinioS3Test.scala │ │ ├── PureConfigS3HeadersSpec.scala │ │ ├── S3Spec.scala │ │ └── S3TestUtils.scala │ └── org │ └── apache │ └── pekko │ └── stream │ └── connectors │ └── s3 │ └── GeneratorsSpec.scala ├── core ├── README.md └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ ├── Config.scala │ │ ├── Errors.scala │ │ ├── ExtensionsMethods.scala │ │ ├── PureConfigUtils.scala │ │ ├── Utils.scala │ │ ├── codecs │ │ └── Circe.scala │ │ ├── configs │ │ └── KafkaCluster.scala │ │ └── models │ │ ├── BackupObjectMetadata.scala │ │ ├── CompressionType.scala │ │ └── ReducedConsumerRecord.scala │ └── test │ ├── resources │ ├── application.conf │ └── logback.xml │ └── scala │ └── io │ └── aiven │ └── guardian │ ├── kafka │ ├── ConfigSpec.scala │ ├── Generators.scala │ ├── KafkaClusterTest.scala │ └── TestUtils.scala │ └── pekko │ ├── AnyPropTestKit.scala │ ├── PekkoHttpTestKit.scala │ └── PekkoStreamTestKit.scala ├── dependency-check ├── dependency-check-report.html └── suppression.xml ├── docs └── src │ └── main │ └── paradox │ ├── application │ ├── design.md │ ├── index.md │ ├── logging.md │ └── packaging.md │ ├── backup │ ├── configuration.md │ ├── design.md │ └── index.md │ ├── ci.md │ ├── doc-generation.md │ ├── general-architecture │ ├── index.md │ └── logging.md │ ├── index.md │ ├── overview.md │ ├── persistence │ ├── design.md │ ├── index.md │ └── s3 │ │ ├── configuration.md │ │ └── index.md │ ├── restore │ ├── configuration.md │ └── index.md │ ├── security.md │ └── testing │ ├── index.md │ └── s3.md ├── project ├── LicenseReport.scala ├── build.properties ├── plugins.sbt └── project-info.conf ├── restore-gcs └── .gitkeep └── restore-s3 └── src ├── main └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── restore │ └── s3 │ └── RestoreClient.scala └── test └── scala └── io └── aiven └── guardian └── kafka └── restore └── s3 ├── RealS3GzipCompressionRestoreClientSpec.scala ├── RealS3RestoreClientSpec.scala └── RealS3RestoreClientTest.scala /.github/ISSUE_TEMPLATE/01_question.md: -------------------------------------------------------------------------------- 1 | --- 2 
| name: ❓ Ask a question 3 | about: Got stuck or missing something from the docs? Ask away! 4 | --- 5 | 6 | # What can we help you with? 7 | 8 | 9 | 10 | # Where would you expect to find this information? 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02_bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐜 Report a bug 3 | about: Spotted a problem? Let us know 4 | --- 5 | 6 | # What happened? 7 | 8 | 9 | 10 | # What did you expect to happen? 11 | 12 | 13 | 14 | # What else do we need to know? 15 | 16 | 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/03_feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💡 Feature suggestion 3 | about: What would make this even better? 4 | --- 5 | 6 | # What is currently missing? 7 | 8 | 9 | 10 | # How could this be improved? 11 | 12 | 13 | 14 | # Is this a feature you would work on yourself? 15 | 16 | * [ ] I plan to open a pull request for this feature 17 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | # About this change - What it does 3 | 4 | 5 | 6 | 7 | Resolves: #xxxxx 8 | 9 | # Why this way 10 | 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 
7 | 8 | name: Continuous Integration 9 | 10 | on: 11 | pull_request: 12 | branches: [main] 13 | push: 14 | branches: [main] 15 | 16 | permissions: 17 | id-token: write 18 | 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | 22 | jobs: 23 | build: 24 | name: Build and Test 25 | strategy: 26 | matrix: 27 | os: [ubuntu-latest] 28 | scala: [2.13.13] 29 | java: [temurin@11] 30 | runs-on: ${{ matrix.os }} 31 | steps: 32 | - name: Checkout current branch (full) 33 | uses: actions/checkout@v4 34 | with: 35 | fetch-depth: 0 36 | 37 | - name: Setup Java (temurin@11) 38 | if: matrix.java == 'temurin@11' 39 | uses: actions/setup-java@v4 40 | with: 41 | distribution: temurin 42 | java-version: 11 43 | cache: sbt 44 | 45 | - name: 'Linter: Scalafix checks' 46 | run: sbt '++ ${{ matrix.scala }}' 'scalafixAll --check' 47 | 48 | - name: Configure AWS credentials 49 | uses: aws-actions/configure-aws-credentials@v2 50 | with: 51 | role-to-assume: 'arn:aws:iam::310017459104:role/aiven-guardian-github-action' 52 | aws-region: us-west-2 53 | role-duration-seconds: 7200 54 | 55 | - name: Check that workflows are up to date 56 | run: sbt '++ ${{ matrix.scala }}' githubWorkflowCheck 57 | 58 | - name: Build project 59 | env: 60 | PEKKO_CONNECTORS_S3_REGION_PROVIDER: default 61 | PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER: default 62 | run: sbt '++ ${{ matrix.scala }}' clean coverage test 63 | 64 | - name: Compile docs 65 | run: sbt '++ ${{ matrix.scala }}' docs/makeSite 66 | 67 | - name: Upload coverage data to Coveralls 68 | env: 69 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} 70 | COVERALLS_FLAG_NAME: Scala ${{ matrix.scala }} 71 | run: sbt '++ ${{ matrix.scala }}' coverageReport coverageAggregate coveralls 72 | 73 | - name: Compress target directories 74 | run: tar cf targets.tar target cli-compaction/target compaction-gcs/target backup-s3/target compaction-s3/target docs/target cli-backup/target core-restore/target restore-s3/target core-gcs/target core-compaction/target core-s3/target core-backup/target core-cli/target cli-restore/target core/target restore-gcs/target backup-gcs/target project/target 75 | 76 | - name: Upload target directories 77 | uses: actions/upload-artifact@v4 78 | with: 79 | name: target-${{ matrix.os }}-${{ matrix.scala }}-${{ matrix.java }} 80 | path: targets.tar 81 | 82 | publish: 83 | name: Publish Artifacts 84 | needs: [build] 85 | if: github.event_name != 'pull_request' && (github.ref == 'refs/heads/main') 86 | strategy: 87 | matrix: 88 | os: [ubuntu-latest] 89 | scala: [2.13.13] 90 | java: [temurin@11] 91 | runs-on: ${{ matrix.os }} 92 | steps: 93 | - name: Checkout current branch (full) 94 | uses: actions/checkout@v4 95 | with: 96 | fetch-depth: 0 97 | 98 | - name: Setup Java (temurin@11) 99 | if: matrix.java == 'temurin@11' 100 | uses: actions/setup-java@v4 101 | with: 102 | distribution: temurin 103 | java-version: 11 104 | cache: sbt 105 | 106 | - name: Download target directories (2.13.13) 107 | uses: actions/download-artifact@v4 108 | with: 109 | name: target-${{ matrix.os }}-2.13.13-${{ matrix.java }} 110 | 111 | - name: Inflate target directories (2.13.13) 112 | run: | 113 | tar xf targets.tar 114 | rm targets.tar 115 | 116 | - run: | 117 | git config --global user.name "$(git --no-pager log --format=format:'%an' -n 1)" 118 | git config --global user.email "$(git --no-pager log --format=format:'%ae' -n 1)" 119 | 120 | - uses: webfactory/ssh-agent@v0.5.4 121 | with: 122 | ssh-private-key: ${{ secrets.GH_PAGES_SSH_PRIVATE_KEY }} 123 | 124 | - run: sbt 
docs/ghpagesPushSite 125 | -------------------------------------------------------------------------------- /.github/workflows/clean.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 7 | 8 | name: Clean 9 | 10 | on: push 11 | 12 | jobs: 13 | delete-artifacts: 14 | name: Delete Artifacts 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - name: Delete artifacts 20 | shell: bash {0} 21 | run: | 22 | # Customize those three lines with your repository and credentials: 23 | REPO=${GITHUB_API_URL}/repos/${{ github.repository }} 24 | 25 | # A shortcut to call GitHub API. 26 | ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; } 27 | 28 | # A temporary file which receives HTTP response headers. 29 | TMPFILE=$(mktemp) 30 | 31 | # An associative array, key: artifact name, value: number of artifacts of that name. 32 | declare -A ARTCOUNT 33 | 34 | # Process all artifacts on this repository, loop on returned "pages". 35 | URL=$REPO/actions/artifacts 36 | while [[ -n "$URL" ]]; do 37 | 38 | # Get current page, get response headers in a temporary file. 39 | JSON=$(ghapi --dump-header $TMPFILE "$URL") 40 | 41 | # Get URL of next page. Will be empty if we are at the last page. 42 | URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<\(.*\)>.*/\1/') 43 | rm -f $TMPFILE 44 | 45 | # Number of artifacts on this page: 46 | COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') )) 47 | 48 | # Loop on all artifacts on this page. 49 | for ((i=0; $i < $COUNT; i++)); do 50 | 51 | # Get name of artifact and count instances of this name.
52 | name=$(jq <<<$JSON -r ".artifacts[$i].name?") 53 | ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1)) 54 | 55 | id=$(jq <<<$JSON -r ".artifacts[$i].id?") 56 | size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") )) 57 | printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size 58 | ghapi -X DELETE $REPO/actions/artifacts/$id 59 | done 60 | done 61 | -------------------------------------------------------------------------------- /.github/workflows/dependency-graph.yml: -------------------------------------------------------------------------------- 1 | name: Update Dependency Graph 2 | on: 3 | push: 4 | branches: 5 | - main # default branch of the project 6 | permissions: 7 | contents: write 8 | jobs: 9 | dependency-graph: 10 | name: Update Dependency Graph 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: scalacenter/sbt-dependency-submission@v2 15 | -------------------------------------------------------------------------------- /.github/workflows/format.yml: -------------------------------------------------------------------------------- 1 | name: Scalafmt 2 | 3 | permissions: read-all 4 | 5 | on: 6 | pull_request: 7 | branches: ['**'] 8 | 9 | jobs: 10 | build: 11 | name: Code is formatted 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout current branch (full) 15 | uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | persist-credentials: false 19 | 20 | - name: Check project is formatted 21 | uses: jrouly/scalafmt-native-action@v3 22 | with: 23 | arguments: '--list --mode diff-ref=origin/main' 24 | -------------------------------------------------------------------------------- /.github/workflows/scala-steward.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | schedule: 4 | - cron: '0 0 * * 0' 5 | 6 | name: Launch Scala Steward 7 | 8 | jobs: 9 | scala-steward: 10 | runs-on: ubuntu-22.04 11 | name: Launch Scala Steward 12 | steps: 13 | - name: Launch Scala Steward 14 | uses: scala-steward-org/scala-steward-action@v2 15 | with: 16 | github-app-id: ${{ secrets.APP_ID }} 17 | github-app-installation-id: ${{ secrets.APP_INSTALLATION_ID }} 18 | github-app-key: ${{ secrets.APP_PRIVATE_KEY }} 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### SBT template 2 | # Simple Build Tool 3 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control 4 | 5 | dist/* 6 | target/ 7 | lib_managed/ 8 | src_managed/ 9 | project/boot/ 10 | project/plugins/project/ 11 | .history 12 | .cache 13 | .lib/ 14 | 15 | ### Scala template 16 | *.class 17 | *.log 18 | -------------------------------------------------------------------------------- /.scala-steward.conf: -------------------------------------------------------------------------------- 1 | updatePullRequests = "always" 2 | -------------------------------------------------------------------------------- /.scalafix.conf: -------------------------------------------------------------------------------- 1 | rules = [ 2 | DisableSyntax, # Disables some constructs that make no semantic sense like `final val` 3 | ProcedureSyntax, # Procedure syntax in Scala is always discouraged 4 | ExplicitResultTypes, # To avoid public API breakages by mistake is good to always annotate the return types of public methods 5 | NoValInForComprehension, # `val` 
in for comprehensions are deprecated and shouldn't be used 6 | NoAutoTupling, # Avoids the automatic tupling in parameters 7 | RemoveUnused, # Removes unused elements 8 | LeakingImplicitClassVal, # This rule adds the private access modifier on the field of implicit value classes in order to prevent direct access. 9 | OrganizeImports # Organizes imports and removes unused ones 10 | ] 11 | 12 | ExplicitResultTypes.memberKind = [Def, Val, Var] 13 | ExplicitResultTypes.memberVisibility = [Public, Protected] 14 | ExplicitResultTypes.skipSimpleDefinitions = ['Lit', 'Term.New', 'Term.Ref'] 15 | ExplicitResultTypes.fatalWarnings = true 16 | DisableSyntax.noNulls = true 17 | DisableSyntax.noReturns = true 18 | DisableSyntax.noWhileLoops = true 19 | DisableSyntax.noIsInstanceOf = true 20 | DisableSyntax.noXml = true 21 | DisableSyntax.noFinalVal = true 22 | DisableSyntax.noFinalize = true 23 | DisableSyntax.noValPatterns = true 24 | RemoveUnused.imports = false # The plugin organize imports removes unused and clashes with this 25 | OrganizeImports.groups = [ 26 | "*" 27 | "scala." 28 | "re:javax?\\." 29 | ] # Reasoning for this config is to keep the more business related imports at the top, while language imports are on the bottom 30 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = 3.8.2 2 | runner.dialect = scala213 3 | preset = default 4 | align.preset = more 5 | maxColumn = 120 6 | project.git = true 7 | align.openParenDefnSite = true 8 | align.openParenCallSite = true 9 | align.arrowEnumeratorGenerator = true 10 | danglingParentheses.preset = true 11 | rewrite.rules = [RedundantBraces, RedundantParens] 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | opensource@aiven.io. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome! 2 | 3 | Guardian for Apache Kafka follows the [fork and pull](https://help.github.com/articles/using-pull-requests/#fork--pull) 4 | development model. You can simply fork the repository, create and check out a new branch, commit changes to that 5 | branch and then create a pull request once you are done. 6 | 7 | Feel free to submit a PR earlier rather than later; this is recommended as it can spur discussion to see if you are on 8 | the right track. If you create a PR before it's ready, we recommend using GitHub's 9 | [draft](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-stage-of-a-pull-request) 10 | feature to clearly indicate that a PR is still being worked on. 11 | 12 | ## Setting up a development environment 13 | 14 | If you haven't already done so, before you get started you need to set up your machine for development. Guardian for 15 | Apache Kafka is written in [Scala](https://www.scala-lang.org/), so a few steps are needed. 16 | 17 | ## JDK 18 | 19 | Guardian for Apache Kafka is developed on the latest stable branch of OpenJDK. For Windows and macOS we recommend 20 | using [AdoptOpenJDK][adopt-openjdk-link] to download the latest installer. For Linux it's recommended to install 21 | OpenJDK through your distribution (but you can also use [AdoptOpenJDK][adopt-openjdk-link] as a last resort). 22 | 23 | ## Scala and sbt 24 | Once you have installed a JDK, having [Scala](https://www.scala-lang.org) and [sbt][sbt-link] installed is recommended.
25 | Although some IDEs (such as IntelliJ) automatically handle Scala and sbt installation for you, it's still recommended 26 | to have a standalone version so you can compile/test/run the project without an IDE/editor. The Scala installation also 27 | comes with its own REPL, which can aid in development. 28 | 29 | We recommend following the official [Scala 2 documentation](https://www.scala-lang.org/download/scala2.html) on how to 30 | install Scala. 31 | 32 | ## Editors/IDEs 33 | The following editors are recommended for development with Scala. Although it's possible to use other environments, 34 | Scala is a strongly typed language, so using a well-supported editor is beneficial. 35 | 36 | ### IntelliJ IDEA 37 | 38 | [IntelliJ IDEA](https://www.jetbrains.com/idea/) is one of the most used editors for Scala development. After installing 39 | IDEA you need to install the [Scala plugin](https://plugins.jetbrains.com/plugin/1347-scala) so it can recognize sbt 40 | projects. After installation of the plugin you can simply open the cloned `guardian-for-apache-kafka` repository and it should 41 | set up everything for you. 42 | 43 | ### Metals 44 | 45 | [Metals][metals-link] is a Scala [LSP](https://en.wikipedia.org/wiki/Language_Server_Protocol) implementation that 46 | supports various editors. The primary supported editor for [Metals][metals-link] is 47 | [Visual Studio Code](https://code.visualstudio.com/) along with the relevant 48 | [marketplace plugin](https://marketplace.visualstudio.com/items?itemName=scalameta.metals). 49 | 50 | Note that other editors can also be used with Metals; documentation can be found 51 | [here](https://scalameta.org/metals/docs/). [Spacemacs](https://www.spacemacs.org/), an 52 | [Emacs](https://www.gnu.org/software/emacs/) distribution, also supports [Metals][metals-link] via the 53 | [Scala layer](https://develop.spacemacs.org/layers/+lang/scala/README.html). 54 | 55 | ## Formatting 56 | 57 | The codebase is formatted with [scalafmt](https://scalameta.org/scalafmt/); as such, the code needs to be formatted 58 | before submitting a PR. 59 | 60 | Various runners for scalafmt exist, such as: 61 | * An [sbt scalafmt plugin](https://github.com/scalameta/sbt-scalafmt) that lets you run scalafmt directly within sbt using 62 | * `scalafmt` to format the main Scala sources 63 | * `test:scalafmt` to format the test Scala sources 64 | * `scalafmtSbt` to format the `build.sbt` file 65 | * IntelliJ IDEA and VSCode will automatically detect projects with scalafmt and prompt you whether to use scalafmt. See 66 | the [scalafmt installation guide][scalafmt-installation-link] for more details 67 | * There are native builds of scalafmt that let you run `scalafmt` as a CLI tool; see the CLI section in the 68 | [scalafmt installation guide][scalafmt-installation-link] 69 | 70 | Note that a GitHub Action exists which will check that your code is formatted whenever you create a PR. For more details 71 | read the [documentation](https://aiven.github.io/guardian-for-apache-kafka/ci.html#scalafmt). 72 | 73 | ## sbt - Compiling, Building and Testing 74 | 75 | We use [sbt][sbt-link] as the primary build tool for the project. When you run [sbt][sbt-link] by itself 76 | it will start an interactive shell where you can type in commands, e.g. 77 | 78 | * `compile` will compile the entire project 79 | * `test:compile` will only compile the test sources 80 | * `test` will run the tests for the entire project 81 | * `core/compile` will only compile the `core` project.
See [build.sbt](build.sbt) for a reference of how the projects 82 | are named 83 | * `publishLocal` will publish the project into the local Ivy repository (`~/.ivy2/local`) 84 | * `clean` will clean all build targets (including documentation) from the project. Note that sbt stores build output 85 | in sub-directories named `target` 86 | * `reload` will reload sbt, which is needed when the [sbt][sbt-link] build definition is changed 87 | 88 | ## Testing 89 | 90 | As mentioned before, testing is completely handled using sbt; there are no custom shell scripts required to set 91 | up environments unless otherwise noted in 92 | the [testing docs](https://aiven.github.io/guardian-for-apache-kafka/testing/index.html) (typically when tests run 93 | against actual services such as S3). 94 | 95 | ### Docker 96 | 97 | For integration tests Guardian for Apache Kafka uses Docker to spin up services. For macOS the best way to install 98 | Docker is from the [official website](https://www.docker.com/products/docker-desktop/), whereas if you are running Linux 99 | you should consult your distribution's package manager/repository. 100 | 101 | Since Guardian for Apache Kafka uses [testcontainers](https://www.testcontainers.org/) you don't need to worry about 102 | starting/stopping the Docker instances manually; this is automatically handled when you run the relevant test(s). 103 | 104 | ## sbt - documentation 105 | 106 | Documentation is also built within sbt, e.g. 107 | 108 | * `docs/makeSite` will compile the documentation 109 | * `docs/previewSite` will compile the documentation (if needed) and open the result in your system's default browser 110 | 111 | For details about how the documentation generation works, go 112 | [here](https://aiven.github.io/guardian-for-apache-kafka/doc-generation.html) 113 | 114 | [adopt-openjdk-link]: https://adoptopenjdk.net/ 115 | [metals-link]: https://scalameta.org/metals/ 116 | [scalafmt-installation-link]: https://scalameta.org/scalafmt/docs/installation.html 117 | [sbt-link]: https://www.scala-sbt.org/ 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/aiven/guardian-for-apache-kafka/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/aiven/guardian-for-apache-kafka/actions/workflows/ci.yml?query=branch%3Amain) 2 | [![Apache License](https://img.shields.io/badge/license-APACHE_2-green.svg)](https://www.apache.org/licenses/LICENSE-2.0) 3 | [![Coverage](https://coveralls.io/repos/github/aiven/guardian-for-apache-kafka/badge.svg?branch=main)](https://coveralls.io/github/aiven/guardian-for-apache-kafka?branch=main) 4 | 5 | # Guardian for Apache Kafka® 6 | 7 | Guardian is a backup and restore tool for Apache Kafka clusters. It is designed to continuously stream Kafka topics into 8 | persistent/object storage such as S3, and it also provides tools for restoring said backups. 9 | 10 | ## Documentation 11 | 12 | * [Guardian reference](https://aiven-open.github.io/guardian-for-apache-kafka/) documentation. 13 | 14 | ## Trademarks 15 | 16 | Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or 17 | other countries.
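The CONTRIBUTING guide above explains that tests are compiled and run entirely through sbt (`test:compile`, `test`, or per-project commands such as `core/test`), with ScalaTest suites living under each module's `src/test/scala`. As a minimal sketch of what such a suite looks like, the example below uses the same ScalaTest property-style specs and `must` matchers that the project's own tests (e.g. `CliSpec`) already rely on; the package placement, class name and assertion here are hypothetical and purely illustrative, not part of the repository.

```scala
package io.aiven.guardian.kafka

import org.scalatest.matchers.must.Matchers
import org.scalatest.propspec.AnyPropSpec

// Hypothetical example spec: if placed under core/src/test/scala it would be picked up
// by `sbt test` (or `sbt core/test` to run only the tests of the core project).
class ExampleSpec extends AnyPropSpec with Matchers {
  property("reversing a string twice returns the original string") {
    val original = "guardian-for-apache-kafka"
    original.reverse.reverse mustEqual original
  }
}
```

A single suite like this can also be run on its own from the sbt shell with `testOnly io.aiven.guardian.kafka.ExampleSpec`, which is often faster during development than running the whole `test` task.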
18 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities. Which versions are eligible 6 | to receive such patches depend on the CVSS v3.0 Rating: 7 | 8 | | CVSS v3.0 | Supported Versions | 9 | | --------- | ----------------------------------------- | 10 | | 4.0-10.0 | Most recent release | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please report (suspected) security vulnerabilities to our **[bug bounty 15 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from 16 | us within 2 working days. If the issue is confirmed, we will release a patch as 17 | soon as possible depending on impact and complexity. 18 | 19 | ## Qualifying Vulnerabilities 20 | 21 | Any reproducible vulnerability that has a severe effect on the security or 22 | privacy of our users is likely to be in scope for the program. 23 | 24 | We generally **aren't** interested in the following issues: 25 | * Social engineering (e.g. phishing, vishing, smishing) attacks 26 | * Brute force, DoS, text injection 27 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.), 28 | email (SPF/DKIM/DMARC records), SSL/TLS configuration. 29 | * Software version disclosure / Banner identification issues / Descriptive 30 | error messages or headers (e.g. stack traces, application or server errors). 31 | * Clickjacking on pages with no sensitive actions 32 | * Theoretical vulnerabilities where you can't demonstrate a significant 33 | security impact with a proof of concept. 34 | -------------------------------------------------------------------------------- /backup-gcs/src/main/scala/io/aiven/guardian/kafka/backup/gcs/BackupClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.gcs 2 | 3 | import io.aiven.guardian.kafka.backup.BackupClientInterface 4 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface 5 | import io.aiven.guardian.kafka.backup.configs.Backup 6 | import io.aiven.guardian.kafka.gcs.configs.{GCS => GCSConfig} 7 | import org.apache.pekko 8 | 9 | import scala.concurrent.ExecutionContext 10 | import scala.concurrent.Future 11 | 12 | import pekko.actor.ActorSystem 13 | import pekko.http.scaladsl.model.ContentTypes 14 | import pekko.stream.connectors.google.GoogleAttributes 15 | import pekko.stream.connectors.google.GoogleSettings 16 | import pekko.stream.connectors.googlecloud.storage.StorageObject 17 | import pekko.stream.connectors.googlecloud.storage.scaladsl.GCStorage 18 | import pekko.stream.scaladsl.Sink 19 | import pekko.util.ByteString 20 | 21 | // TODO: GCS implementation currently does not work correctly because of inability of current GCS implementation in 22 | // Pekko Connectors to allow us to commit Kafka cursor whenever chunks are uploaded 23 | class BackupClient[T <: KafkaConsumerInterface](maybeGoogleSettings: Option[GoogleSettings])(implicit 24 | override val kafkaClientInterface: T, 25 | override val backupConfig: Backup, 26 | override val system: ActorSystem, 27 | gcsConfig: GCSConfig 28 | ) extends BackupClientInterface[T] { 29 | 30 | override def empty: () => Future[Option[StorageObject]] = () => Future.successful(None) 31 | 32 | override type BackupResult = Option[StorageObject] 33 | 34 | override type State = Nothing 35 | 36 | override def 
getCurrentUploadState(key: String): Future[UploadStateResult] = 37 | Future.successful(UploadStateResult.empty) 38 | 39 | override def backupToStorageTerminateSink( 40 | previousState: PreviousState 41 | ): Sink[ByteString, Future[Option[StorageObject]]] = { 42 | val base = GCStorage 43 | .resumableUpload(gcsConfig.dataBucket, previousState.previousKey, ContentTypes.`application/json`) 44 | .mapMaterializedValue(future => future.map(result => Some(result))(ExecutionContext.parasitic)) 45 | 46 | maybeGoogleSettings 47 | .fold(base)(googleSettings => base.withAttributes(GoogleAttributes.settings(googleSettings))) 48 | } 49 | 50 | override def backupToStorageSink(key: String, 51 | currentState: Option[Nothing] 52 | ): Sink[(ByteString, kafkaClientInterface.CursorContext), Future[BackupResult]] = { 53 | val base = GCStorage 54 | .resumableUpload(gcsConfig.dataBucket, key, ContentTypes.`application/json`) 55 | .mapMaterializedValue(future => future.map(result => Some(result))(ExecutionContext.parasitic)) 56 | 57 | maybeGoogleSettings 58 | .fold(base)(googleSettings => base.withAttributes(GoogleAttributes.settings(googleSettings))) 59 | .contramap[(ByteString, kafkaClientInterface.CursorContext)] { case (byteString, _) => 60 | byteString 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/BackupClientChunkState.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface 4 | import io.aiven.guardian.kafka.backup.configs.Backup 5 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 6 | import org.apache.pekko 7 | 8 | import scala.collection.immutable 9 | import scala.concurrent.Future 10 | 11 | import java.util.concurrent.ConcurrentLinkedQueue 12 | 13 | import pekko.Done 14 | import pekko.actor.ActorSystem 15 | import pekko.stream.connectors.s3.S3Headers 16 | import pekko.stream.connectors.s3.S3Settings 17 | import pekko.stream.connectors.s3.SuccessfulUploadPart 18 | import pekko.stream.scaladsl.Flow 19 | import pekko.stream.scaladsl.Sink 20 | 21 | class BackupClientChunkState[T <: KafkaConsumerInterface](maybeS3Settings: Option[S3Settings])(implicit 22 | override val kafkaClientInterface: T, 23 | override val backupConfig: Backup, 24 | override val system: ActorSystem, 25 | s3Config: S3Config, 26 | s3Headers: S3Headers 27 | ) extends BackupClient[T](maybeS3Settings) { 28 | val processedChunks: ConcurrentLinkedQueue[SuccessfulUploadPart] = new ConcurrentLinkedQueue[SuccessfulUploadPart]() 29 | 30 | override def successSink 31 | : Sink[(SuccessfulUploadPart, immutable.Iterable[kafkaClientInterface.CursorContext]), Future[Done]] = 32 | Flow[(SuccessfulUploadPart, immutable.Iterable[kafkaClientInterface.CursorContext])] 33 | .alsoTo(Sink.foreach { case (part, _) => 34 | processedChunks.add(part) 35 | }) 36 | .to(super.successSink) 37 | .mapMaterializedValue(_ => Future.successful(Done)) 38 | } 39 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/BackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import com.softwaremill.diffx.generic.auto._ 4 | import com.softwaremill.diffx.scalatest.DiffMustMatcher._ 5 | import 
io.aiven.guardian.kafka.Generators._ 6 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 7 | import io.aiven.guardian.kafka.codecs.Circe._ 8 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 9 | import io.aiven.guardian.kafka.s3.Generators._ 10 | import io.aiven.guardian.kafka.s3.S3Spec 11 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 12 | import org.apache.pekko 13 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport 14 | import org.scalatest.BeforeAndAfterAll 15 | import org.scalatest.TestData 16 | import org.scalatest.matchers.must.Matchers 17 | 18 | import scala.concurrent.ExecutionContext 19 | import scala.concurrent.Future 20 | import scala.concurrent.duration._ 21 | import scala.language.postfixOps 22 | 23 | import java.time.OffsetDateTime 24 | 25 | import pekko.stream.connectors.s3.scaladsl.S3 26 | import pekko.stream.scaladsl.Keep 27 | import pekko.stream.scaladsl.Sink 28 | import pekko.stream.scaladsl.Source 29 | 30 | trait BackupClientSpec extends S3Spec with Matchers with BeforeAndAfterAll { 31 | 32 | val ThrottleElements: Int = 100 33 | val ThrottleAmount: FiniteDuration = 1 millis 34 | 35 | property("backup method completes flow correctly for all valid Kafka events") { implicit td: TestData => 36 | forAll(kafkaDataWithTimePeriodsGen(), s3ConfigGen(useVirtualDotHost, bucketPrefix)) { 37 | (kafkaDataWithTimePeriod: KafkaDataWithTimePeriod, s3Config: S3Config) => 38 | logger.info(s"Data bucket is ${s3Config.dataBucket}") 39 | val backupClient = new MockedS3BackupClientInterface( 40 | Source(kafkaDataWithTimePeriod.data).throttle(ThrottleElements, ThrottleAmount), 41 | PeriodFromFirst(kafkaDataWithTimePeriod.periodSlice), 42 | s3Config, 43 | Some(s3Settings) 44 | ) 45 | 46 | val delay = 47 | (ThrottleAmount * (kafkaDataWithTimePeriod.data.size / ThrottleElements) * 1.2) + (10 millis) match { 48 | case fd: FiniteDuration => fd 49 | case _: Duration.Infinite => throw new Exception("Expected Finite Duration") 50 | } 51 | 52 | val calculatedFuture = for { 53 | _ <- createBucket(s3Config.dataBucket) 54 | _ <- backupClient.backup.run() 55 | _ <- pekko.pattern.after(delay)(Future.successful(())) 56 | bucketContents <- 57 | S3.listBucket(s3Config.dataBucket, None, s3Headers) 58 | .withAttributes(s3Attrs) 59 | .toMat(Sink.collection)(Keep.right) 60 | .run() 61 | keysWithRecords <- Future.sequence(bucketContents.map { bucketContents => 62 | S3.getObject(s3Config.dataBucket, bucketContents.key) 63 | .withAttributes(s3Attrs) 64 | .via(CirceStreamSupport.decode[List[Option[ReducedConsumerRecord]]]) 65 | .toMat(Sink.collection)(Keep.right) 66 | .run() 67 | .map(list => (bucketContents.key, list.flatten))(ExecutionContext.parasitic) 68 | }) 69 | sorted = keysWithRecords.toList.sortBy { case (key, _) => 70 | val date = key.replace(".json", "") 71 | OffsetDateTime.parse(date).toEpochSecond 72 | }(Ordering[Long].reverse) 73 | flattened = sorted.flatMap { case (_, records) => records } 74 | } yield flattened.collect { case Some(reducedConsumerRecord) => 75 | reducedConsumerRecord 76 | } 77 | val observed = calculatedFuture.futureValue 78 | 79 | kafkaDataWithTimePeriod.data.containsSlice(observed) mustEqual true 80 | if (observed.nonEmpty) { 81 | observed.head mustMatchTo (kafkaDataWithTimePeriod.data.head) 82 | } 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/KafkaConsumerWithKillSwitch.scala: 
-------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.KafkaConsumer 4 | import io.aiven.guardian.kafka.backup.configs.Backup 5 | import io.aiven.guardian.kafka.configs.KafkaCluster 6 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 7 | import org.apache.pekko 8 | 9 | import pekko.actor.ActorSystem 10 | import pekko.kafka.CommitterSettings 11 | import pekko.kafka.ConsumerMessage 12 | import pekko.kafka.ConsumerSettings 13 | import pekko.kafka.scaladsl.Consumer 14 | import pekko.stream.SharedKillSwitch 15 | import pekko.stream.scaladsl.SourceWithContext 16 | 17 | class KafkaConsumerWithKillSwitch( 18 | configureConsumer: Option[ 19 | ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]] 20 | ] = None, 21 | configureCommitter: Option[ 22 | CommitterSettings => CommitterSettings 23 | ] = None, 24 | killSwitch: SharedKillSwitch 25 | )(implicit system: ActorSystem, kafkaClusterConfig: KafkaCluster, backupConfig: Backup) 26 | extends KafkaConsumer(configureConsumer, configureCommitter) { 27 | override def getSource 28 | : SourceWithContext[ReducedConsumerRecord, ConsumerMessage.CommittableOffset, Consumer.Control] = 29 | super.getSource.via(killSwitch.flow) 30 | } 31 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MinioBackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.s3.MinioS3Test 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class MinioBackupClientSpec 8 | extends AnyPropTestKit(ActorSystem("MinioS3BackupClientSpec")) 9 | with BackupClientSpec 10 | with MinioS3Test { 11 | 12 | /** Since Minio doesn't do DNS name verification we can enable this 13 | */ 14 | override lazy val useVirtualDotHost: Boolean = true 15 | } 16 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MockedKafkaClientBackupConsumerSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import com.softwaremill.diffx.scalatest.DiffMustMatcher._ 4 | import io.aiven.guardian.kafka.Generators._ 5 | import io.aiven.guardian.kafka.Utils 6 | import io.aiven.guardian.kafka.backup.MockedBackupClientInterface 7 | import io.aiven.guardian.kafka.backup.MockedKafkaConsumerInterface 8 | import io.aiven.guardian.kafka.backup.configs.Backup 9 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 10 | import io.aiven.guardian.kafka.codecs.Circe._ 11 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 12 | import io.aiven.guardian.kafka.s3.Generators._ 13 | import io.aiven.guardian.kafka.s3.S3Spec 14 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 15 | import io.aiven.guardian.pekko.AnyPropTestKit 16 | import org.apache.pekko 17 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport 18 | import org.scalatest.TestData 19 | import org.scalatest.matchers.must.Matchers 20 | 21 | import scala.concurrent.ExecutionContext 22 | import scala.concurrent.Future 23 | import scala.concurrent.duration.FiniteDuration 24 | import scala.concurrent.duration._ 25 | import 
scala.language.postfixOps 26 | 27 | import pekko.actor.ActorSystem 28 | import pekko.stream.connectors.s3.S3Settings 29 | import pekko.stream.connectors.s3.scaladsl.S3 30 | import pekko.stream.scaladsl.Sink 31 | import pekko.stream.scaladsl.Source 32 | 33 | class MockedKafkaClientBackupConsumerSpec 34 | extends AnyPropTestKit(ActorSystem("MockedKafkaClientBackupClientSpec")) 35 | with S3Spec 36 | with Matchers { 37 | override lazy val s3Settings: S3Settings = S3Settings() 38 | 39 | /** Virtual Dot Host in bucket names are disabled because you need an actual DNS certificate otherwise AWS will fail 40 | * on bucket creation 41 | */ 42 | override lazy val useVirtualDotHost: Boolean = false 43 | override lazy val bucketPrefix: Option[String] = Some("guardian-") 44 | override lazy val enableCleanup: Option[FiniteDuration] = Some(5 seconds) 45 | 46 | property( 47 | "Creating many objects in a small period of time works despite S3's in progress multipart upload eventual consistency issues" 48 | ) { implicit td: TestData => 49 | forAll( 50 | kafkaDataWithTimePeriodsGen(20, 51 | 20, 52 | padTimestampsMillis = Range.inclusive(1000, 1000), 53 | trailingSentinelValue = true 54 | ), 55 | s3ConfigGen(useVirtualDotHost, bucketPrefix) 56 | ) { (kafkaDataWithTimePeriod: KafkaDataWithTimePeriod, s3Config: S3Config) => 57 | logger.info(s"Data bucket is ${s3Config.dataBucket}") 58 | val data = kafkaDataWithTimePeriod.data 59 | 60 | implicit val config: S3Config = s3Config 61 | implicit val backupConfig: Backup = 62 | Backup(MockedBackupClientInterface.KafkaGroupId, PeriodFromFirst(1 second), 10 seconds, None) 63 | 64 | val backupClient = 65 | new BackupClient(Some(s3Settings))(new MockedKafkaConsumerInterface(Source(data)), 66 | implicitly, 67 | implicitly, 68 | implicitly, 69 | implicitly 70 | ) 71 | 72 | val calculatedFuture = for { 73 | _ <- createBucket(s3Config.dataBucket) 74 | _ = backupClient.backup.run() 75 | bucketContents <- pekko.pattern.after(10 seconds)( 76 | S3.listBucket(s3Config.dataBucket, None).withAttributes(s3Attrs).runWith(Sink.seq) 77 | ) 78 | keysSorted = bucketContents.map(_.key).sortBy(Utils.keyToOffsetDateTime) 79 | downloaded <- 80 | Future 81 | .sequence(keysSorted.map { key => 82 | S3.getObject(s3Config.dataBucket, key) 83 | .withAttributes(s3Attrs) 84 | .via(CirceStreamSupport.decode[List[Option[ReducedConsumerRecord]]]) 85 | .runWith(Sink.seq) 86 | }) 87 | .map(_.flatten)(ExecutionContext.parasitic) 88 | 89 | } yield downloaded.flatten.collect { case Some(reducedConsumerRecord) => 90 | reducedConsumerRecord 91 | } 92 | 93 | val downloaded = calculatedFuture.futureValue 94 | 95 | // Only care about ordering when it comes to key 96 | val downloadedGroupedAsKey = downloaded 97 | .groupBy(_.key) 98 | .view 99 | .mapValues { reducedConsumerRecords => 100 | reducedConsumerRecords.map(_.value) 101 | } 102 | .toMap 103 | 104 | val inputAsKey = data 105 | .dropRight(1) // Drop the generated sentinel value which we don't care about 106 | .groupBy(_.key) 107 | .view 108 | .mapValues { reducedConsumerRecords => 109 | reducedConsumerRecords.map(_.value) 110 | } 111 | .toMap 112 | 113 | downloadedGroupedAsKey mustMatchTo inputAsKey 114 | } 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MockedS3BackupClientInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import 
io.aiven.guardian.kafka.backup.MockedBackupClientInterface 4 | import io.aiven.guardian.kafka.backup.MockedKafkaConsumerInterface 5 | import io.aiven.guardian.kafka.backup.configs.Backup 6 | import io.aiven.guardian.kafka.backup.configs.TimeConfiguration 7 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 8 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 9 | import org.apache.pekko 10 | 11 | import scala.concurrent.duration._ 12 | import scala.language.postfixOps 13 | 14 | import pekko.NotUsed 15 | import pekko.actor.ActorSystem 16 | import pekko.stream.connectors.s3.S3Headers 17 | import pekko.stream.connectors.s3.S3Settings 18 | import pekko.stream.scaladsl.Source 19 | 20 | class MockedS3BackupClientInterface( 21 | kafkaData: Source[ReducedConsumerRecord, NotUsed], 22 | timeConfiguration: TimeConfiguration, 23 | s3Config: S3Config, 24 | maybeS3Settings: Option[S3Settings] 25 | )(implicit val s3Headers: S3Headers, system: ActorSystem) 26 | extends BackupClient( 27 | maybeS3Settings 28 | )( 29 | new MockedKafkaConsumerInterface(kafkaData), 30 | Backup(MockedBackupClientInterface.KafkaGroupId, timeConfiguration, 10 seconds, None), 31 | implicitly, 32 | s3Config, 33 | implicitly 34 | ) 35 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/RealS3BackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class RealS3BackupClientSpec extends AnyPropTestKit(ActorSystem("RealS3BackupClientSpec")) with RealS3BackupClientTest { 8 | override val compression: Option[Compression] = None 9 | } 10 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/RealS3GzipCompressionBackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class RealS3GzipCompressionBackupClientSpec 9 | extends AnyPropTestKit(ActorSystem("RealS3GzipCompressionBackupClientSpec")) 10 | with RealS3BackupClientTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.backup.BackupClientInterface 5 | import io.aiven.guardian.kafka.backup.KafkaConsumer 6 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface 7 | import org.apache.pekko 8 | 9 | import scala.concurrent.ExecutionContext 10 | import scala.concurrent.Future 11 | 12 | import pekko.Done 13 | import pekko.actor.ActorSystem 14 | import pekko.kafka.scaladsl.Consumer 15 | import pekko.stream.ActorAttributes 16 | import pekko.stream.Supervision 17 | 18 | trait App[T <: KafkaConsumerInterface] extends LazyLogging { 
19 | implicit val kafkaClient: T 20 | implicit val backupClient: BackupClientInterface[KafkaConsumer] 21 | implicit val actorSystem: ActorSystem 22 | implicit val executionContext: ExecutionContext 23 | 24 | def run(): Consumer.Control = { 25 | val decider: Supervision.Decider = { e => 26 | logger.error("Unhandled exception in stream", e) 27 | Supervision.Stop 28 | } 29 | 30 | backupClient.backup.withAttributes(ActorAttributes.supervisionStrategy(decider)).run() 31 | } 32 | 33 | def shutdown(control: Consumer.Control): Future[Done] = { 34 | logger.info("Shutdown of Guardian detected") 35 | val future = control.shutdown() 36 | future.onComplete(_ => logger.info("Guardian shut down")) 37 | future 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/BackupApp.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.cli.PekkoSettings 4 | import io.aiven.guardian.kafka.backup.KafkaConsumer 5 | import io.aiven.guardian.kafka.backup.{Config => BackupConfig} 6 | import io.aiven.guardian.kafka.{Config => KafkaConfig} 7 | 8 | trait BackupApp extends BackupConfig with KafkaConfig with PekkoSettings { 9 | implicit lazy val kafkaClient: KafkaConsumer = new KafkaConsumer() 10 | } 11 | -------------------------------------------------------------------------------- /cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/S3App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.KafkaConsumer 4 | import io.aiven.guardian.kafka.backup.s3.BackupClient 5 | import io.aiven.guardian.kafka.s3.{Config => S3Config} 6 | import org.apache.pekko 7 | 8 | import pekko.stream.connectors.s3.S3Settings 9 | 10 | trait S3App extends S3Config with BackupApp with App[KafkaConsumer] { 11 | lazy val s3Settings: S3Settings = S3Settings() 12 | implicit lazy val backupClient: BackupClient[KafkaConsumer] = new BackupClient[KafkaConsumer](Some(s3Settings)) 13 | } 14 | -------------------------------------------------------------------------------- /cli-backup/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | [%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /cli-backup/src/test/scala/io/aiven/guardian/kafka/backup/CliSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.scalalogging.StrictLogging 4 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice 5 | import io.aiven.guardian.kafka.backup.configs.Compression 6 | import io.aiven.guardian.kafka.backup.configs.{Backup => BackupConfig} 7 | import io.aiven.guardian.kafka.configs.{KafkaCluster => KafkaClusterConfig} 8 | import io.aiven.guardian.kafka.models.Gzip 9 | import markatta.futiles.CancellableFuture 10 | import org.apache.pekko 11 | import org.scalatest.concurrent.ScalaFutures 12 | import org.scalatest.matchers.must.Matchers 13 | import org.scalatest.propspec.AnyPropSpecLike 14 | 15 | import scala.annotation.nowarn 16 | import scala.concurrent.ExecutionContext 17 | import 
scala.concurrent.Future 18 | import scala.concurrent.Promise 19 | import scala.concurrent.duration._ 20 | import scala.language.postfixOps 21 | 22 | import java.time.temporal.ChronoUnit 23 | import java.util.concurrent.TimeUnit 24 | 25 | import pekko.actor.ActorSystem 26 | import pekko.testkit.TestKit 27 | 28 | @nowarn("msg=method main in class CommandApp is deprecated") 29 | class CliSpec 30 | extends TestKit(ActorSystem("BackupCliSpec")) 31 | with AnyPropSpecLike 32 | with Matchers 33 | with ScalaFutures 34 | with StrictLogging { 35 | implicit val ec: ExecutionContext = system.dispatcher 36 | implicit override val patienceConfig: PatienceConfig = PatienceConfig(5 minutes, 100 millis) 37 | 38 | property("Command line args are properly passed into application") { 39 | val groupId = "my-consumer-group" 40 | val topic = "topic" 41 | val bootstrapServer = "localhost:9092" 42 | val dataBucket = "backup-bucket" 43 | 44 | val args = List( 45 | "--storage", 46 | "s3", 47 | "--kafka-topics", 48 | topic, 49 | "--kafka-bootstrap-servers", 50 | bootstrapServer, 51 | "--s3-data-bucket", 52 | dataBucket, 53 | "--kafka-group-id", 54 | groupId, 55 | "--chrono-unit-slice", 56 | "hours", 57 | "--commit-timeout-buffer-window", 58 | "1 second", 59 | "gzip", 60 | "--compression-level", 61 | "5" 62 | ) 63 | 64 | val cancellable = CancellableFuture { 65 | Main.main(args.toArray) 66 | } 67 | 68 | def checkUntilMainInitialized(main: io.aiven.guardian.kafka.backup.Entry): Future[(App[_], Promise[Unit])] = 69 | main.initializedApp.get() match { 70 | case Some((app, promise)) => Future.successful((app, promise)) 71 | case None => pekko.pattern.after(100 millis)(checkUntilMainInitialized(main)) 72 | } 73 | 74 | val (app, promise) = checkUntilMainInitialized(Main).futureValue 75 | 76 | cancellable.cancel() 77 | promise.success(()) 78 | 79 | app match { 80 | case s3App: S3App => 81 | s3App.backupConfig mustEqual BackupConfig(groupId, 82 | ChronoUnitSlice(ChronoUnit.HOURS), 83 | FiniteDuration(1, TimeUnit.SECONDS), 84 | Some(Compression(Gzip, Some(5))) 85 | ) 86 | s3App.kafkaClusterConfig mustEqual KafkaClusterConfig(Set(topic)) 87 | s3App.kafkaClient.consumerSettings.getProperty("bootstrap.servers") mustEqual bootstrapServer 88 | s3App.s3Config.dataBucket mustEqual dataBucket 89 | case _ => 90 | fail("Expected an App to be initialized") 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /cli-compaction/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/guardian-for-apache-kafka/9fadf3388140820b161cf28744d1587b91bf0776/cli-compaction/.gitkeep -------------------------------------------------------------------------------- /cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.restore.KafkaProducer 5 | import io.aiven.guardian.kafka.restore.s3.RestoreClient 6 | import org.apache.pekko 7 | 8 | import scala.concurrent.Future 9 | 10 | import pekko.Done 11 | import pekko.actor.ActorSystem 12 | import pekko.stream.ActorAttributes 13 | import pekko.stream.KillSwitch 14 | import pekko.stream.Supervision 15 | import pekko.stream.UniqueKillSwitch 16 | 17 | trait App extends LazyLogging { 18 | implicit val kafkaProducer: KafkaProducer 19 | implicit val restoreClient: 
RestoreClient[KafkaProducer] 20 | implicit val actorSystem: ActorSystem 21 | 22 | val decider: Supervision.Decider = { e => 23 | logger.error("Unhandled exception in stream", e) 24 | Supervision.Stop 25 | } 26 | 27 | def run(): (UniqueKillSwitch, Future[Done]) = 28 | restoreClient.restore.withAttributes(ActorAttributes.supervisionStrategy(decider)).run() 29 | 30 | def shutdown(killSwitch: KillSwitch): Unit = { 31 | logger.info("Shutdown of Guardian detected") 32 | killSwitch.shutdown() 33 | logger.info("Guardian shut down") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/RestoreApp.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.cli.PekkoSettings 4 | import io.aiven.guardian.kafka.restore.KafkaProducer 5 | import io.aiven.guardian.kafka.restore.{Config => RestoreConfig} 6 | import io.aiven.guardian.kafka.{Config => KafkaConfig} 7 | 8 | trait RestoreApp extends RestoreConfig with KafkaConfig with PekkoSettings { 9 | implicit lazy val kafkaProducer: KafkaProducer = new KafkaProducer() 10 | } 11 | -------------------------------------------------------------------------------- /cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/S3App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.restore.s3.RestoreClient 5 | import io.aiven.guardian.kafka.s3.{Config => S3Config} 6 | import org.apache.pekko 7 | 8 | import pekko.stream.ActorAttributes 9 | import pekko.stream.Attributes 10 | import pekko.stream.Supervision 11 | import pekko.stream.connectors.s3.S3Settings 12 | 13 | trait S3App extends S3Config with RestoreApp with App with LazyLogging { 14 | lazy val s3Settings: S3Settings = S3Settings() 15 | implicit lazy val restoreClient: RestoreClient[KafkaProducer] = 16 | new RestoreClient[KafkaProducer](Some(s3Settings)) { 17 | override val maybeAttributes: Some[Attributes] = { 18 | val decider: Supervision.Decider = { e => 19 | logger.error("Unhandled exception in stream", e) 20 | Supervision.Stop 21 | } 22 | 23 | Some(ActorAttributes.supervisionStrategy(decider)) 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /cli-restore/src/test/scala/io/aiven/guardian/kafka/restore/CliSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.configs.{KafkaCluster => KafkaClusterConfig} 4 | import io.aiven.guardian.kafka.restore.configs.{Restore => RestoreConfig} 5 | import org.apache.kafka.clients.producer.ProducerConfig 6 | import org.scalatest.matchers.must.Matchers 7 | import org.scalatest.propspec.AnyPropSpec 8 | 9 | import scala.annotation.nowarn 10 | import scala.jdk.CollectionConverters._ 11 | 12 | import java.time.Instant 13 | import java.time.ZoneId 14 | 15 | @nowarn("msg=method main in class CommandApp is deprecated") 16 | class CliSpec extends AnyPropSpec with Matchers { 17 | 18 | property("Command line args are properly passed into application") { 19 | val bootstrapServer = "localhost:9092" 20 | val fromWhen = Instant.ofEpochMilli(0).atZone(ZoneId.of("UTC")).toOffsetDateTime 21 | val topic1 = "topic-1" 22 | val topic2 = "topic-2" 
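    // Editor's note (illustrative, not part of the original test): each `--override-topics` argument
    // below is passed in `<source-topic>:<destination-topic>` form and ends up as the override-topics
    // mapping of `RestoreConfig`, which the assertions at the end of this property verify. A minimal
    // sketch of that shape, using hypothetical helper code:
    //
    //   val overrides = List("topic-1:restore-topic-1", "topic-2:restore-topic-2")
    //   val overrideMap = overrides.map { arg =>
    //     val Array(src, dst) = arg.split(":", 2)
    //     src -> dst
    //   }.toMap
    //   // overrideMap == Map("topic-1" -> "restore-topic-1", "topic-2" -> "restore-topic-2")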
23 | val restoreTopicOne = s"restore-$topic1" 24 | val restoreTopicTwo = s"restore-$topic2" 25 | val overrideTopicOne = s"$topic1:$restoreTopicOne" 26 | val overrideTopicTwo = s"$topic2:$restoreTopicTwo" 27 | val dataBucket = "backup-bucket" 28 | 29 | val args = List( 30 | "--storage", 31 | "s3", 32 | "--kafka-topics", 33 | topic1, 34 | "--kafka-topics", 35 | topic2, 36 | "--kafka-bootstrap-servers", 37 | bootstrapServer, 38 | "--s3-data-bucket", 39 | dataBucket, 40 | "--from-when", 41 | fromWhen.toString, 42 | "--override-topics", 43 | overrideTopicOne, 44 | "--override-topics", 45 | overrideTopicTwo, 46 | "--single-message-per-kafka-request" 47 | ) 48 | 49 | try Main.main(args.toArray) 50 | catch { 51 | case _: Throwable => 52 | } 53 | Main.initializedApp.get() match { 54 | case Some(s3App: S3App) => 55 | s3App.restoreConfig mustEqual RestoreConfig(Some(fromWhen), 56 | Some( 57 | Map( 58 | topic1 -> restoreTopicOne, 59 | topic2 -> restoreTopicTwo 60 | ) 61 | ) 62 | ) 63 | s3App.kafkaClusterConfig mustEqual KafkaClusterConfig(Set(topic1, topic2)) 64 | s3App.kafkaProducer.producerSettings.getProperties.get("bootstrap.servers") mustEqual bootstrapServer 65 | s3App.s3Config.dataBucket mustEqual dataBucket 66 | (Map( 67 | ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG -> true.toString, 68 | ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION -> 1.toString, 69 | ProducerConfig.BATCH_SIZE_CONFIG -> 0.toString 70 | ): Map[String, AnyRef]).toSet 71 | .subsetOf(s3App.kafkaProducer.producerSettings.getProperties.asScala.toMap.toSet) mustEqual true 72 | s3App.kafkaProducer.producerSettings.parallelism mustEqual 1 73 | case _ => 74 | fail("Expected an App to be initialized") 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | storage-config-gcs { 2 | parallel-object-download-limit = 10 3 | parallel-object-download-limit = ${?STORAGE_CONFIG_GCS_PARALLEL_OBJECT_DOWNLOAD_LIMIT} 4 | } 5 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.gcs 2 | 3 | import io.aiven.guardian.kafka.compaction.gcs.models.StorageConfig 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | @nowarn("cat=lint-byname-implicit") 11 | implicit lazy val storageConfigGCS: StorageConfig = 12 | ConfigSource.default.at("storage-config-gcs").loadOrThrow[StorageConfig] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/StorageClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.gcs 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.compaction.StorageInterface 5 | import io.aiven.guardian.kafka.compaction.gcs.models.StorageConfig 6 | import io.aiven.guardian.kafka.gcs.errors.GCSErrors 7 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 8 | import org.apache.pekko 9 | 10 | import scala.annotation.nowarn 11 | 12 | import pekko.NotUsed 13 | import 
pekko.stream.connectors.googlecloud.storage.scaladsl.GCStorage 14 | import pekko.stream.scaladsl.Source 15 | 16 | class StorageClient(bucketName: String, maybePrefix: Option[String])(implicit storageConfig: StorageConfig) 17 | extends StorageInterface 18 | with LazyLogging { 19 | 20 | /** Retrieve Kafka data from a given storage source 21 | * 22 | * @return 23 | */ 24 | @throws(classOf[GCSErrors.ExpectedObjectToExist]) 25 | override def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] = { 26 | 27 | @nowarn("msg=is never used") 28 | // TODO filter the correct buckets to retrieve 29 | val byteStringSource = GCStorage 30 | .listBucket(bucketName, maybePrefix, versions = false) 31 | .flatMapMerge( 32 | storageConfig.parallelObjectDownloadLimit, 33 | storageObject => 34 | GCStorage 35 | .download(bucketName, storageObject.name) 36 | .map( 37 | _.getOrElse( 38 | throw GCSErrors.ExpectedObjectToExist(bucketName, maybePrefix) 39 | ) 40 | ) 41 | ) 42 | 43 | // TODO serialization from raw bytes to Kafka Topic Format 44 | ??? 45 | } 46 | 47 | /** Checks whether the storage exists and is accessible 48 | */ 49 | def checkStorageAccessible: Source[Boolean, NotUsed] = 50 | GCStorage.getBucketSource(bucketName).map(_.isDefined).map { 51 | case false => 52 | logger.error(s"Failed accessing GCS $bucketName") 53 | false 54 | case true => 55 | logger.info(s"Successfully accessed GCS $bucketName") 56 | true 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/models/StorageConfig.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.gcs.models 2 | 3 | final case class StorageConfig(parallelObjectDownloadLimit: Int) 4 | -------------------------------------------------------------------------------- /compaction-s3/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | storage-config-s3 { 2 | parallel-object-download-limit = 10 3 | parallel-object-download-limit = ${?STORAGE_CONFIG_S3_PARALLEL_OBJECT_DOWNLOAD_LIMIT} 4 | } 5 | -------------------------------------------------------------------------------- /compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.s3 2 | 3 | import io.aiven.guardian.kafka.compaction.s3.models.StorageConfig 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | @nowarn("cat=lint-byname-implicit") 11 | implicit lazy val storageConfigS3: StorageConfig = 12 | ConfigSource.default.at("storage-config-s3").loadOrThrow[StorageConfig] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/StorageClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.s3 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.compaction.StorageInterface 5 | import io.aiven.guardian.kafka.compaction.s3.models.StorageConfig 6 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 7 | import io.aiven.guardian.kafka.s3.errors.S3Errors 8 | import 
org.apache.pekko 9 | 10 | import scala.annotation.nowarn 11 | 12 | import pekko.NotUsed 13 | import pekko.stream.connectors.s3.BucketAccess 14 | import pekko.stream.connectors.s3.S3Headers 15 | import pekko.stream.connectors.s3.scaladsl.S3 16 | import pekko.stream.scaladsl.Source 17 | 18 | class StorageClient(bucketName: String, prefix: Option[String], s3Headers: S3Headers)(implicit 19 | storageConfig: StorageConfig 20 | ) extends StorageInterface 21 | with LazyLogging { 22 | 23 | /** Retrieve Kafka data from a given storage source 24 | * 25 | * @return 26 | */ 27 | @throws(classOf[S3Errors.ExpectedObjectToExist]) 28 | override def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] = { 29 | // TODO filter the correct buckets to retrieve 30 | @nowarn("msg=is never used") 31 | val byteStringSource = S3 32 | .listBucket(bucketName, prefix, s3Headers) 33 | .flatMapMerge( 34 | storageConfig.parallelObjectDownloadLimit, 35 | bucketDetails => S3.getObject(bucketName, bucketDetails.key, None, None, s3Headers) 36 | ) 37 | 38 | // TODO serialization from raw bytes to Kafka Topic Format 39 | ??? 40 | } 41 | 42 | /** Checks whether the storage exists and is accessible 43 | */ 44 | def checkStorageAccessible: Source[Boolean, NotUsed] = 45 | S3.checkIfBucketExistsSource(bucketName, s3Headers).map { 46 | case e @ (BucketAccess.AccessDenied | BucketAccess.NotExists) => 47 | logger.error(s"Accessing S3 $bucketName gave ${e.toString}") 48 | false 49 | case BucketAccess.AccessGranted => 50 | logger.info(s"Successfully accessed S3 $bucketName") 51 | true 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/models/StorageConfig.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.s3.models 2 | 3 | final case class StorageConfig(parallelObjectDownloadLimit: Int) 4 | -------------------------------------------------------------------------------- /core-backup/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | pekko.kafka.consumer = { 2 | poll-interval = ${?PEKKO_KAFKA_CONSUMER_POLL_INTERVAL} 3 | poll-timeout = ${?PEKKO_KAFKA_CONSUMER_POLL_TIMEOUT} 4 | stop-timeout = ${?PEKKO_KAFKA_CONSUMER_STOP_TIMEOUT} 5 | close-timeout = ${?PEKKO_KAFKA_CONSUMER_CLOSE_TIMEOUT} 6 | commit-time-warning = ${?PEKKO_KAFKA_CONSUMER_COMMIT_TIME_WARNING} 7 | commit-refresh-interval = ${?PEKKO_KAFKA_CONSUMER_COMMIT_REFRESH_INTERVAL} 8 | use-dispatcher = ${?PEKKO_KAFKA_CONSUMER_USE_DISPATCHER} 9 | wait-close-partition = ${?PEKKO_KAFKA_CONSUMER_WAIT_CLOSE_PARTITION} 10 | position-timeout = ${?PEKKO_KAFKA_CONSUMER_POSITION_TIMEOUT} 11 | offset-for-times-timeout = ${?PEKKO_KAFKA_OFFSET_FOR_TIMES_TIMEOUT} 12 | metadata-request-timeout = ${?PEKKO_KAFKA_METADATA_REQUEST_TIMEOUT} 13 | eos-draining-check-interval = ${?PEKKO_KAFKA_CONSUMER_EOS_DRAINING_CHECK_INTERVAL} 14 | connection-checker = { 15 | enable = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_ENABLE} 16 | max-retries = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_MAX_RETRIES} 17 | backoff-factor = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_BACKOFF_FACTOR} 18 | check-interval = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_CHECK_INTERVAL} 19 | } 20 | partition-handler-warning = ${?PEKKO_KAFKA_CONSUMER_PARTITION_HANDLER_WARNING} 21 | offset-reset-protection = { 22 | enable = 
${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_ENABLE} 23 | offset-threshold = ${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_OFFSET_THRESHOLD} 24 | time-threshold = ${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_TIME_THRESHOLD} 25 | } 26 | } 27 | 28 | pekko.kafka.committer = { 29 | max-batch = 100000 30 | max-batch = ${?PEKKO_KAFKA_COMMITTER_MAX_BATCH} 31 | max-interval = 1 hour 32 | max-interval = ${?PEKKO_KAFKA_COMMITTER_MAX_INTERVAL} 33 | parallelism = ${?PEKKO_KAFKA_COMMITTER_PARALLELISM} 34 | parallelism = 10000 35 | } 36 | 37 | backup { 38 | kafka-group-id = ${?BACKUP_KAFKA_GROUP_ID} 39 | time-configuration = { 40 | type = chrono-unit-slice 41 | type = ${?BACKUP_TIME_CONFIGURATION_TYPE} 42 | chrono-unit = hours 43 | chrono-unit = ${?BACKUP_TIME_CONFIGURATION_CHRONO_UNIT} 44 | duration = 1 hour 45 | duration = ${?BACKUP_TIME_CONFIGURATION_DURATION} 46 | } 47 | commit-timeout-buffer-window = 10 seconds 48 | commit-timeout-buffer-window = ${?BACKUP_COMMIT_TIMEOUT_BUFFER} 49 | } 50 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Backup 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | 11 | @nowarn("cat=lint-byname-implicit") 12 | implicit lazy val backupConfig: Backup = ConfigSource.default.at("backup").loadOrThrow[Backup] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/KafkaConsumer.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.backup.configs.Backup 5 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice 6 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 7 | import io.aiven.guardian.kafka.configs.KafkaCluster 8 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 9 | import org.apache.kafka.clients.consumer.ConsumerConfig 10 | import org.apache.kafka.clients.consumer.ConsumerRecord 11 | import org.apache.kafka.common.serialization.ByteArrayDeserializer 12 | import org.apache.pekko 13 | 14 | import scala.collection.immutable 15 | import scala.concurrent.Future 16 | import scala.jdk.DurationConverters._ 17 | 18 | import java.util.Base64 19 | 20 | import pekko.Done 21 | import pekko.actor.ActorSystem 22 | import pekko.kafka.CommitDelivery 23 | import pekko.kafka.CommitterSettings 24 | import pekko.kafka.ConsumerMessage.CommittableOffset 25 | import pekko.kafka.ConsumerMessage.CommittableOffsetBatch 26 | import pekko.kafka.ConsumerSettings 27 | import pekko.kafka.Subscriptions 28 | import pekko.kafka.scaladsl.Committer 29 | import pekko.kafka.scaladsl.Consumer 30 | import pekko.stream.scaladsl.Sink 31 | import pekko.stream.scaladsl.SourceWithContext 32 | 33 | /** A Kafka Client that uses Pekko Connectors Kafka Consumer under the hood to create a stream of events from a Kafka 34 | * cluster. To configure the Pekko Connectors Kafka Consumer use the standard typesafe configuration i.e. 
35 | * pekko.kafka.consumer (note that the `keySerializer` and `valueSerializer` are hardcoded so you cannot override 36 | * this). 37 | * @param configureConsumer 38 | * A way to configure the underlying Kafka consumer settings 39 | * @param configureCommitter 40 | * A way to configure the underlying kafka committer settings 41 | * @param system 42 | * A classic `ActorSystem` 43 | * @param kafkaClusterConfig 44 | * Additional cluster configuration that is needed 45 | */ 46 | class KafkaConsumer( 47 | configureConsumer: Option[ 48 | ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]] 49 | ] = None, 50 | configureCommitter: Option[ 51 | CommitterSettings => CommitterSettings 52 | ] = None 53 | )(implicit system: ActorSystem, kafkaClusterConfig: KafkaCluster, backupConfig: Backup) 54 | extends KafkaConsumerInterface 55 | with LazyLogging { 56 | override type CursorContext = CommittableOffset 57 | override type Control = Consumer.Control 58 | override type MatCombineResult = Consumer.DrainingControl[Done] 59 | override type BatchedCursorContext = CommittableOffsetBatch 60 | 61 | import KafkaConsumer._ 62 | 63 | if (kafkaClusterConfig.topics.isEmpty) 64 | logger.warn("Kafka Cluster configuration has no topics set") 65 | 66 | private[kafka] val consumerSettings = { 67 | val base = ConsumerSettings(system, new ByteArrayDeserializer, new ByteArrayDeserializer) 68 | configureConsumer 69 | .fold(base)(block => block(base)) 70 | .withProperties( 71 | ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest" 72 | ) 73 | .withCommitTimeout { 74 | val baseDuration = backupConfig.timeConfiguration match { 75 | case PeriodFromFirst(duration) => duration 76 | case ChronoUnitSlice(chronoUnit) => 77 | chronoUnit.getDuration.toScala 78 | } 79 | 80 | baseDuration + backupConfig.commitTimeoutBufferWindow 81 | } 82 | .withGroupId( 83 | backupConfig.kafkaGroupId 84 | ) 85 | } 86 | 87 | private[kafka] val subscriptions = Subscriptions.topics(kafkaClusterConfig.topics) 88 | 89 | /** @return 90 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors 91 | */ 92 | override def getSource: SourceWithContext[ReducedConsumerRecord, CommittableOffset, Consumer.Control] = 93 | Consumer 94 | .sourceWithOffsetContext(consumerSettings, subscriptions) 95 | .map(consumerRecordToReducedConsumerRecord) 96 | 97 | private[kafka] val committerSettings: CommitterSettings = { 98 | val base = CommitterSettings(system) 99 | configureCommitter 100 | .fold(base)(block => block(base)) 101 | .withDelivery(CommitDelivery.waitForAck) 102 | } 103 | 104 | /** @return 105 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message 106 | */ 107 | override def commitCursor: Sink[CommittableOffsetBatch, Future[Done]] = Committer.sink(committerSettings) 108 | 109 | /** @return 110 | * The result of this function gets directly passed into the `combine` parameter of 111 | * [[pekko.stream.scaladsl.Source.toMat]] 112 | */ 113 | override def matCombine: (Consumer.Control, Future[Done]) => Consumer.DrainingControl[Done] = 114 | Consumer.DrainingControl[Done].apply 115 | 116 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext` 117 | * @param cursors 118 | * The cursors that need to be batched 119 | * @return 120 | * A collection data structure that represents the batched cursors 121 | */ 122 | override def batchCursorContext(cursors: immutable.Iterable[CommittableOffset]): 
CommittableOffsetBatch = 123 | CommittableOffsetBatch(cursors.toSeq) 124 | } 125 | 126 | object KafkaConsumer { 127 | def consumerRecordToReducedConsumerRecord( 128 | consumerRecord: ConsumerRecord[Array[Byte], Array[Byte]] 129 | ): ReducedConsumerRecord = 130 | ReducedConsumerRecord( 131 | consumerRecord.topic(), 132 | consumerRecord.partition(), 133 | consumerRecord.offset(), 134 | Option(consumerRecord.key()).map(byteArray => Base64.getEncoder.encodeToString(byteArray)), 135 | Base64.getEncoder.encodeToString(consumerRecord.value()), 136 | consumerRecord.timestamp(), 137 | consumerRecord.timestampType() 138 | ) 139 | } 140 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/KafkaConsumerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.collection.immutable 7 | import scala.concurrent.Future 8 | 9 | import pekko.Done 10 | import pekko.stream.scaladsl.Sink 11 | import pekko.stream.scaladsl.SourceWithContext 12 | 13 | trait KafkaConsumerInterface { 14 | 15 | /** The type of the context to pass around. In context of a Kafka consumer, this typically holds offset data to be 16 | * automatically committed 17 | */ 18 | type CursorContext 19 | 20 | /** The type that represents how to control the given stream, i.e. if you want to shut it down or add metrics 21 | */ 22 | type Control 23 | 24 | /** The type that represents the result of the `combine` parameter that is supplied to 25 | * [[pekko.stream.scaladsl.Source.toMat]] 26 | */ 27 | type MatCombineResult 28 | 29 | /** The type that represents the result of batching a `CursorContext` 30 | */ 31 | type BatchedCursorContext 32 | 33 | /** @return 34 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors 35 | */ 36 | def getSource: SourceWithContext[ReducedConsumerRecord, CursorContext, Control] 37 | 38 | /** @return 39 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message 40 | */ 41 | def commitCursor: Sink[BatchedCursorContext, Future[Done]] 42 | 43 | /** @return 44 | * The result of this function gets directly passed into the `combine` parameter of 45 | * [[pekko.stream.scaladsl.Source.toMat]] 46 | */ 47 | def matCombine: (Control, Future[Done]) => MatCombineResult 48 | 49 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext` 50 | * @param cursors 51 | * The cursors that need to be batched 52 | * @return 53 | * A collection data structure that represents the batched cursors 54 | */ 55 | def batchCursorContext(cursors: immutable.Iterable[CursorContext]): BatchedCursorContext 56 | } 57 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/Backup.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.configs 2 | 3 | import scala.concurrent.duration.FiniteDuration 4 | 5 | /** @param kafkaGroupId 6 | * The group-id that the Kafka consumer will use 7 | * @param timeConfiguration 8 | * Determines how the backed up objects/files are segregated depending on a time configuration 9 | * @param commitTimeoutBufferWindow 10 | * A buffer that is added ontop of the 
`timeConfiguration` when setting the Kafka Consumer commit timeout. 11 | * @param compression 12 | * Which compression to use for the backed up data 13 | */ 14 | final case class Backup(kafkaGroupId: String, 15 | timeConfiguration: TimeConfiguration, 16 | commitTimeoutBufferWindow: FiniteDuration, 17 | compression: Option[Compression] 18 | ) 19 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/Compression.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.configs 2 | 3 | import io.aiven.guardian.kafka.models.CompressionType 4 | 5 | final case class Compression(`type`: CompressionType, level: Option[Int]) 6 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/TimeConfiguration.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.configs 2 | 3 | import scala.concurrent.duration.FiniteDuration 4 | 5 | import java.time.temporal.ChronoUnit 6 | 7 | sealed trait TimeConfiguration 8 | 9 | /** Backs up objects/files depending on the timestamp fo the first received Kafka message. Suspending/resuming the 10 | * backup client will always create a new object/file 11 | * @param duration 12 | * The maximum span of time for each object/file, when this duration is exceeded a new file is created 13 | */ 14 | final case class PeriodFromFirst(duration: FiniteDuration) extends TimeConfiguration 15 | 16 | /** Backs up objects/files by collecting received Kafka messages into a single time slice based on a 17 | * [[java.time.temporal.ChronoUnit]]. When suspending/resuming the backup client, this option will reuse existing 18 | * objects/files if they fall into the currently configured `chronoUnit`. 19 | * @param chronoUnit 20 | * Timestamps for kafka messages that are contained within the configured [[java.time.temporal.ChronoUnit]] will be 21 | * placed into the same object/file. 
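 * Example (illustrative, not from the original sources):
 * {{{
 * import java.time.temporal.ChronoUnit
 *
 * // every record whose timestamp falls inside the same clock hour is appended to the same object/file
 * val hourly: TimeConfiguration = ChronoUnitSlice(ChronoUnit.HOURS)
 * }}}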
22 | */ 23 | final case class ChronoUnitSlice(chronoUnit: ChronoUnit) extends TimeConfiguration 24 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/BackupClientControlWrapper.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import org.apache.pekko 4 | 5 | import scala.concurrent.ExecutionContext 6 | import scala.concurrent.Future 7 | 8 | import pekko.Done 9 | import pekko.kafka.scaladsl.Consumer 10 | import pekko.stream.Materializer 11 | 12 | /** A wrapper that is designed to make it easier to cleanly shutdown resources in tests 13 | */ 14 | class BackupClientControlWrapper[T <: KafkaConsumer](backupClient: BackupClientInterface[T])(implicit 15 | materializer: Materializer, 16 | ec: ExecutionContext 17 | ) { 18 | 19 | private var control: Consumer.DrainingControl[Done] = _ 20 | 21 | def run(): Unit = 22 | control = backupClient.backup.run() 23 | 24 | @SuppressWarnings(Array("DisableSyntax.null")) 25 | def shutdown(): Future[Done] = 26 | if (control != null) 27 | control.drainAndShutdown() 28 | else 29 | Future.successful(Done) 30 | } 31 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/BackupClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class BackupClientInterfaceSpec 8 | extends AnyPropTestKit(ActorSystem("BackupClientInterfaceSpec")) 9 | with BackupClientInterfaceTest { 10 | override val compression: Option[Compression] = None 11 | } 12 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/CompressionSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.{Compression => CompressionModel} 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import io.aiven.guardian.pekko.PekkoStreamTestKit 7 | import org.apache.pekko 8 | import org.scalatest.concurrent.ScalaFutures 9 | import org.scalatest.matchers.must.Matchers 10 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 11 | 12 | import scala.concurrent.ExecutionContext 13 | import scala.concurrent.duration._ 14 | import scala.language.postfixOps 15 | 16 | import pekko.actor.ActorSystem 17 | import pekko.stream.scaladsl.Compression 18 | import pekko.stream.scaladsl.Source 19 | import pekko.stream.scaladsl.SourceWithContext 20 | import pekko.util.ByteString 21 | 22 | class CompressionSpec 23 | extends AnyPropTestKit(ActorSystem("CompressionSpec")) 24 | with Matchers 25 | with ScalaFutures 26 | with ScalaCheckPropertyChecks 27 | with PekkoStreamTestKit { 28 | 29 | implicit val ec: ExecutionContext = system.dispatcher 30 | 31 | // Due to pekko-streams taking a while to initialize for the first time we need a longer 32 | // increase in the timeout 33 | implicit override val patienceConfig: PatienceConfig = PatienceConfig(10 seconds, 15 millis) 34 | 35 | property("GZip compression works with a SourceWithContext/FlowWithContext") { 
_ => 36 | forAll { data: List[String] => 37 | val asByteString = data.map(ByteString.fromString) 38 | val zippedWithIndex = asByteString.zipWithIndex 39 | val sourceWithContext = SourceWithContext.fromTuples( 40 | Source(zippedWithIndex) 41 | ) 42 | 43 | val calculatedFuture = for { 44 | compressed <- sourceWithContext 45 | .unsafeDataVia(BackupClientInterface.compressionFlow(CompressionModel(Gzip, None))) 46 | .asSource 47 | .map { case (byteString, _) => byteString } 48 | .runFold(ByteString.empty)(_ ++ _) 49 | decompressed <- Source.single(compressed).via(Compression.gunzip()).runFold(ByteString.empty)(_ ++ _) 50 | } yield decompressed 51 | 52 | val decompressed = calculatedFuture.futureValue 53 | data.mkString mustEqual decompressed.utf8String 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/ConfigSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import com.typesafe.config.ConfigValueFactory 5 | import io.aiven.guardian.kafka.backup.configs.Backup 6 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice 7 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 8 | import io.aiven.guardian.kafka.backup.configs.TimeConfiguration 9 | import org.scalacheck.Arbitrary 10 | import org.scalacheck.Gen 11 | import org.scalatest.matchers.must.Matchers 12 | import org.scalatest.propspec.AnyPropSpec 13 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 14 | import pureconfig.ConfigSource 15 | import pureconfig.generic.auto._ 16 | 17 | import scala.annotation.nowarn 18 | import scala.concurrent.duration.FiniteDuration 19 | 20 | import java.time.temporal.ChronoUnit 21 | 22 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 23 | implicit val chronoUnitArb: Arbitrary[ChronoUnit] = Arbitrary( 24 | Gen.oneOf(ChronoUnit.values().toList) 25 | ) 26 | 27 | property("Valid TimeConfiguration chrono-unit-slice configs should parse correctly") { 28 | forAll { (chronoUnit: ChronoUnit) => 29 | val conf = 30 | s""" 31 | |time-configuration = { 32 | | type = chrono-unit-slice 33 | | chrono-unit = ${chronoUnit.name.toLowerCase} 34 | |} 35 | |""".stripMargin 36 | 37 | @nowarn("cat=lint-byname-implicit") 38 | val backup = ConfigSource.string(conf).at("time-configuration").loadOrThrow[TimeConfiguration] 39 | backup mustEqual ChronoUnitSlice(chronoUnit) 40 | } 41 | } 42 | 43 | property("Valid TimeConfiguration period-from-first configs should parse correctly") { 44 | forAll { (finiteDuration: FiniteDuration) => 45 | val conf = 46 | s""" 47 | |time-configuration = { 48 | | type = period-from-first 49 | | duration = ${finiteDuration.toString()} 50 | |} 51 | |""".stripMargin 52 | 53 | @nowarn("cat=lint-byname-implicit") 54 | val backup = ConfigSource.string(conf).at("time-configuration").loadOrThrow[TimeConfiguration] 55 | backup mustEqual PeriodFromFirst(finiteDuration) 56 | } 57 | } 58 | 59 | property("Default Backup configuration loads") { 60 | val config = ConfigFactory.load() 61 | 62 | // Inject mandatory values that have no default into the configuration 63 | val configWithMandatoryValues = 64 | config.withValue("backup.kafka-group-id", ConfigValueFactory.fromAnyRef(MockedBackupClientInterface.KafkaGroupId)) 65 | 66 | @nowarn("cat=lint-byname-implicit") 67 | def readConfiguration = 
ConfigSource.fromConfig(configWithMandatoryValues).at("backup").loadOrThrow[Backup] 68 | 69 | noException should be thrownBy readConfiguration 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/GzipCompressionBackupClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class GzipCompressionBackupClientInterfaceSpec 9 | extends AnyPropTestKit(ActorSystem("GzipCompressionBackupClientInterfaceSpec")) 10 | with BackupClientInterfaceTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/MockedKafkaConsumerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.collection.immutable 7 | import scala.concurrent.Future 8 | import scala.concurrent.duration.FiniteDuration 9 | 10 | import java.time.Instant 11 | import java.time.temporal.ChronoUnit 12 | import java.util.concurrent.ConcurrentLinkedDeque 13 | import java.util.concurrent.atomic.AtomicReference 14 | 15 | import pekko.Done 16 | import pekko.NotUsed 17 | import pekko.stream.scaladsl._ 18 | 19 | /** A mocked `KafkaClientInterface` that returns a specific data as its source 20 | * 21 | * @param kafkaData 22 | * The data which the mock will output 23 | * @param commitStorage 24 | * A collection that keeps track of whenever a cursor is committed 25 | * @param stopAfterDuration 26 | * Dont produce any data from `kafkaData` if its offset is after `stopAfterOffset` based off of the first committed 27 | * [[io.aiven.guardian.kafka.models.ReducedConsumerRecord.timestamp]]. Handy to simulate the premature closing of a 28 | * KafkaClient before its completed producing all source elements (i.e. suspend/resume and restart scenarios). 29 | * @param handleOffsets 30 | * Tells the MockedKafkaClientInterface to handleOffsets rather than just ignoring them. This means that the mock 31 | * will only add commits to the `commitStorage` if its later than any currently processed offsets. Furthermore it 32 | * will not replay source data if it has already been committed. 33 | */ 34 | class MockedKafkaConsumerInterface(kafkaData: Source[ReducedConsumerRecord, NotUsed], 35 | commitStorage: ConcurrentLinkedDeque[Long] = new ConcurrentLinkedDeque[Long](), 36 | stopAfterDuration: Option[FiniteDuration] = None, 37 | handleOffsets: Boolean = false 38 | ) extends KafkaConsumerInterface { 39 | 40 | /** The type of the context to pass around. In context of a Kafka consumer, this typically holds offset data to be 41 | * automatically committed 42 | */ 43 | override type CursorContext = Long 44 | 45 | /** The type that represents how to control the given stream, i.e. 
if you want to shut it down or add metrics 46 | */ 47 | override type Control = Future[NotUsed] 48 | 49 | /** The type that represents the result of the `combine` parameter that is supplied to 50 | * [[pekko.stream.scaladsl.Source.toMat]] 51 | */ 52 | override type MatCombineResult = Future[NotUsed] 53 | 54 | /** The type that represents the result of batching a `CursorContext` 55 | */ 56 | override type BatchedCursorContext = Long 57 | 58 | private val firstReducedConsumerRecord: AtomicReference[ReducedConsumerRecord] = 59 | new AtomicReference[ReducedConsumerRecord]() 60 | 61 | /** @return 62 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors 63 | */ 64 | override def getSource: SourceWithContext[ReducedConsumerRecord, Long, Future[NotUsed]] = { 65 | val source = kafkaData 66 | .prefixAndTail(1) 67 | .flatMapConcat { 68 | case (Seq(head), rest) => 69 | firstReducedConsumerRecord.set(head) 70 | Source.combine( 71 | Source.single(head), 72 | rest 73 | )(Concat(_)) 74 | case _ => Source.empty[ReducedConsumerRecord] 75 | } 76 | 77 | val finalSource = if (handleOffsets) { 78 | source.filter { reducedConsumerRecord => 79 | (commitStorage.isEmpty || { 80 | reducedConsumerRecord.offset > commitStorage.getLast 81 | }) && { 82 | (stopAfterDuration, Option(firstReducedConsumerRecord.get())) match { 83 | case (Some(afterDuration), Some(firstRecord)) => 84 | val difference = 85 | ChronoUnit.MILLIS.between(Instant.ofEpochMilli(firstRecord.timestamp), 86 | Instant.ofEpochMilli(reducedConsumerRecord.timestamp) 87 | ) 88 | afterDuration.toMillis > difference 89 | case _ => true 90 | } 91 | } 92 | } 93 | } else 94 | source 95 | 96 | SourceWithContext 97 | .fromTuples(finalSource.map { reducedConsumerRecord => 98 | (reducedConsumerRecord, reducedConsumerRecord.offset) 99 | }) 100 | .mapMaterializedValue(Future.successful) 101 | } 102 | 103 | /** @return 104 | * The result of this function gets directly passed into the `combine` parameter of 105 | * [[pekko.stream.scaladsl.Source.toMat]] 106 | */ 107 | override def matCombine: (Future[NotUsed], Future[Done]) => Future[NotUsed] = Keep.left 108 | 109 | /** @return 110 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message 111 | */ 112 | override def commitCursor: Sink[Long, Future[Done]] = Sink.foreach { cursor => 113 | if (handleOffsets && !commitStorage.isEmpty) { 114 | if (commitStorage.getLast < cursor) 115 | commitStorage.add(cursor) 116 | } else 117 | commitStorage.add(cursor) 118 | } 119 | 120 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext` 121 | * @param cursors 122 | * The cursors that need to be batched 123 | * @return 124 | * A collection data structure that represents the batched cursors 125 | */ 126 | override def batchCursorContext(cursors: immutable.Iterable[Long]): Long = cursors.max 127 | 128 | } 129 | -------------------------------------------------------------------------------- /core-cli/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | pekko { 2 | loggers = ["org.apache.pekko.event.slf4j.Slf4jLogger"] 3 | loglevel = "INFO" 4 | logging-filter = "org.apache.pekko.event.slf4j.Slf4jLoggingFilter" 5 | } 6 | -------------------------------------------------------------------------------- /core-cli/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 
[%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/MainUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli 2 | 3 | import ch.qos.logback.classic.joran.JoranConfigurator 4 | import ch.qos.logback.core.Context 5 | import org.slf4j.ILoggerFactory 6 | 7 | import scala.concurrent.ExecutionContext 8 | import scala.concurrent.Future 9 | import scala.concurrent.Promise 10 | import scala.concurrent.blocking 11 | import scala.io.StdIn 12 | import scala.util.Failure 13 | import scala.util.Success 14 | import scala.util.Using 15 | 16 | import java.nio.file.Files 17 | import java.nio.file.Path 18 | 19 | object MainUtils { 20 | 21 | /** Hook that lets the user specify the future that will signal the shutdown of the server whenever completed. Adapted 22 | * from 23 | * https://github.com/apache/incubator-pekko-http/blob/94d1b1c153cc39216dae4217fd0e927f04d53cd2/http/src/main/scala/org/apache/pekko/http/scaladsl/server/HttpApp.scala#L164-L176 24 | */ 25 | @SuppressWarnings( 26 | Array( 27 | "scalafix:DisableSyntax.null" 28 | ) 29 | ) 30 | def waitForShutdownSignal(promise: Promise[Unit] = Promise[Unit]())(implicit ec: ExecutionContext): Future[Unit] = { 31 | sys.addShutdownHook { 32 | promise.trySuccess(()) 33 | } 34 | Future { 35 | blocking { 36 | if (StdIn.readLine("Press RETURN to stop...\n") != null) 37 | promise.trySuccess(()) 38 | } 39 | } 40 | promise.future 41 | } 42 | 43 | /** Allows you to override the default logback.xml file with a custom one 44 | * @see 45 | * https://stackoverflow.com/a/21886322/1519631 46 | */ 47 | def setLogbackFile(path: Path, loggerContext: ILoggerFactory): Unit = 48 | Using(Files.newInputStream(path)) { inputStream => 49 | val configurator = new JoranConfigurator 50 | configurator.setContext(loggerContext.asInstanceOf[Context]) 51 | configurator.doConfigure(inputStream) 52 | } match { 53 | case Failure(exception) => throw exception 54 | case Success(value) => value 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/PekkoSettings.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli 2 | 3 | import org.apache.pekko.actor.ActorSystem 4 | 5 | import scala.concurrent.ExecutionContext 6 | 7 | trait PekkoSettings { 8 | implicit val actorSystem: ActorSystem = ActorSystem() 9 | implicit val executionContext: ExecutionContext = ExecutionContext.global 10 | } 11 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/arguments/PropertiesOpt.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli.arguments 2 | 3 | import cats.data.ValidatedNel 4 | import cats.implicits._ 5 | import com.monovore.decline.Argument 6 | 7 | import scala.util.Failure 8 | import scala.util.Success 9 | import scala.util.Using 10 | 11 | import java.io.BufferedReader 12 | import java.io.FileNotFoundException 13 | import java.io.FileReader 14 | import java.util.Properties 15 | 16 | object PropertiesOpt { 17 | implicit val propertiesArgument: Argument[Properties] = new Argument[Properties] { 18 | override def read(string: String): 
ValidatedNel[String, Properties] = { 19 | val prop = new Properties() 20 | Using(new BufferedReader(new FileReader(string))) { reader => 21 | prop.load(reader) 22 | } match { 23 | case Failure(_: FileNotFoundException) => 24 | s"Properties file at path $string does not exist".invalidNel 25 | case Failure(_) => 26 | s"Unable to read file at path $string".invalidNel 27 | case Success(_) => prop.validNel 28 | } 29 | } 30 | 31 | override def defaultMetavar: String = "path" 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/arguments/StorageOpt.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli.arguments 2 | 3 | import cats.data.ValidatedNel 4 | import cats.implicits._ 5 | import com.monovore.decline.Argument 6 | import enumeratum._ 7 | 8 | sealed trait StorageOpt extends EnumEntry with EnumEntry.Lowercase 9 | 10 | object StorageOpt extends Enum[StorageOpt] { 11 | case object S3 extends StorageOpt 12 | 13 | val values: IndexedSeq[StorageOpt] = findValues 14 | 15 | implicit val storageArgument: Argument[StorageOpt] = new Argument[StorageOpt] { 16 | override def read(string: String): ValidatedNel[String, StorageOpt] = 17 | StorageOpt.withNameOption(string).toValidNel("Invalid Storage Argument") 18 | 19 | override def defaultMetavar: String = "storage" 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/options/Options.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli.options 2 | 3 | import cats.data.NonEmptyList 4 | import cats.implicits._ 5 | import com.monovore.decline.Opts 6 | import com.typesafe.config.ConfigException.Missing 7 | import com.typesafe.config.ConfigFactory 8 | import io.aiven.guardian.cli.arguments.StorageOpt 9 | import io.aiven.guardian.kafka.configs.KafkaCluster 10 | import pureconfig.error.ConfigReaderException 11 | 12 | import java.nio.file.Path 13 | 14 | trait Options { 15 | val storageOpt: Opts[StorageOpt] = 16 | Opts.option[StorageOpt]("storage", help = "Which type of storage to persist kafka topics") 17 | 18 | val dataBucketOpt: Opts[Option[String]] = 19 | Opts.option[String]("s3-data-bucket", help = "S3 Bucket for storage of main backup data").orNone 20 | 21 | val topicsOpt: Opts[Option[NonEmptyList[String]]] = 22 | Opts.options[String]("kafka-topics", help = "Kafka topics to operate on").orNone 23 | 24 | val bootstrapServersOpt: Opts[Option[NonEmptyList[String]]] = 25 | Opts.options[String]("kafka-bootstrap-servers", help = "Kafka bootstrap servers").orNone 26 | 27 | val logbackFileOpt: Opts[Option[Path]] = 28 | Opts.option[Path]("logback-file", help = "Specify logback.xml configuration to override default").orNone 29 | 30 | def optionalPureConfigValue[T](value: () => T): Option[T] = 31 | try Some(value()) 32 | catch { 33 | case _: ConfigReaderException[_] => 34 | None 35 | } 36 | 37 | @SuppressWarnings( 38 | Array( 39 | "scalafix:DisableSyntax.null" 40 | ) 41 | ) 42 | def checkConfigKeyIsDefined(path: String): Boolean = 43 | try ConfigFactory.load().getAnyRef(path) != null 44 | catch { 45 | case _: Missing => false 46 | } 47 | 48 | val kafkaClusterOpt: Opts[KafkaCluster] = topicsOpt.mapValidated { topics => 49 | import io.aiven.guardian.kafka.{Config => KafkaConfig} 50 | topics match { 51 | case Some(value) => 52 | 
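        // Editor's note (illustrative): `value` here is the NonEmptyList collected from repeated
        // `--kafka-topics` flags, e.g. NonEmptyList.of("topic-a", "topic-b"), and is collapsed into
        // KafkaCluster(Set("topic-a", "topic-b")) just below; when the flag is absent, the topics
        // already configured via pureconfig (KafkaConfig.kafkaClusterConfig) are used instead.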
KafkaCluster(value.toList.toSet).validNel 53 | case None if KafkaConfig.kafkaClusterConfig.topics.nonEmpty => KafkaConfig.kafkaClusterConfig.validNel 54 | case _ => 55 | "kafka-topics is a mandatory value that needs to be configured".invalidNel 56 | } 57 | } 58 | 59 | } 60 | 61 | object Options extends Options 62 | -------------------------------------------------------------------------------- /core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/DatabaseInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.concurrent.Future 7 | 8 | import pekko.NotUsed 9 | import pekko.stream.javadsl.Flow 10 | import pekko.stream.scaladsl.Source 11 | import pekko.util.ByteString 12 | 13 | trait DatabaseInterface { 14 | 15 | /** Given a source of storage where Kafka messages are contained, stream it into a database. 16 | * @param kafkaStorageSource 17 | * @param encodeKafkaRowToByteString 18 | * @return 19 | * Number of rows updated 20 | */ 21 | def streamInsert(kafkaStorageSource: Source[ReducedConsumerRecord, NotUsed], 22 | encodeKafkaRowToByteString: Flow[ReducedConsumerRecord, ByteString, NotUsed] 23 | ): Future[Long] 24 | } 25 | -------------------------------------------------------------------------------- /core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/PostgresJDBCDatabase.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | import org.postgresql.copy.CopyManager 6 | import org.postgresql.core.BaseConnection 7 | 8 | import scala.concurrent.ExecutionContext 9 | import scala.concurrent.Future 10 | import scala.concurrent.blocking 11 | 12 | import java.sql.Connection 13 | 14 | import pekko.NotUsed 15 | import pekko.stream.ActorAttributes 16 | import pekko.stream.Materializer 17 | import pekko.stream.javadsl.Flow 18 | import pekko.stream.scaladsl.Source 19 | import pekko.stream.scaladsl.StreamConverters 20 | import pekko.util.ByteString 21 | 22 | /** A Postgres Database backed by JDBC which uses the Postgres COPY command to insert data into the database. Note that 23 | * since this uses JDBC and CopyManager, its implementation is blocking under the hood. 24 | * @param scheduler 25 | * @param materializer 26 | * @param conn 27 | */ 28 | class PostgresJDBCDatabase()(implicit executionContext: ExecutionContext, materializer: Materializer, conn: Connection) 29 | extends DatabaseInterface { 30 | 31 | /** Inserts data into a Postgres Database using the COPY method (see 32 | * https://www.postgresql.org/docs/9.4/sql-copy.html). This means the data insertion is buffered and also extremely 33 | * fast since it bypasses internal parts of the Postgres engine which are not necessary. 34 | * 35 | * Since it uses JDBC plus `java.io.InputStream` under the hood, the operation is inherently blocking even though it 36 | * returns a `scala.concurrent.Future`. 
Due to this we have used blocking IO dispatchers to avoid problems that are 37 | * typical of blocking IO 38 | * 39 | * @return 40 | * Number of rows updated 41 | */ 42 | override def streamInsert(kafkaStorageSource: Source[ReducedConsumerRecord, NotUsed], 43 | encodeKafkaRowToByteString: Flow[ReducedConsumerRecord, ByteString, NotUsed] 44 | ): Future[Long] = { 45 | // TODO implement SQL query 46 | val sql = """""" 47 | 48 | // Since this is blocking IO we use a custom dispatcher dealt to handle with this 49 | val sink = StreamConverters 50 | .asInputStream() 51 | .withAttributes(ActorAttributes.dispatcher(ActorAttributes.IODispatcher.dispatcher)) 52 | 53 | val postgresSource = kafkaStorageSource.via(encodeKafkaRowToByteString) 54 | 55 | blocking(Future { 56 | postgresSource.runWith( 57 | sink.mapMaterializedValue(inputStream => 58 | new CopyManager(conn.asInstanceOf[BaseConnection]).copyIn( 59 | sql, 60 | inputStream 61 | ) 62 | ) 63 | ) 64 | }) 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/StorageInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import pekko.NotUsed 7 | import pekko.stream.scaladsl.Source 8 | 9 | trait StorageInterface { 10 | 11 | /** Retrieve Kafka data from a given storage source 12 | * @return 13 | */ 14 | def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] 15 | 16 | /** Checks whether the storage exists and is accessible 17 | */ 18 | def checkStorageAccessible: Source[Boolean, NotUsed] 19 | } 20 | -------------------------------------------------------------------------------- /core-gcs/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | gcs-config = { 2 | data-bucket = ${?GCS_CONFIG_DATA_BUCKET} 3 | compaction-bucket = ${?GCS_CONFIG_COMPACTION_BUCKET} 4 | } 5 | -------------------------------------------------------------------------------- /core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.gcs 2 | 3 | import io.aiven.guardian.kafka.gcs.configs.GCS 4 | import pureconfig._ 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | @nowarn("cat=lint-byname-implicit") 11 | implicit lazy val gcsConfig: GCS = 12 | ConfigSource.default.at("gcs-config").loadOrThrow[GCS] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/configs/GCS.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.gcs.configs 2 | 3 | /** GCS specific configuration used when storing Kafka ConsumerRecords to a GCS bucket 4 | * @param dataBucket 5 | * The bucket where a Kafka Consumer directly streams data into as storage 6 | * @param compactionBucket 7 | * The bucket where compaction results are stored 8 | */ 9 | final case class GCS(dataBucket: String, compactionBucket: String) 10 | -------------------------------------------------------------------------------- 
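Editor's note: the `gcs-config` block in core-gcs/src/main/resources/reference.conf above only fills `data-bucket` and `compaction-bucket` through optional `${?...}` environment substitutions, so both values must be supplied at runtime. Below is a minimal, self-contained sketch of loading that section the same way the `Config` trait above does; the object name and bucket values are illustrative assumptions, not part of the project.

import io.aiven.guardian.kafka.gcs.configs.GCS
import pureconfig.ConfigSource
import pureconfig.generic.auto._

import scala.annotation.nowarn

object GcsConfigExample {
  // Assumes GCS_CONFIG_DATA_BUCKET and GCS_CONFIG_COMPACTION_BUCKET are exported
  // (e.g. GCS_CONFIG_DATA_BUCKET=example-data-bucket); loadOrThrow fails otherwise
  // because reference.conf provides no defaults for these keys.
  @nowarn("cat=lint-byname-implicit")
  def load(): GCS = ConfigSource.default.at("gcs-config").loadOrThrow[GCS]

  def main(args: Array[String]): Unit = {
    val gcs = load()
    println(s"data bucket: ${gcs.dataBucket}, compaction bucket: ${gcs.compactionBucket}")
  }
}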
/core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/errors/GCSErrors.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.gcs.errors 2 | 3 | import io.aiven.guardian.kafka.Errors 4 | 5 | sealed abstract class GCSErrors extends Errors 6 | 7 | object GCSErrors { 8 | final case class ExpectedObjectToExist(bucketName: String, maybePrefix: Option[String]) extends GCSErrors { 9 | override def getMessage: String = 10 | ??? 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /core-restore/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | pekko.kafka.producer { 2 | discovery-method = ${?PEKKO_KAFKA_PRODUCER_DISCOVERY_METHOD} 3 | service-name = ${?PEKKO_KAFKA_PRODUCER_SERVICE_NAME} 4 | resolve-timeout = ${?PEKKO_KAFKA_PRODUCER_RESOLVE_TIMEOUT} 5 | parallelism = ${?PEKKO_KAFKA_PRODUCER_PARALLELISM} 6 | close-timeout = ${?PEKKO_KAFKA_PRODUCER_CLOSE_TIMEOUT} 7 | close-on-producer-stop = ${?PEKKO_KAFKA_PRODUCER_CLOSE_ON_PRODUCER_STOP} 8 | use-dispatcher = ${?PEKKO_KAFKA_PRODUCER_USE_DISPATCHER} 9 | eos-commit-interval = ${?PEKKO_KAFKA_PRODUCER_EOS_COMMIT_INTERVAL} 10 | } 11 | 12 | restore { 13 | from-when = ${?RESTORE_FROM_WHEN} 14 | override-topics = ${?RESTORE_OVERRIDE_TOPICS} 15 | } 16 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.restore.configs.Restore 4 | import pureconfig._ 5 | import pureconfig.configurable._ 6 | import pureconfig.generic.auto._ 7 | 8 | import scala.annotation.nowarn 9 | 10 | import java.time.OffsetDateTime 11 | import java.time.format.DateTimeFormatter 12 | 13 | trait Config { 14 | implicit val localDateConvert: ConfigConvert[OffsetDateTime] = offsetDateTimeConfigConvert( 15 | DateTimeFormatter.ISO_OFFSET_DATE_TIME 16 | ) 17 | 18 | @nowarn("cat=lint-byname-implicit") 19 | implicit lazy val restoreConfig: Restore = ConfigSource.default.at("restore").loadOrThrow[Restore] 20 | } 21 | 22 | object Config extends Config 23 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/KafkaProducer.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import io.aiven.guardian.kafka.restore.configs.Restore 5 | import org.apache.kafka.clients.producer.ProducerRecord 6 | import org.apache.kafka.common.serialization.ByteArraySerializer 7 | import org.apache.pekko 8 | 9 | import scala.concurrent.Future 10 | 11 | import java.util.Base64 12 | 13 | import pekko.Done 14 | import pekko.actor.ActorSystem 15 | import pekko.kafka.ProducerSettings 16 | import pekko.kafka.scaladsl.Producer 17 | import pekko.stream.scaladsl.Sink 18 | 19 | class KafkaProducer( 20 | configureProducer: Option[ 21 | ProducerSettings[Array[Byte], Array[Byte]] => ProducerSettings[Array[Byte], Array[Byte]] 22 | ] = None 23 | )(implicit system: ActorSystem, restoreConfig: Restore) 24 | extends KafkaProducerInterface { 25 | 26 | private[kafka] val producerSettings = { 27 | val base = ProducerSettings(system, new ByteArraySerializer, new 
ByteArraySerializer) 28 | configureProducer 29 | .fold(base)(block => block(base)) 30 | } 31 | 32 | override def getSink: Sink[ReducedConsumerRecord, Future[Done]] = 33 | Producer.plainSink(producerSettings).contramap[ReducedConsumerRecord] { reducedConsumerRecord => 34 | val topic = restoreConfig.overrideTopics match { 35 | case Some(map) => 36 | map.getOrElse(reducedConsumerRecord.topic, reducedConsumerRecord.topic) 37 | case None => reducedConsumerRecord.topic 38 | } 39 | val valueAsByteArray = Base64.getDecoder.decode(reducedConsumerRecord.value) 40 | reducedConsumerRecord.key match { 41 | case Some(key) => 42 | new ProducerRecord[Array[Byte], Array[Byte]]( 43 | topic, 44 | Base64.getDecoder.decode(key), 45 | valueAsByteArray 46 | ) 47 | case None => 48 | new ProducerRecord[Array[Byte], Array[Byte]]( 49 | topic, 50 | valueAsByteArray 51 | ) 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/KafkaProducerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.concurrent.Future 7 | 8 | import pekko.Done 9 | import pekko.stream.scaladsl.Sink 10 | 11 | trait KafkaProducerInterface { 12 | def getSink: Sink[ReducedConsumerRecord, Future[Done]] 13 | } 14 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/RestoreClientInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.ExtensionsMethods._ 5 | import io.aiven.guardian.kafka.Utils 6 | import io.aiven.guardian.kafka.codecs.Circe._ 7 | import io.aiven.guardian.kafka.configs.KafkaCluster 8 | import io.aiven.guardian.kafka.models.BackupObjectMetadata 9 | import io.aiven.guardian.kafka.models.Gzip 10 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 11 | import io.aiven.guardian.kafka.restore.configs.Restore 12 | import org.apache.pekko 13 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport 14 | import org.typelevel.jawn.AsyncParser 15 | 16 | import scala.concurrent.ExecutionContext 17 | import scala.concurrent.Future 18 | 19 | import java.time.OffsetDateTime 20 | 21 | import pekko.Done 22 | import pekko.NotUsed 23 | import pekko.actor.ActorSystem 24 | import pekko.stream.Attributes 25 | import pekko.stream.KillSwitches 26 | import pekko.stream.UniqueKillSwitch 27 | import pekko.stream.scaladsl.Compression 28 | import pekko.stream.scaladsl.Concat 29 | import pekko.stream.scaladsl.Flow 30 | import pekko.stream.scaladsl.Keep 31 | import pekko.stream.scaladsl.RunnableGraph 32 | import pekko.stream.scaladsl.Source 33 | import pekko.util.ByteString 34 | 35 | trait RestoreClientInterface[T <: KafkaProducerInterface] extends LazyLogging { 36 | implicit val kafkaProducerInterface: T 37 | implicit val restoreConfig: Restore 38 | implicit val kafkaClusterConfig: KafkaCluster 39 | implicit val system: ActorSystem 40 | val maybeAttributes: Option[Attributes] = None 41 | 42 | def retrieveBackupKeys: Future[List[String]] 43 | 44 | def downloadFlow: Flow[String, ByteString, NotUsed] 45 | 46 | private[kafka] def keysWithOffsetDateTime(keys: List[String]): 
List[(String, OffsetDateTime)] = keys.map { key => 47 | (key, Utils.keyToOffsetDateTime(key)) 48 | } 49 | 50 | private[kafka] def finalKeys: Future[List[String]] = { 51 | implicit val ec: ExecutionContext = system.dispatcher 52 | 53 | for { 54 | backupKeys <- retrieveBackupKeys 55 | withTime = keysWithOffsetDateTime(backupKeys) 56 | sorted = withTime.sortBy { case (_, time) => 57 | time 58 | } 59 | 60 | latest = restoreConfig.fromWhen match { 61 | case Some(pickedDate) => 62 | val index = sorted.indexWhere { case (_, time) => 63 | time >= pickedDate 64 | } 65 | index match { 66 | case 0 => sorted 67 | case -1 => 68 | sorted.lastOption match { 69 | case Some((key, value)) => 70 | // Its still technically possible that the last key can contain a picked value. 71 | List((key, value)) 72 | case _ => List.empty 73 | } 74 | case index => 75 | val (_, rest) = sorted.splitAt(index - 1) 76 | rest 77 | } 78 | case None => sorted 79 | } 80 | } yield latest.map { case (key, _) => key } 81 | } 82 | 83 | private[kafka] def checkTopicInConfig(reducedConsumerRecord: ReducedConsumerRecord): Boolean = 84 | kafkaClusterConfig.topics.contains(reducedConsumerRecord.topic) 85 | 86 | private[kafka] def checkTopicGreaterThanTime(reducedConsumerRecord: ReducedConsumerRecord): Boolean = 87 | restoreConfig.fromWhen match { 88 | case Some(pickedDate) => 89 | reducedConsumerRecord.toOffsetDateTime >= pickedDate 90 | case None => true 91 | } 92 | 93 | private[kafka] def restoreKey(key: String): Source[ByteString, NotUsed] = { 94 | val source = Source 95 | .single(key) 96 | .via(downloadFlow) 97 | 98 | BackupObjectMetadata.fromKey(key).compression match { 99 | case Some(Gzip) => source.via(Compression.gunzip()) 100 | case None => source 101 | } 102 | } 103 | 104 | def restore: RunnableGraph[(UniqueKillSwitch, Future[Done])] = { 105 | val sourceWithCompression = Source.future(finalKeys).flatMapConcat { keys => 106 | keys.map(key => restoreKey(key)) match { 107 | case first :: Nil => first 108 | case first :: second :: Nil => Source.combine(first, second)(Concat(_)) 109 | case first :: second :: rest => Source.combine(first, second, rest: _*)(Concat(_)) 110 | case Nil => Source.empty[ByteString] 111 | } 112 | } 113 | 114 | val asReducedConsumerRecord = sourceWithCompression 115 | .via(CirceStreamSupport.decode[Option[ReducedConsumerRecord]](AsyncParser.UnwrapArray, multiValue = true)) 116 | .collect { 117 | case Some(reducedConsumerRecord) 118 | if checkTopicInConfig(reducedConsumerRecord) && checkTopicGreaterThanTime(reducedConsumerRecord) => 119 | reducedConsumerRecord 120 | } 121 | 122 | asReducedConsumerRecord.viaMat(KillSwitches.single)(Keep.right).toMat(kafkaProducerInterface.getSink)(Keep.both) 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/configs/Restore.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.configs 2 | 3 | import java.time.OffsetDateTime 4 | 5 | /** @param fromWhen 6 | * An optional datetime which only restores topics are are after or equal to that date 7 | * @param overrideTopics 8 | * An optional map that allows you to translate topics that are backed up to a new topic name in the destination 9 | * Kafka cluster. The key is the backed up topic name and the value is the new topic name. If this map doesn't 10 | * contain a key for a topic then its backed up as normal. 
11 | */ 12 | final case class Restore(fromWhen: Option[OffsetDateTime], overrideTopics: Option[Map[String, String]]) 13 | 14 | object Restore { 15 | def empty: Restore = Restore(None, None) 16 | } 17 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/ConfigSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.Generators.kafkaTopic 4 | import io.aiven.guardian.kafka.restore.configs.Restore 5 | import org.scalacheck.Gen 6 | import org.scalacheck.ops.time.ImplicitJavaTimeGenerators._ 7 | import org.scalatest.matchers.must.Matchers 8 | import org.scalatest.propspec.AnyPropSpec 9 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 10 | import pureconfig._ 11 | import pureconfig.configurable._ 12 | import pureconfig.generic.auto._ 13 | 14 | import scala.annotation.nowarn 15 | 16 | import java.time.OffsetDateTime 17 | import java.time.format.DateTimeFormatter 18 | 19 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 20 | implicit val localDateConvert: ConfigConvert[OffsetDateTime] = offsetDateTimeConfigConvert( 21 | DateTimeFormatter.ISO_OFFSET_DATE_TIME 22 | ) 23 | 24 | property("Valid Restore configs should parse correctly") { 25 | val overrideMapGen = for { 26 | size <- Gen.choose(1, 10) 27 | keys <- Gen.containerOfN[Set, String](size, kafkaTopic) 28 | values <- Gen.containerOfN[Set, String](size, kafkaTopic) 29 | } yield keys.zip(values).toMap 30 | 31 | val offsetDateTimeGen = arbZonedDateTime.arbitrary.map(_.toOffsetDateTime) 32 | 33 | forAll(offsetDateTimeGen, overrideMapGen) { (fromWhen: OffsetDateTime, overrideTopics: Map[String, String]) => 34 | val topics = overrideTopics 35 | .map { case (key, value) => 36 | val k = "\"" + key + "\"" 37 | val v = "\"" + value + "\"" 38 | s"$k=$v" 39 | } 40 | .mkString("", "\n ", "") 41 | 42 | val conf = s""" 43 | |restore { 44 | | from-when = "${fromWhen.toString}" 45 | | override-topics = { 46 | | $topics 47 | | } 48 | |} 49 | |""".stripMargin 50 | 51 | @nowarn("cat=lint-byname-implicit") 52 | val restore = ConfigSource.string(conf).at("restore").loadOrThrow[Restore] 53 | restore mustEqual Restore(Some(fromWhen), Some(overrideTopics)) 54 | } 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/GzipCompressionRestoreClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class GzipCompressionRestoreClientInterfaceSpec 9 | extends AnyPropTestKit(ActorSystem("GzipCompressionRestoreClientInterfaceSpec")) 10 | with RestoreClientInterfaceTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/MockedKafkaProducerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import 
io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.concurrent.Future 7 | 8 | import java.util.concurrent.ConcurrentLinkedQueue 9 | 10 | import pekko.Done 11 | import pekko.stream.scaladsl.Sink 12 | 13 | class MockedKafkaProducerInterface() extends KafkaProducerInterface { 14 | val producedData: ConcurrentLinkedQueue[ReducedConsumerRecord] = new ConcurrentLinkedQueue[ReducedConsumerRecord]() 15 | 16 | override def getSink: Sink[ReducedConsumerRecord, Future[Done]] = 17 | Sink.foreach[ReducedConsumerRecord] { reducedConsumerRecord => 18 | producedData.add(reducedConsumerRecord) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/MockedRestoreClientInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import io.aiven.guardian.kafka.restore.configs.Restore 5 | import org.apache.pekko 6 | 7 | import scala.concurrent.Future 8 | 9 | import pekko.NotUsed 10 | import pekko.actor.ActorSystem 11 | import pekko.stream.scaladsl.Flow 12 | import pekko.util.ByteString 13 | 14 | class MockedRestoreClientInterface(backupData: Map[String, ByteString])(implicit 15 | override val kafkaProducerInterface: MockedKafkaProducerInterface, 16 | override val restoreConfig: Restore, 17 | override val kafkaClusterConfig: KafkaCluster, 18 | override val system: ActorSystem 19 | ) extends RestoreClientInterface[MockedKafkaProducerInterface] { 20 | 21 | override def retrieveBackupKeys: Future[List[String]] = Future.successful( 22 | backupData.keys.toList 23 | ) 24 | 25 | override def downloadFlow: Flow[String, ByteString, NotUsed] = Flow.fromFunction { key: String => 26 | backupData(key) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/RestoreClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class RestoreClientInterfaceSpec 8 | extends AnyPropTestKit(ActorSystem("RestoreClientInterfaceSpec")) 9 | with RestoreClientInterfaceTest { 10 | override val compression: Option[Compression] = None 11 | } 12 | -------------------------------------------------------------------------------- /core-s3/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | pekko.connectors.s3 { 2 | buffer = ${?PEKKO_CONNECTORS_S3_BUFFER} 3 | disk-buffer-path = ${?PEKKO_CONNECTORS_S3_DISK_BUFFER_PATH} 4 | 5 | forward-proxy { 6 | scheme = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_SCHEME} 7 | host = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_HOST} 8 | port = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_PORT} 9 | 10 | credentials { 11 | username = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_CREDENTIALS_USERNAME} 12 | password = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_CREDENTIALS_PASSWORD} 13 | } 14 | } 15 | 16 | aws { 17 | credentials { 18 | access-key-id = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_ACCESS_KEY_ID} 19 | secret-access-key = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_SECRET_ACCESS_KEY} 20 | token = 
${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_TOKEN} 21 | provider = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER} 22 | } 23 | 24 | region { 25 | default-region = ${?PEKKO_CONNECTORS_S3_REGION_DEFAULT_REGION} 26 | provider = ${?PEKKO_CONNECTORS_S3_REGION_PROVIDER} 27 | } 28 | } 29 | 30 | path-style-access = ${?PEKKO_CONNECTORS_S3_PATH_STYLE_ACCESS} 31 | access-style = ${?PEKKO_CONNECTORS_S3_ACCESS_STYLE} 32 | endpoint-url = ${?PEKKO_CONNECTORS_S3_ENDPOINT_URL} 33 | list-bucket-api-version = ${?PEKKO_CONNECTORS_S3_LIST_BUCKET_API_VERSION} 34 | validate-object-key = ${?PEKKO_CONNECTORS_S3_VALIDATE_OBJECT_KEY} 35 | 36 | retry-settings { 37 | max-retries = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MAX_RETRIES} 38 | min-backoff = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MIN_BACKOFF} 39 | max-backoff = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MAX_BACKOFF} 40 | random-factor = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_RANDOM_FACTOR} 41 | } 42 | } 43 | 44 | s3-headers = { 45 | canned-acl = ${?S3_HEADERS_CANNED_ACL} 46 | storage-class = ${?S3_HEADERS_STORAGE_CLASS} 47 | server-side-encryption = ${?S3_HEADERS_SERVER_SIDE_ENCRYPTION} 48 | } 49 | 50 | s3-config = { 51 | data-bucket = ${?S3_CONFIG_DATA_BUCKET} 52 | data-bucket-prefix = ${?S3_CONFIG_DATA_BUCKET_PREFIX} 53 | error-restart-settings = { 54 | min-backoff = 5 millis 55 | min-backoff = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MIN_BACKOFF} 56 | max-backoff = 10 seconds 57 | max-backoff = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_BACKOFF} 58 | random-factor = 0.2 59 | random-factor = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_RANDOM_FACTOR} 60 | max-restarts = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_RESTARTS} 61 | max-restarts-within = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_RESTARTS_WITHIN} 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /core-s3/src/main/scala/io/aiven/guardian/kafka/s3/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | package s3 3 | 4 | import io.aiven.guardian.kafka.PureConfigUtils._ 5 | import io.aiven.guardian.kafka.s3.configs.S3 6 | import org.apache.pekko 7 | import pureconfig.ConfigCursor 8 | import pureconfig.ConfigReader 9 | import pureconfig.ConfigReader._ 10 | import pureconfig.ConfigSource 11 | import pureconfig.error.UserValidationFailed 12 | 13 | import scala.annotation.nowarn 14 | import scala.concurrent.duration.FiniteDuration 15 | 16 | import pekko.stream.RestartSettings 17 | import pekko.stream.connectors.s3.MetaHeaders 18 | import pekko.stream.connectors.s3.S3Headers 19 | import pekko.stream.connectors.s3.headers.CannedAcl 20 | import pekko.stream.connectors.s3.headers.ServerSideEncryption 21 | import pekko.stream.connectors.s3.headers.StorageClass 22 | 23 | trait Config { 24 | 25 | // TODO Unfortunately the following boilerplate is here because the S3 Pekko Connectors providers no public constructors 26 | // for S3Headers apart from the limited S3Headers(). 
This means we can't use pureconfig.generic.auto._ and hence 27 | // we have to write this out manually 28 | 29 | implicit val cannedACLConfigReader: ConfigReader[CannedAcl] = (cur: ConfigCursor) => 30 | cur.asString.flatMap { 31 | case CannedAcl.AuthenticatedRead.value => Right(CannedAcl.AuthenticatedRead) 32 | case CannedAcl.AwsExecRead.value => Right(CannedAcl.AwsExecRead) 33 | case CannedAcl.BucketOwnerFullControl.value => Right(CannedAcl.BucketOwnerFullControl) 34 | case CannedAcl.BucketOwnerRead.value => Right(CannedAcl.BucketOwnerRead) 35 | case CannedAcl.Private.value => Right(CannedAcl.Private) 36 | case CannedAcl.PublicRead.value => Right(CannedAcl.PublicRead) 37 | case CannedAcl.PublicReadWrite.value => Right(CannedAcl.PublicReadWrite) 38 | case rest => Left(failure(cur, rest, "CannedAcl")) 39 | } 40 | 41 | implicit val metaHeadersConfigReader: ConfigReader[MetaHeaders] = mapReader[String].map(MetaHeaders.apply) 42 | 43 | implicit val storageClassConfigReader: ConfigReader[StorageClass] = (cur: ConfigCursor) => 44 | cur.asString.flatMap { 45 | case StorageClass.Standard.storageClass => Right(StorageClass.Standard) 46 | case StorageClass.InfrequentAccess.storageClass => Right(StorageClass.InfrequentAccess) 47 | case StorageClass.Glacier.storageClass => Right(StorageClass.Glacier) 48 | case StorageClass.ReducedRedundancy.storageClass => Right(StorageClass.ReducedRedundancy) 49 | case rest => Left(failure(cur, rest, "StorageClass")) 50 | } 51 | 52 | implicit val serverSideEncryptionReader: ConfigReader[ServerSideEncryption] = (cur: ConfigCursor) => 53 | cur.fluent.at("type").asString.flatMap { 54 | case "aes256" => 55 | Right(ServerSideEncryption.aes256()) 56 | case "kms" => 57 | ConfigReader 58 | .forProduct2("key-id", "context") { (keyId: String, context: Option[String]) => 59 | val base = ServerSideEncryption.kms(keyId) 60 | context.fold(base)(base.withContext) 61 | } 62 | .from(cur) 63 | case "customer-keys" => 64 | ConfigReader 65 | .forProduct2("key", "md5") { (key: String, md5: Option[String]) => 66 | val base = ServerSideEncryption.customerKeys(key) 67 | md5.fold(base)(base.withMd5) 68 | } 69 | .from(cur) 70 | } 71 | 72 | implicit val s3HeadersConfigReader: ConfigReader[S3Headers] = 73 | ConfigReader.forProduct5("canned-acl", 74 | "meta-headers", 75 | "storage-class", 76 | "custom-headers", 77 | "server-side-encryption" 78 | ) { 79 | (cannedAcl: Option[CannedAcl], 80 | metaHeaders: Option[MetaHeaders], 81 | storageClass: Option[StorageClass], 82 | customHeaders: Option[Map[String, String]], 83 | serverSideEncryption: Option[ServerSideEncryption] 84 | ) => 85 | val base = S3Headers() 86 | val base2 = cannedAcl.fold(base)(base.withCannedAcl) 87 | val base3 = metaHeaders.fold(base2)(base2.withMetaHeaders) 88 | val base4 = storageClass.fold(base3)(base3.withStorageClass) 89 | val base5 = customHeaders.fold(base4)(base4.withCustomHeaders) 90 | serverSideEncryption.fold(base5)(base5.withServerSideEncryption) 91 | } 92 | 93 | implicit lazy val s3Headers: S3Headers = ConfigSource.default.at("s3-headers").loadOrThrow[S3Headers] 94 | 95 | // See https://pureconfig.github.io/docs/error-handling.html#validations-in-custom-readers for details 96 | // on custom validation 97 | private val restartSettingsBase = ConfigReader.forProduct5( 98 | "min-backoff", 99 | "max-backoff", 100 | "random-factor", 101 | "max-restarts", 102 | "max-restarts-within" 103 | ) { 104 | (minBackoff: FiniteDuration, 105 | maxBackoff: FiniteDuration, 106 | randomFactor: Double, 107 | maxRestarts: Option[Int], 108 | 
maxRestartsWithin: Option[FiniteDuration] 109 | ) => 110 | (minBackoff, maxBackoff, randomFactor, maxRestarts, maxRestartsWithin) 111 | } 112 | 113 | implicit val restartSettingsConfigReader: ConfigReader[RestartSettings] = 114 | ConfigReader.fromCursor[RestartSettings] { cur => 115 | restartSettingsBase.from(cur).flatMap { 116 | case (_, _, _, Some(_), None) => 117 | cur.failed( 118 | UserValidationFailed( 119 | "Both max-restarts and max-restarts-within need to exist if defining a maximum restarts configuration, max-restarts-within is missing" 120 | ) 121 | ) 122 | case (_, _, _, None, Some(_)) => 123 | cur.failed( 124 | UserValidationFailed( 125 | "Both max-restarts and max-restarts-within need to exist if defining a maximum restarts configuration, max-restarts is missing" 126 | ) 127 | ) 128 | case (minBackoff, maxBackoff, randomFactor, Some(maxRestarts), Some(maxRestartsWithin)) => 129 | Right(RestartSettings(minBackoff, maxBackoff, randomFactor).withMaxRestarts(maxRestarts, maxRestartsWithin)) 130 | case (minBackoff, maxBackoff, randomFactor, None, None) => 131 | Right(RestartSettings(minBackoff, maxBackoff, randomFactor)) 132 | } 133 | } 134 | 135 | @nowarn("cat=lint-byname-implicit") 136 | implicit lazy val s3Config: S3 = { 137 | import pureconfig.generic.auto._ 138 | ConfigSource.default.at("s3-config").loadOrThrow[S3] 139 | } 140 | } 141 | 142 | object Config extends Config 143 | -------------------------------------------------------------------------------- /core-s3/src/main/scala/io/aiven/guardian/kafka/s3/configs/S3.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3.configs 2 | 3 | import org.apache.pekko.stream.RestartSettings 4 | 5 | /** S3 specific configuration used when storing Kafka ConsumerRecords to a S3 bucket 6 | * 7 | * @param dataBucket 8 | * The bucket where a Kafka Consumer directly streams data into as storage 9 | * @param dataBucketPrefix 10 | * Prefix for the data bucket (if any) 11 | * @param errorRestartSettings 12 | * Restart settings that are used whenever an pekko-stream encounters an error 13 | */ 14 | final case class S3(dataBucket: String, dataBucketPrefix: Option[String], errorRestartSettings: RestartSettings) 15 | 16 | object S3 { 17 | def apply(dataBucket: String, errorRestartSettings: RestartSettings): S3 = S3(dataBucket, None, errorRestartSettings) 18 | } 19 | -------------------------------------------------------------------------------- /core-s3/src/main/scala/io/aiven/guardian/kafka/s3/errors/S3Errors.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3.errors 2 | 3 | import io.aiven.guardian.kafka.Errors 4 | import org.apache.pekko 5 | 6 | import pekko.http.scaladsl.model.headers.ByteRange 7 | import pekko.stream.connectors.s3.S3Headers 8 | 9 | sealed abstract class S3Errors extends Errors 10 | 11 | object S3Errors { 12 | final case class ExpectedObjectToExist(bucket: String, 13 | key: String, 14 | range: Option[ByteRange], 15 | versionId: Option[String], 16 | s3Headers: S3Headers 17 | ) extends S3Errors { 18 | override def getMessage: String = { 19 | val finalVersionId = versionId.getOrElse("latest") 20 | s"S3 object key:$key and version:$finalVersionId inside bucket:$bucket doesn't exist. 
S3 headers are ${s3Headers.toString()}" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /core-s3/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/Generators.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 4 | import org.apache.pekko.stream.RestartSettings 5 | import org.scalacheck.Gen 6 | 7 | import scala.annotation.nowarn 8 | import scala.concurrent.duration._ 9 | import scala.language.postfixOps 10 | 11 | object Generators { 12 | val MaxBucketLength: Int = 63 13 | 14 | // See https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html for valid 15 | // bucketnames 16 | 17 | lazy val bucketLetterOrNumberCharGen: Gen[Char] = Gen.frequency( 18 | (1, Gen.numChar), 19 | (1, Gen.alphaLowerChar) 20 | ) 21 | 22 | def bucketAllCharGen(useVirtualDotHost: Boolean): Gen[Char] = { 23 | val base = List( 24 | (10, Gen.alphaLowerChar), 25 | (1, Gen.const('-')), 26 | (1, Gen.numChar) 27 | ) 28 | 29 | val frequency = if (useVirtualDotHost) (1, Gen.const('.')) +: base else base 30 | 31 | Gen.frequency(frequency: _*) 32 | } 33 | 34 | @nowarn("msg=not.*?exhaustive") 35 | private def checkInvalidDuplicateChars(chars: List[Char]): Boolean = 36 | chars.sliding(2).forall { case Seq(before, after) => 37 | !(before == '.' && after == '.' || before == '-' && after == '.' || before == '.' 
&& after == '-') 38 | } 39 | 40 | private def checkAlphaChar(c: Char): Boolean = 41 | c >= 'a' && c <= 'z' 42 | 43 | private def allCharCheck(useVirtualDotHost: Boolean, string: String): Boolean = 44 | if (useVirtualDotHost) { 45 | string.forall(char => Character.isDigit(char) || checkAlphaChar(char) || char == '-' || char == '.') && 46 | checkInvalidDuplicateChars(string.toList) 47 | } else 48 | string.forall(char => Character.isDigit(char) || checkAlphaChar(char) || char == '-') 49 | 50 | def validatePrefix(useVirtualDotHost: Boolean, prefix: Option[String]): Option[String] = { 51 | val withoutWhitespace = prefix match { 52 | case Some(value) if value.trim == "" => None 53 | case Some(value) => Some(value) 54 | case None => None 55 | } 56 | 57 | withoutWhitespace match { 58 | case Some(value) if !(Character.isDigit(value.head) || checkAlphaChar(value.head)) => 59 | throw new IllegalArgumentException( 60 | s"Invalid starting digit for prefix $value, ${value.head} needs to be an alpha char or digit" 61 | ) 62 | case Some(value) if value.length > 1 => 63 | if (!allCharCheck(useVirtualDotHost, value.drop(1))) 64 | throw new IllegalArgumentException( 65 | s"Prefix $value contains invalid characters" 66 | ) 67 | case Some(value) if value.length > MaxBucketLength - 1 => 68 | throw new IllegalArgumentException( 69 | s"Prefix is too long, it has size ${value.length} where as the max bucket size is $MaxBucketLength" 70 | ) 71 | case _ => () 72 | } 73 | 74 | withoutWhitespace 75 | } 76 | 77 | def bucketNameGen(useVirtualDotHost: Boolean, prefix: Option[String] = None): Gen[String] = { 78 | val finalPrefix = validatePrefix(useVirtualDotHost, prefix) 79 | 80 | for { 81 | range <- { 82 | val maxLength = finalPrefix match { 83 | case Some(p) => MaxBucketLength - p.length 84 | case None => MaxBucketLength 85 | } 86 | 87 | if (maxLength > 3) 88 | Gen.choose(3, maxLength) 89 | else 90 | Gen.const(maxLength) 91 | } 92 | startString = finalPrefix.getOrElse("") 93 | 94 | bucketName <- range match { 95 | case 3 => 96 | for { 97 | first <- bucketLetterOrNumberCharGen 98 | second <- bucketAllCharGen(useVirtualDotHost) 99 | third <- bucketLetterOrNumberCharGen 100 | } yield startString ++ List(first, second, third).mkString 101 | case _ => 102 | for { 103 | first <- bucketLetterOrNumberCharGen 104 | last <- bucketLetterOrNumberCharGen 105 | middle <- { 106 | val gen = Gen.listOfN(range - 2, bucketAllCharGen(useVirtualDotHost)) 107 | if (useVirtualDotHost) gen.filter(checkInvalidDuplicateChars) else gen 108 | } 109 | } yield startString ++ first.toString ++ middle.mkString ++ last.toString 110 | } 111 | } yield bucketName 112 | } 113 | 114 | val restartSetting: RestartSettings = RestartSettings( 115 | 5 millis, 116 | 10 seconds, 117 | 0.2 118 | ) 119 | 120 | def s3ConfigGen(useVirtualDotHost: Boolean, prefix: Option[String] = None): Gen[S3Config] = for { 121 | dataBucket <- bucketNameGen(useVirtualDotHost, prefix) 122 | } yield S3Config(dataBucket, restartSetting) 123 | 124 | } 125 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/Main.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import cats.data.NonEmptyList 4 | import cats.implicits._ 5 | import com.monovore.decline.Command 6 | import com.monovore.decline.CommandApp 7 | import com.monovore.decline.Opts 8 | import com.typesafe.scalalogging.LazyLogging 9 | import 
io.aiven.guardian.kafka.s3.Entry.computeAndDeleteBuckets 10 | import org.apache.pekko 11 | 12 | import scala.concurrent._ 13 | import scala.concurrent.duration._ 14 | import scala.util.control.NonFatal 15 | 16 | import pekko.actor.ActorSystem 17 | import pekko.stream.Attributes 18 | import pekko.stream.connectors.s3.S3Attributes 19 | import pekko.stream.connectors.s3.S3Settings 20 | import pekko.stream.connectors.s3.scaladsl.S3 21 | import pekko.stream.scaladsl.Sink 22 | 23 | class Entry 24 | extends CommandApp( 25 | name = "guardian-s3-test-utils", 26 | header = "Guardian S3 Test Utilities", 27 | main = { 28 | val cleanBucketsCommand = Command( 29 | name = "clean-buckets", 30 | header = "Clean buckets left over by Guardian S3 tests" 31 | ) { 32 | val prefixOpt: Opts[String] = 33 | Opts 34 | .option[String]("prefix", help = "Only delete buckets with specified prefix") 35 | 36 | val excludeBucketsOpt: Opts[Option[NonEmptyList[String]]] = 37 | Opts 38 | .options[String]("exclude-buckets", 39 | help = "Buckets that will always be excluded from cleanup, irrespective of prefix" 40 | ) 41 | .orNone 42 | 43 | (prefixOpt, excludeBucketsOpt).tupled 44 | } 45 | 46 | Opts.subcommand(cleanBucketsCommand).map { case (bucketPrefix, excludeBuckets) => 47 | implicit val system: ActorSystem = ActorSystem() 48 | implicit val ec: ExecutionContext = system.dispatcher 49 | implicit val s3Settings: S3Settings = S3Settings() 50 | 51 | val excludeBucketsSet = excludeBuckets.map(_.toList.toSet).getOrElse(Set.empty) 52 | 53 | try { 54 | Await.result(computeAndDeleteBuckets(bucketPrefix, excludeBucketsSet), Duration.Inf) 55 | System.exit(0) 56 | } catch { 57 | case NonFatal(_) => 58 | System.exit(1) 59 | } 60 | } 61 | } 62 | ) 63 | 64 | object Entry extends LazyLogging { 65 | def computeAndDeleteBuckets(bucketPrefix: String, excludeBuckets: Set[String])(implicit 66 | executionContext: ExecutionContext, 67 | system: ActorSystem, 68 | s3Settings: S3Settings 69 | ): Future[Set[String]] = for { 70 | bucketsToDelete <- computeBucketsToDelete(bucketPrefix, excludeBuckets) 71 | _ <- if (bucketsToDelete.nonEmpty) { 72 | deleteBuckets(bucketsToDelete) 73 | } else 74 | Future { 75 | logger.info("No buckets to delete") 76 | } 77 | } yield bucketsToDelete 78 | 79 | def computeBucketsToDelete(bucketPrefix: String, excludeBuckets: Set[String])(implicit 80 | system: ActorSystem, 81 | s3Settings: S3Settings 82 | ): Future[Set[String]] = 83 | S3.listBuckets() 84 | .withAttributes(S3Attributes.settings(s3Settings)) 85 | .runWith(Sink.seq) 86 | .map { allBuckets => 87 | allBuckets.map(_.name).toSet.filter(fromS3Bucket => fromS3Bucket.startsWith(bucketPrefix)).diff(excludeBuckets) 88 | }(ExecutionContext.parasitic) 89 | 90 | def deleteBuckets( 91 | buckets: Set[String] 92 | )(implicit executionContext: ExecutionContext, system: ActorSystem, s3Settings: S3Settings): Future[Unit] = { 93 | implicit val s3Attrs: Attributes = S3Attributes.settings(s3Settings) 94 | val futures = buckets.map { bucket => 95 | logger.info(s"Deleting bucket $bucket") 96 | S3TestUtils.cleanAndDeleteBucket(bucket) 97 | } 98 | Future.sequence(futures).map(_ => ())(ExecutionContext.parasitic) 99 | } 100 | } 101 | 102 | object Main extends Entry 103 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/MinioContainer.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import 
com.dimafeng.testcontainers.GenericContainer 4 | import org.testcontainers.containers.wait.strategy.Wait 5 | 6 | import java.time.Duration 7 | 8 | class MinioContainer(accessKey: String, secretKey: String) 9 | extends GenericContainer( 10 | "minio/minio", 11 | exposedPorts = List(9000), 12 | waitStrategy = Some(Wait.forHttp("/minio/health/ready").forPort(9000).withStartupTimeout(Duration.ofSeconds(10))), 13 | command = List("server", "/data"), 14 | env = Map( 15 | "MINIO_ACCESS_KEY" -> accessKey, 16 | "MINIO_SECRET_KEY" -> secretKey 17 | ) 18 | ) { 19 | 20 | def getHostAddress: String = 21 | s"http://${container.getHost}:${container.getMappedPort(9000)}" 22 | } 23 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/MinioS3Test.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import com.dimafeng.testcontainers.ForAllTestContainer 4 | import org.apache.pekko 5 | import org.scalatest.Suite 6 | import software.amazon.awssdk.auth.credentials.AwsBasicCredentials 7 | import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider 8 | import software.amazon.awssdk.regions.Region 9 | import software.amazon.awssdk.regions.providers.AwsRegionProvider 10 | 11 | import pekko.stream.connectors.s3.AccessStyle 12 | import pekko.stream.connectors.s3.S3Settings 13 | import pekko.testkit.TestKitBase 14 | 15 | trait MinioS3Test extends ForAllTestContainer with TestKitBase { this: Suite => 16 | private val S3DummyAccessKey = "DUMMY_ACCESS_KEY" 17 | private val S3DummySecretKey = "DUMMY_SECRET_KEY" 18 | 19 | lazy val s3Settings: S3Settings = S3Settings() 20 | .withEndpointUrl(container.getHostAddress) 21 | .withCredentialsProvider( 22 | StaticCredentialsProvider.create(AwsBasicCredentials.create(S3DummyAccessKey, S3DummySecretKey)) 23 | ) 24 | .withS3RegionProvider(new AwsRegionProvider { 25 | lazy val getRegion: Region = Region.US_EAST_1 26 | }) 27 | .withAccessStyle(AccessStyle.PathAccessStyle) 28 | 29 | override lazy val container: MinioContainer = new MinioContainer(S3DummyAccessKey, S3DummySecretKey) 30 | } 31 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/S3TestUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import com.typesafe.scalalogging.StrictLogging 4 | import markatta.futiles.Retry 5 | import org.apache.pekko 6 | 7 | import scala.concurrent.ExecutionContext 8 | import scala.concurrent.Future 9 | import scala.concurrent.duration._ 10 | import scala.language.postfixOps 11 | 12 | import pekko.actor.ActorSystem 13 | import pekko.stream.Attributes 14 | import pekko.stream.connectors.s3.scaladsl.S3 15 | import pekko.stream.scaladsl.Sink 16 | 17 | object S3TestUtils extends StrictLogging { 18 | 19 | /** Completely cleans a bucket contents as well as deleting it afterwards. 
20 | */ 21 | def cleanAndDeleteBucket(bucket: String)(implicit system: ActorSystem, s3Attrs: Attributes): Future[Unit] = { 22 | implicit val ec: ExecutionContext = system.dispatcher 23 | for { 24 | _ <- S3.deleteBucketContents(bucket, deleteAllVersions = true).withAttributes(s3Attrs).runWith(Sink.ignore) 25 | multiParts <- 26 | S3.listMultipartUpload(bucket, None).withAttributes(s3Attrs).runWith(Sink.seq) 27 | _ <- Future.sequence(multiParts.map { part => 28 | S3.deleteUpload(bucket, part.key, part.uploadId) 29 | }) 30 | _ <- Retry.retryWithBackOff( 31 | 5, 32 | 100 millis, 33 | throwable => throwable.getMessage.contains("The bucket you tried to delete is not empty") 34 | )(S3.deleteBucket(bucket)) 35 | _ = logger.info(s"Completed deleting bucket $bucket") 36 | } yield () 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/org/apache/pekko/stream/connectors/s3/GeneratorsSpec.scala: -------------------------------------------------------------------------------- 1 | package org.apache.pekko.stream.connectors.s3 2 | 3 | import com.typesafe.config.Config 4 | import com.typesafe.config.ConfigFactory 5 | import com.typesafe.config.ConfigValueFactory 6 | import io.aiven.guardian.kafka.s3.Generators 7 | import org.scalacheck.Gen 8 | import org.scalatest.matchers.must.Matchers 9 | import org.scalatest.propspec.AnyPropSpec 10 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 11 | 12 | import scala.annotation.nowarn 13 | 14 | class GeneratorsSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 15 | 16 | def createBasicConfigFactory(virtualDotHost: Boolean): Config = { 17 | @nowarn("msg=possible missing interpolator: detected an interpolated expression") 18 | val baseS3SettingsConf = 19 | """ 20 | |buffer = "memory" 21 | |disk-buffer-path = "" 22 | | 23 | |aws { 24 | | credentials { 25 | | provider = default 26 | | } 27 | | region { 28 | | provider = default 29 | | } 30 | |} 31 | |access-style = virtual 32 | |list-bucket-api-version = 2 33 | |validate-object-key = true 34 | |retry-settings { 35 | | max-retries = 3 36 | | min-backoff = 200ms 37 | | max-backoff = 10s 38 | | random-factor = 0.0 39 | |} 40 | |multipart-upload { 41 | | retry-settings = ${retry-settings} 42 | |} 43 | |sign-anonymous-requests = true 44 | |""".stripMargin 45 | 46 | val config = ConfigFactory.parseString(baseS3SettingsConf).resolve() 47 | if (virtualDotHost) 48 | config.withValue("access-style", ConfigValueFactory.fromAnyRef("virtual")) 49 | else 50 | config.withValue("access-style", ConfigValueFactory.fromAnyRef("path")) 51 | } 52 | 53 | property("Bucket name generators generates valid bucket names according to S3Settings with virtualDotHost") { 54 | forAll(Generators.bucketNameGen(useVirtualDotHost = true)) { bucket => 55 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(true))) 56 | } 57 | } 58 | 59 | property("Bucket name generators generates valid bucket names according to S3Settings without virtualDotHost") { 60 | forAll(Generators.bucketNameGen(useVirtualDotHost = false)) { bucket => 61 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(false))) 62 | } 63 | } 64 | 65 | def withPrefixGen(useVirtualDotHost: Boolean): Gen[String] = for { 66 | range <- Gen.choose(2, Generators.MaxBucketLength - 3) 67 | firstChar <- Generators.bucketLetterOrNumberCharGen 68 | chars <- Gen.listOfN(range, 
Generators.bucketAllCharGen(useVirtualDotHost = false)) 69 | bucketName <- Generators.bucketNameGen(useVirtualDotHost, Some((firstChar +: chars).mkString)) 70 | } yield bucketName 71 | 72 | property( 73 | "Bucket name generators generates valid bucket names according to S3Settings with virtualDotHost and prefix" 74 | ) { 75 | forAll(withPrefixGen(useVirtualDotHost = true)) { bucket => 76 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(true))) 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /core/README.md: -------------------------------------------------------------------------------- 1 | # Guardian for Apache Kafka - Core 2 | 3 | This module contains the core configuration for setting up the Kafka Consumer. 4 | 5 | By default core uses [Alpakka Kafka][alpakka-kafka] to interact with a Kafka Cluster; however, you can also provide your 6 | own implementation by extending the `aiven.io.guardian.kafka.KafkaClientInterface`. Since Kafka consumers handle auto 7 | commit of cursors, the `KafkaClientInterface` uses a `SourceWithContext` so that it's possible for the `Source` 8 | to automatically commit cursors when successfully reading topics. 9 | 10 | ## Configuration 11 | 12 | Specification (including environment variable overrides) can be found [here](/src/main/resources/reference.conf). 13 | 14 | The primary `aiven.io.guardian.kafka.KafkaClient` is configured using [Alpakka Kafka][alpakka-kafka] [Consumer 15 | configuration](https://doc.akka.io/docs/alpakka-kafka/current/consumer.html), which also contains the default values. 16 | The committing of Kafka cursors also requires 17 | [CommitterSettings configuration](https://doc.akka.io/docs/alpakka-kafka/current/consumer.html#committer-sink). 18 | 19 | There is also a generic `aiven.io.guardian.kafka.configs.KafkaCluster` configuration at `"kafka-cluster"` for anything not specific 20 | to the Kafka consumer, i.e. which topics to backup/compact/restore.
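The following sketch is an editor-added illustration rather than part of the original README: assuming the core module is on the classpath, it shows how a `"kafka-cluster"` section could be resolved into `io.aiven.guardian.kafka.configs.KafkaCluster` with the same PureConfig `loadOrThrow` pattern used by `io.aiven.guardian.kafka.Config`. The topic names are hypothetical.

import io.aiven.guardian.kafka.configs.KafkaCluster
import pureconfig.ConfigSource
import pureconfig.generic.auto._

import scala.annotation.nowarn

object KafkaClusterConfigExample extends App {
  // Hypothetical topics; in a real deployment they come from reference.conf or
  // the KAFKA_CLUSTER_TOPICS environment variable override.
  val conf =
    """
      |kafka-cluster = {
      |  topics = ["orders", "payments"]
      |}
      |""".stripMargin

  @nowarn("cat=lint-byname-implicit")
  val kafkaCluster: KafkaCluster = ConfigSource.string(conf).at("kafka-cluster").loadOrThrow[KafkaCluster]

  assert(kafkaCluster == KafkaCluster(Set("orders", "payments")))
}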
21 | 22 | [alpakka-kafka]: https://doc.akka.io/docs/alpakka-kafka/current/home.html 23 | -------------------------------------------------------------------------------- /core/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | # See https://github.com/akka/akka-http/issues/3201 and https://discuss.lightbend.com/t/about-nomoreelementsneeded-exception/8599 2 | 3 | pekko.http.client.stream-cancellation-delay = 1000 millis 4 | pekko.http.client.stream-cancellation-delay = ${?PEKKO_HTTP_CLIENT_STREAM_CANCELLATION_DELAY} 5 | 6 | kafka-cluster = { 7 | topics = [] 8 | topics = ${?KAFKA_CLUSTER_TOPICS} 9 | } 10 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | 11 | @nowarn("cat=lint-byname-implicit") 12 | implicit lazy val kafkaClusterConfig: KafkaCluster = 13 | ConfigSource.default.at("kafka-cluster").loadOrThrow[KafkaCluster] 14 | } 15 | 16 | object Config extends Config 17 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/Errors.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | trait Errors extends Exception 4 | 5 | object Errors { 6 | case object ExpectedStartOfSource extends Errors { 7 | override def getMessage: String = "Always expect a single element at the start of a stream" 8 | } 9 | 10 | final case class UnhandledStreamCase[T](elems: Seq[T]) extends Errors { 11 | override def getMessage: String = s"Unhandled case for stream ${elems.map(_.toString).mkString(",")}" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/ExtensionsMethods.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import java.time.OffsetDateTime 4 | 5 | object ExtensionsMethods { 6 | 7 | implicit final class OffsetDateTimeMethods(value: OffsetDateTime) { 8 | def >(other: OffsetDateTime): Boolean = value.compareTo(other) > 0 9 | def >=(other: OffsetDateTime): Boolean = value.compareTo(other) > 0 || value == other 10 | def <(other: OffsetDateTime): Boolean = value.compareTo(other) < 0 11 | def <=(other: OffsetDateTime): Boolean = value.compareTo(other) < 0 || value == other 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/PureConfigUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import pureconfig.ConfigCursor 4 | import pureconfig.error.CannotConvert 5 | import pureconfig.error.ConfigReaderFailures 6 | import pureconfig.error.ConvertFailure 7 | 8 | object PureConfigUtils { 9 | private[kafka] def failure(cur: ConfigCursor, value: String, `type`: String) = ConfigReaderFailures( 10 | ConvertFailure( 11 | CannotConvert(value, `type`, s"Invalid ${`type`}"), 12 | cur 13 | ) 14 | ) 15 | } 16 | 
-------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/Utils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import scala.annotation.tailrec 4 | 5 | import java.time.OffsetDateTime 6 | import java.time.format.DateTimeParseException 7 | 8 | object Utils { 9 | 10 | private def parseToOffsetDateTime(string: String): Option[OffsetDateTime] = 11 | try 12 | Some(OffsetDateTime.parse(string)) 13 | catch { 14 | case _: DateTimeParseException => 15 | None 16 | } 17 | 18 | @tailrec 19 | def keyToOffsetDateTime(key: String): OffsetDateTime = { 20 | val withoutExtension = key.substring(0, key.lastIndexOf('.')) 21 | parseToOffsetDateTime(withoutExtension) match { 22 | case Some(offsetDateTime) => offsetDateTime 23 | case None => keyToOffsetDateTime(withoutExtension) 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/codecs/Circe.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.codecs 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import io.circe._ 5 | import io.circe.syntax._ 6 | import org.apache.kafka.common.record.TimestampType 7 | 8 | trait Circe { 9 | implicit val kafkaTimestampTypeDecoder: Decoder[TimestampType] = (c: HCursor) => 10 | c.as[Int].flatMap { id => 11 | TimestampType 12 | .values() 13 | .find(_.id == id) 14 | .toRight(DecodingFailure(s"No TimestampType with $id", c.history)) 15 | } 16 | 17 | implicit val kafkaTimestampTypeEncoder: Encoder[TimestampType] = Encoder.instance[TimestampType](_.id.asJson) 18 | 19 | implicit val reducedConsumerRecordDecoder: Decoder[ReducedConsumerRecord] = Decoder.forProduct7( 20 | "topic", 21 | "partition", 22 | "offset", 23 | "key", 24 | "value", 25 | "timestamp", 26 | "timestamp_type" 27 | )(ReducedConsumerRecord.apply) 28 | 29 | implicit val reducedConsumerRecordEncoder: Encoder[ReducedConsumerRecord] = Encoder.forProduct7( 30 | "topic", 31 | "partition", 32 | "offset", 33 | "key", 34 | "value", 35 | "timestamp", 36 | "timestamp_type" 37 | )(x => ReducedConsumerRecord.unapply(x).get) 38 | } 39 | 40 | object Circe extends Circe 41 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/configs/KafkaCluster.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.configs 2 | 3 | /** @param topics 4 | * The set of topics to subscribe to (and hence backup and restore) 5 | */ 6 | final case class KafkaCluster(topics: Set[String]) 7 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/models/BackupObjectMetadata.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.models 2 | 3 | final case class BackupObjectMetadata(compression: Option[CompressionType]) 4 | 5 | object BackupObjectMetadata { 6 | def fromKey(key: String): BackupObjectMetadata = 7 | if (key.endsWith(".gz")) 8 | BackupObjectMetadata(Some(Gzip)) 9 | else 10 | BackupObjectMetadata(None) 11 | } 12 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/models/CompressionType.scala: 
-------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.models 2 | 3 | sealed trait CompressionType { 4 | val pretty: String 5 | } 6 | 7 | case object Gzip extends CompressionType { 8 | override val pretty: String = "Gzip" 9 | } 10 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/models/ReducedConsumerRecord.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.models 2 | 3 | import org.apache.kafka.common.record.TimestampType 4 | 5 | import java.time.Instant 6 | import java.time.OffsetDateTime 7 | import java.time.ZoneId 8 | 9 | /** A `ConsumerRecord` that only contains the necessary data for guardian 10 | * 11 | * @param topic 12 | * The kafka topic (same as `ConsumerRecord` `topic`) 13 | * @param offset 14 | * The kafka offset (same as `ConsumerRecord` `offset`) 15 | * @param key 16 | * Base64 encoded version of the original ConsumerRecord key as a byte array 17 | * @param value 18 | * Base64 encoded version of the original ConsumerRecord value as a byte array 19 | * @param timestamp 20 | * The timestamp value (same as `ConsumerRecord` `timestamp`) 21 | * @param timestampType 22 | * The timestamp type (same as `ConsumerRecord` `timestampType`) 23 | */ 24 | final case class ReducedConsumerRecord(topic: String, 25 | partition: Int, 26 | offset: Long, 27 | key: Option[String], 28 | value: String, 29 | timestamp: Long, 30 | timestampType: TimestampType 31 | ) { 32 | def toOffsetDateTime: OffsetDateTime = 33 | Instant.ofEpochMilli(this.timestamp).atZone(ZoneId.of("UTC")).toOffsetDateTime 34 | } 35 | -------------------------------------------------------------------------------- /core/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | pekko { 2 | log-dead-letters-during-shutdown = false 3 | log-dead-letters = 0 4 | } 5 | -------------------------------------------------------------------------------- /core/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | [%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/kafka/ConfigSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import org.scalacheck.Arbitrary 5 | import org.scalacheck.Gen 6 | import org.scalatest.matchers.must.Matchers 7 | import org.scalatest.propspec.AnyPropSpec 8 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 9 | import pureconfig.ConfigSource 10 | 11 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 12 | implicit val kafkaClusterArb: Arbitrary[KafkaCluster] = Arbitrary( 13 | Gen.containerOf[Set, String](Gen.alphaStr).map(topics => KafkaCluster(topics)) 14 | ) 15 | 16 | property("Valid KafkaClusterConfig configs should parse correctly") { 17 | forAll { (kafkaClusterConfig: KafkaCluster) => 18 | val conf = 19 | s""" 20 | |kafka-cluster = { 21 | | topics = [${kafkaClusterConfig.topics.map(topic => s""""$topic"""").mkString(",")}] 22 | |} 23 | |""".stripMargin 24 | 25 | noException should be thrownBy 
ConfigSource.string(conf).at("kafka-cluster") 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/kafka/KafkaClusterTest.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import com.dimafeng.testcontainers.ForAllTestContainer 4 | import com.dimafeng.testcontainers.KafkaContainer 5 | import io.aiven.guardian.kafka.TestUtils.KafkaFutureToCompletableFuture 6 | import io.aiven.guardian.pekko.PekkoStreamTestKit 7 | import org.apache.kafka.clients.CommonClientConfigs 8 | import org.apache.kafka.clients.admin.AdminClient 9 | import org.apache.kafka.clients.admin.NewTopic 10 | import org.apache.kafka.clients.producer.ProducerConfig 11 | import org.apache.kafka.clients.producer.ProducerRecord 12 | import org.apache.kafka.common.serialization.ByteArraySerializer 13 | import org.apache.pekko 14 | import org.scalatest.Suite 15 | 16 | import scala.concurrent.ExecutionContext 17 | import scala.concurrent.Future 18 | import scala.concurrent.duration.FiniteDuration 19 | import scala.concurrent.duration._ 20 | import scala.jdk.CollectionConverters._ 21 | import scala.jdk.FutureConverters._ 22 | import scala.language.postfixOps 23 | 24 | import pekko.Done 25 | import pekko.kafka.ConsumerSettings 26 | import pekko.kafka.ProducerSettings 27 | import pekko.kafka.scaladsl.Producer 28 | import pekko.stream.scaladsl.Source 29 | 30 | trait KafkaClusterTest extends ForAllTestContainer with PekkoStreamTestKit { this: Suite => 31 | 32 | /** Timeout constant to wait for both Pekko Streams plus initialization of consumer/kafka cluster 33 | */ 34 | val KafkaInitializationTimeoutConstant: FiniteDuration = PekkoStreamInitializationConstant + (2.5 seconds) 35 | 36 | override lazy val container: KafkaContainer = new KafkaContainer() 37 | 38 | def baseKafkaConfig: Some[ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]]] = 39 | Some( 40 | _.withBootstrapServers( 41 | container.bootstrapServers 42 | ) 43 | ) 44 | 45 | /** This config ensures that our producer is atomic since we only ever send a single kafka topic per request and there 46 | * can only be a single request at a given time 47 | * @return 48 | */ 49 | def baseProducerConfig 50 | : Some[ProducerSettings[Array[Byte], Array[Byte]] => ProducerSettings[Array[Byte], Array[Byte]]] = 51 | Some( 52 | _.withBootstrapServers( 53 | container.bootstrapServers 54 | ).withProperties( 55 | Map( 56 | ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG -> true.toString, 57 | ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION -> 1.toString, 58 | ProducerConfig.BATCH_SIZE_CONFIG -> 0.toString 59 | ) 60 | ).withParallelism(1) 61 | ) 62 | 63 | def createProducer(): ProducerSettings[Array[Byte], Array[Byte]] = 64 | ProducerSettings(system, new ByteArraySerializer, new ByteArraySerializer) 65 | .withBootstrapServers(container.bootstrapServers) 66 | 67 | /** Call this function to send a message after the next step of configured time period to trigger a rollover so the 68 | * current object will finish processing 69 | * @param duration 70 | * @param producerSettings 71 | * @param topic 72 | * @return 73 | */ 74 | def sendTopicAfterTimePeriod(duration: FiniteDuration, 75 | producerSettings: ProducerSettings[Array[Byte], Array[Byte]], 76 | topic: String 77 | ): Future[Done] = pekko.pattern.after(duration) { 78 | Source( 79 | List( 80 | new ProducerRecord[Array[Byte], Array[Byte]](topic, 
"1".getBytes, "1".getBytes) 81 | ) 82 | ).runWith(Producer.plainSink(producerSettings)) 83 | } 84 | 85 | protected var adminClient: AdminClient = _ 86 | 87 | override def afterStart(): Unit = { 88 | super.afterStart() 89 | adminClient = AdminClient.create( 90 | Map[String, AnyRef]( 91 | CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> container.bootstrapServers 92 | ).asJava 93 | ) 94 | } 95 | 96 | override def beforeStop(): Unit = { 97 | adminClient.close() 98 | super.beforeStop() 99 | } 100 | 101 | def createTopics(topics: Set[String])(implicit executionContext: ExecutionContext): Future[Unit] = 102 | for { 103 | currentTopics <- adminClient.listTopics().names().toCompletableFuture.asScala 104 | topicsToCreate = topics.diff(currentTopics.asScala.toSet) 105 | _ <- adminClient 106 | .createTopics(topicsToCreate.map { topic => 107 | new NewTopic(topic, 1, 1.toShort) 108 | }.asJava) 109 | .all() 110 | .toCompletableFuture 111 | .asScala 112 | } yield () 113 | 114 | def cleanTopics(topics: Set[String])(implicit executionContext: ExecutionContext): Future[Unit] = 115 | for { 116 | currentTopics <- adminClient.listTopics().names().toCompletableFuture.asScala 117 | topicsToDelete = topics.intersect(currentTopics.asScala.toSet) 118 | _ <- adminClient.deleteTopics(topicsToDelete.asJava).all().toCompletableFuture.asScala 119 | } yield () 120 | 121 | case object TerminationException extends Exception("termination-exception") 122 | } 123 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/kafka/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import org.apache.kafka.common.KafkaFuture 5 | import org.apache.pekko 6 | 7 | import scala.collection.immutable 8 | import scala.collection.mutable 9 | import scala.collection.mutable.ListBuffer 10 | import scala.concurrent.ExecutionContext 11 | import scala.concurrent.Future 12 | import scala.jdk.DurationConverters._ 13 | import scala.util.Failure 14 | import scala.util.Success 15 | 16 | import java.time.OffsetDateTime 17 | import java.time.temporal.ChronoUnit 18 | import java.util.concurrent.CompletableFuture 19 | 20 | import pekko.actor.ActorSystem 21 | 22 | object TestUtils { 23 | 24 | // Taken from https://stackoverflow.com/a/56763206/1519631 25 | implicit final class KafkaFutureToCompletableFuture[T](kafkaFuture: KafkaFuture[T]) { 26 | @SuppressWarnings(Array("DisableSyntax.null")) 27 | def toCompletableFuture: CompletableFuture[T] = { 28 | val wrappingFuture = new CompletableFuture[T] 29 | kafkaFuture.whenComplete { (value, throwable) => 30 | if (throwable != null) 31 | wrappingFuture.completeExceptionally(throwable) 32 | else 33 | wrappingFuture.complete(value) 34 | } 35 | wrappingFuture 36 | } 37 | } 38 | 39 | implicit final class ScalaFutureExtensionMethods[T](future: Future[T]) extends LazyLogging { 40 | def onCompleteLogError(f: () => Unit)(implicit executor: ExecutionContext): Unit = 41 | future.onComplete { result => 42 | result match { 43 | case Failure(exception) => logger.error("Future resulted in error", exception) 44 | case Success(_) => () 45 | } 46 | f() 47 | } 48 | } 49 | 50 | /** The standard Scala groupBy returns an `immutable.Map` which is unordered, this version returns an ordered 51 | * `ListMap` for when preserving insertion order is important 52 | */ 53 | implicit class GroupBy[A](val t: IterableOnce[A]) { 54 | def 
orderedGroupBy[K](f: A => K): immutable.ListMap[K, List[A]] = { 55 | var m = immutable.ListMap.empty[K, ListBuffer[A]] 56 | for (elem <- t.iterator) { 57 | val key = f(elem) 58 | m = m.updatedWith(key) { 59 | case Some(value) => Some(value.addOne(elem)) 60 | case None => Some(mutable.ListBuffer[A](elem)) 61 | } 62 | } 63 | m.map { case (k, v) => (k, v.toList) } 64 | } 65 | } 66 | 67 | final case class UnsupportedTimeUnit(chronoUnit: ChronoUnit) extends Exception(s"$chronoUnit not supported") 68 | 69 | private def recurseUntilHitTimeUnit(previousChronoUnit: ChronoUnit, buffer: BigDecimal)(implicit 70 | system: ActorSystem 71 | ): Future[Unit] = { 72 | val now = OffsetDateTime.now() 73 | val (current, max) = previousChronoUnit match { 74 | case ChronoUnit.SECONDS => 75 | (now.getSecond, 59) 76 | case ChronoUnit.MINUTES => 77 | (now.getMinute, 59) 78 | case ChronoUnit.HOURS => 79 | (now.getHour, 23) 80 | case ChronoUnit.DAYS => 81 | (now.getDayOfWeek.getValue - 1, 6) 82 | case ChronoUnit.MONTHS => 83 | (now.getMonth.getValue - 1, 11) 84 | case _ => throw UnsupportedTimeUnit(previousChronoUnit) 85 | } 86 | 87 | if (BigDecimal(current) / BigDecimal(max) * BigDecimal(100) <= buffer) 88 | Future.successful(()) 89 | else 90 | pekko.pattern.after(previousChronoUnit.getDuration.toScala)(recurseUntilHitTimeUnit(previousChronoUnit, buffer)) 91 | } 92 | 93 | def waitForStartOfTimeUnit(chronoUnit: ChronoUnit, buffer: BigDecimal = BigDecimal(5))(implicit 94 | system: ActorSystem 95 | ): Future[Unit] = { 96 | val allEnums = ChronoUnit.values() 97 | val previousEnum = allEnums(chronoUnit.ordinal - 1) 98 | recurseUntilHitTimeUnit(previousEnum, buffer) 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/pekko/AnyPropTestKit.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.pekko 2 | 3 | import org.apache.pekko 4 | import org.scalatest.fixture 5 | import org.scalatest.propspec.FixtureAnyPropSpecLike 6 | 7 | import pekko.actor.ActorSystem 8 | import pekko.testkit.TestKitBase 9 | 10 | class AnyPropTestKit(_system: ActorSystem) 11 | extends FixtureAnyPropSpecLike 12 | with TestKitBase 13 | with fixture.TestDataFixture { 14 | implicit val system: ActorSystem = _system 15 | } 16 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/pekko/PekkoHttpTestKit.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.pekko 2 | 3 | import org.apache.pekko 4 | import org.scalatest.Suite 5 | 6 | import pekko.actor.ActorSystem 7 | import pekko.http.scaladsl.Http 8 | 9 | trait PekkoHttpTestKit extends PekkoStreamTestKit { this: Suite => 10 | implicit val system: ActorSystem 11 | 12 | override protected def afterAll(): Unit = 13 | Http(system) 14 | .shutdownAllConnectionPools() 15 | .foreach { _ => 16 | super.afterAll() 17 | }(system.dispatcher) 18 | } 19 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/pekko/PekkoStreamTestKit.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.pekko 2 | 3 | import com.typesafe.scalalogging.CanLog 4 | import com.typesafe.scalalogging.Logger 5 | import com.typesafe.scalalogging.LoggerTakingImplicit 6 | import org.apache.pekko 7 | import org.scalatest.BeforeAndAfterAll 8 
| import org.scalatest.Suite 9 | import org.scalatest.TestData 10 | 11 | import scala.concurrent.duration._ 12 | import scala.language.postfixOps 13 | 14 | import pekko.actor.ActorSystem 15 | import pekko.testkit.TestKit 16 | import pekko.testkit.TestKitBase 17 | 18 | trait PekkoStreamTestKit extends TestKitBase with BeforeAndAfterAll { this: Suite => 19 | implicit val system: ActorSystem 20 | 21 | override protected def afterAll(): Unit = 22 | TestKit.shutdownActorSystem(system) 23 | 24 | /** If it's not possible to determine whether a Stream has finished in a test and instead you need to use a manual 25 | * wait, make sure you wait at least this period of time for pekko-streams to initialize properly. 26 | */ 27 | val PekkoStreamInitializationConstant: FiniteDuration = 1 second 28 | 29 | private implicit case object CanLogTestData extends CanLog[TestData] { 30 | override def logMessage(originalMsg: String, context: TestData): String = 31 | s"${context.name}: $originalMsg" 32 | } 33 | 34 | lazy val logger: LoggerTakingImplicit[TestData] = Logger.takingImplicit[TestData](getClass.getName) 35 | } 36 | -------------------------------------------------------------------------------- /dependency-check/suppression.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | ^pkg:maven/org\.mdedetrich/akka\-stream\-json_2\.13@.*$ 8 | cpe:/a:akka:akka 9 | 10 | 11 | 14 | ^pkg:maven/org\.mdedetrich/akka\-stream\-circe_2\.13@.*$ 15 | cpe:/a:akka:akka 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/design.md: -------------------------------------------------------------------------------- 1 | # Design 2 | 3 | Each application is contained within a corresponding sbt submodule, e.g. the application for `backup` is contained 4 | within the `cli-backup` sbt submodule. The `core-cli` sbt submodule contains common cli arguments (e.g. `kafka-topics`). 5 | 6 | Scala packaging has been disabled for these submodules which means that when publishing/packaging Guardian it won't push 7 | any built `.jar` files. This is because it's unnecessary since you are meant to run these applications as binaries and 8 | not include them as a library. By the same token this also means that the cli modules are built with global inlining 9 | using `"-opt-inline-from:**"`, see [here](https://www.lightbend.com/blog/scala-inliner-optimizer) for more info. 10 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/index.md: -------------------------------------------------------------------------------- 1 | # Application 2 | 3 | Guardian is also packaged as various applications that let you run it using a CLI interface. Currently, the 4 | binaries provided are 5 | 6 | * restore: A binary which when executed allows you to restore from an existing backup. 7 | * backup: A continuously running binary that performs the backup operation. 8 | 9 | The CLI follows POSIX guidelines which means you can use `--help` as an argument to provide information on all of the 10 | parameters.
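For example, once a package has been installed, printing all available options looks like the following (a minimal sketch; the executable name below is illustrative, the actual name depends on the packaging format you chose, see the packaging page):

```shell
# Hypothetical executable name; substitute the binary produced by your chosen package format
guardian-backup --help
```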
11 | 12 | @@toc { depth=2 } 13 | 14 | @@@ index 15 | 16 | * [design](design.md) 17 | * [packaging](packaging.md) 18 | * [logging](logging.md) 19 | 20 | @@@ 21 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/logging.md: -------------------------------------------------------------------------------- 1 | # Logging 2 | 3 | The CLI provides its own default 4 | logback `logback.xml` @github[logging file](/core-cli/src/main/resources/logback.xml) which has sane defaults for 5 | typical usage. It's also possible to provide a custom `logback.xml` configuration file using the `--logback-file` 6 | command line argument. 7 | 8 | For more details about logback and/or the `logback.xml` configuration format read the 9 | @ref:[general architecture section on logging](../general-architecture/logging.md). 10 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/packaging.md: -------------------------------------------------------------------------------- 1 | # Packaging 2 | 3 | Guardian is currently packaged using [sbt-native-packager](https://github.com/sbt/sbt-native-packager) to provide the 4 | following formats by using the sbt shell. 5 | 6 | * `rpm` 7 | * restore: `cliRestore/rpm:packageBin`. Created `rpm` file will be contained 8 | in `cli-restore/target/rpm/RPMS/noarch/` 9 | * backup: `cliBackup/rpm:packageBin`. Created `rpm` file will be contained in `cli-backup/target/rpm/RPMS/noarch/` 10 | NOTE: In order to build packages you need to have the [rpm-tools](https://rpm.org/) (specifically `rpmbuild`) 11 | installed and available on `PATH`. Please consult your Linux distribution for more info 12 | * `zip` 13 | * restore: `cliRestore/universal:packageBin`. Created `zip` file will be contained 14 | in `cli-restore/target/universal/` 15 | * backup: `cliBackup/universal:packageBin`. Created `zip` file will be contained in `cli-backup/target/universal/` 16 | * `tar` 17 | * restore: `cliRestore/universal:packageZipTarball`. Created `tar` file will be contained 18 | in `cli-restore/target/universal/` 19 | * backup: `cliBackup/universal:packageZipTarball`. Created `tar` file will be contained 20 | in `cli-backup/target/universal/` 21 | * `Xz` 22 | * restore: `cliRestore/universal:packageXzTarball`. Created `xz` file will be contained 23 | in `cli-restore/target/universal/` 24 | * backup: `cliBackup/universal:packageXzTarball`. Created `xz` file will be contained 25 | in `cli-backup/target/universal/` 26 | 27 | Note that for these packages formats you need to have JRE installed on your system to run the package. 
For more details 28 | about packaging read the [docs](https://sbt-native-packager.readthedocs.io/en/latest/) 29 | -------------------------------------------------------------------------------- /docs/src/main/paradox/backup/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Reference 4 | 5 | @@snip (/core-backup/src/main/resources/reference.conf) 6 | 7 | Scala API doc @apidoc[kafka.backup.configs.Backup] 8 | 9 | ## Explanation 10 | 11 | * `pekko.kafka.consumer`: See @extref:[documentation](pekko-connectors-kafka-docs:consumer.html#settings) 12 | * `pekko.kafka.consumer.kafka-clients`: See @extref:[documentation](kafka-docs:documentation.html#consumerconfigs) 13 | * `backup`: 14 | * `kafka-group-id`: The group id for the Kafka consumer that's used in the backup tool 15 | * `time-configuration`: How to slice the persisted keys/files based on time 16 | * `type`: The type of time configuration. Either `period-from-first` or `chrono-unit-slice` 17 | * `period-from-first`: Guardian will split up the backup keys/files determined by the `duration` specified. 18 | The key/filename will be determined by the timestamp of the first message received from the Kafka consumer 19 | with each further key/filename being incremented by the configured `duration`. If Guardian is shut down 20 | then it will terminate and complete the stream with the final element in the JSON array being a `null` 21 | * This is done so it's possible to determine if a backup has been terminated by a shutdown of Guardian 22 | and also because it's not really possible to resume using arbitrary durations. 23 | * `chrono-unit-slice`: Guardian will split up the backup keys/files determined by the `chrono-unit` which 24 | represents intervals such as days and weeks. As such when using this setting it's possible for Guardian to 25 | resume from a previous uncompleted backup. 26 | * `duration`: If configuration is `period-from-first` then this determines the max period of time for each time 27 | slice. 28 | * `chrono-unit`: If configuration is `chrono-unit-slice` then the `chrono-unit` determines the interval (e.g. days, weeks) used for each time slice. 29 | * `commit-timeout-buffer-window`: Guardian sets the commit timeout of the Kafka consumer based on the `time-configuration` 30 | since Guardian does manual committing of cursors. The buffer gets added onto the `time-configuration` to give 31 | some headroom for any theoretical delays. 32 | * `compression`: The compression format to use for the data being backed up. Note that changes in compression 33 | configuration will not apply for any currently existing backups that need to be completed, only for future 34 | backups. 35 | * `type`: Which compression to use. 36 | * `gzip`: Standard [Gzip](https://en.wikipedia.org/wiki/Gzip) compression 37 | * `level`: The level of compression to use 38 | -------------------------------------------------------------------------------- /docs/src/main/paradox/backup/design.md: -------------------------------------------------------------------------------- 1 | # Design 2 | 3 | The format for backups is in JSON consisting of a large JSON array filled with JSON objects that have the following 4 | format.
5 | 6 | ```json 7 | { 8 | "topic": "kafka topic", 9 | "partition": 0, 10 | "offset": 0, 11 | "key": "a2V5", 12 | "value": "dmFsdWU=", 13 | "timestamp": 0, 14 | "timestamp_type": 0 15 | } 16 | ``` 17 | 18 | The `key` and `value` are Base64 encoded byte arrays (in the above example `"a2V5"` decodes to the string `key` 19 | and `"dmFsdWU="` decodes to the string `value`). This is due to the fact that the backup tool can make no assumptions on 20 | the format of the key or value, so we encode the raw byte arrays. 21 | 22 | One thing to note is that it's possible for the last JSON object in the JSON array to be `null`; see the `time-configuration` explanation in @ref:[configuration](configuration.md) for more info. 23 | -------------------------------------------------------------------------------- /docs/src/main/paradox/backup/index.md: -------------------------------------------------------------------------------- 1 | # Backup 2 | 3 | The backup module is responsible for backing up a specific set of Kafka topics into persistent storage. The backup 4 | runs as a continuous stream that is split depending on time buckets which are configurable. 5 | 6 | @@project-info { projectId="coreBackup" } 7 | 8 | @@toc { depth=2 } 9 | 10 | @@@ index 11 | 12 | * [configuration](configuration.md) 13 | * [design](design.md) 14 | 15 | @@@ 16 | -------------------------------------------------------------------------------- /docs/src/main/paradox/ci.md: -------------------------------------------------------------------------------- 1 | # CI - Continuous Integration 2 | 3 | Guardian uses github actions to perform CI whenever a pull request is made and when a pull request is merged into 4 | master. CI is also responsible for publishing to github. The integration with github actions for the main build is 5 | performed using [sbt-github-actions][sbt-github-actions-link]. 6 | 7 | ## Design 8 | 9 | One thing to note about [sbt-github-actions][sbt-github-actions-link] is that it generates the github workflow files 10 | directly from the sbt @github[build definition file](/build.sbt). 11 | This means that the `build.sbt` is the source of truth and hence [sbt-github-actions][sbt-github-actions-link] also 12 | checks that the github workflow is in sync with `build.sbt` as part of the CI process. 13 | 14 | Essentially that means any changes to `build.sbt` (such as updating Scala versions) can also cause changes in github 15 | workflow actions. Likewise if you need to do any custom changes to 16 | the @github[ci.yaml](/.github/workflows/ci.yml) file you need to do this in `build.sbt` using 17 | the [sbt-github-actions][sbt-github-actions-link] SBT dsl. 18 | 19 | To regenerate the relevant github workflow files after changes to `build.sbt` are done you need to run 20 | 21 | ``` 22 | githubWorkflowGenerate 23 | ``` 24 | 25 | in the sbt shell. For more information go [here](https://github.com/djspiewak/sbt-github-actions#generative-plugin) 26 | 27 | ## Scalafmt 28 | 29 | In addition to and separately from [sbt-github-actions][sbt-github-actions-link], Guardian also has 30 | a [scalafmt][scalafmt-link] pipeline that checks the code is correctly formatted on each PR. This allows the 31 | @github[scalafmt pipeline](/.github/workflows/format.yml) to run at the same time the main build 32 | does. Furthermore, it uses [scalafmt-native](https://scalameta.org/scalafmt/docs/installation.html#native-image) for 33 | improved runtime performance (typically it takes 5-10 seconds to check the entire project is formatted).
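To run an equivalent formatting check locally before opening a PR, a minimal sketch is to use the sbt-scalafmt plugin tasks from the sbt shell (note that the CI pipeline itself invokes scalafmt-native rather than these sbt tasks):

```
scalafmtCheckAll
scalafmtSbtCheck
```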
34 | 35 | This means that if you ever update the scalafmt version in 36 | the @github[configuration file](/.scalafmt.conf#L1) you also need to update it in the 37 | @github[scalafmt-pipeline](/.github/workflows/format.yml#L26). 38 | 39 | [sbt-github-actions-link]: https://github.com/djspiewak/sbt-github-actions 40 | [scalafmt-link]: https://scalameta.org/scalafmt/ 41 | -------------------------------------------------------------------------------- /docs/src/main/paradox/doc-generation.md: -------------------------------------------------------------------------------- 1 | # Document Generation 2 | 3 | Guardian uses [sbt-paradox][sbt-paradox-link] as the main plugin for generating documentation which is hosted 4 | using [github pages][github-pages-link]. In addition, various other plugins are used which are noted below 5 | 6 | * [sbt-paradox-api-doc](https://github.com/lightbend/sbt-paradox-apidoc): Allows you to directly link to Scala 7 | documentation using the `@@apidoc` directive 8 | * [sbt-paradox-project-info](https://github.com/lightbend/sbt-paradox-project-info): Provides an `@@projectInfo` 9 | directive that derives common information about the project (such as dependencies, project info, etc.) 10 | * [sbt-site](https://github.com/sbt/sbt-site): Used in conjunction with [sbt-paradox][sbt-paradox-link] to generate the 11 | final site structure 12 | * [sbt-ghpages](https://github.com/sbt/sbt-ghpages): Used for uploading the final site 13 | to [github-pages][github-pages-link]. 14 | * [sbt-unidoc](https://github.com/sbt/sbt-unidoc): Used to aggregate/concatenate Scala API documentation 15 | from various sbt modules into a single documentation result 16 | 17 | ## Design 18 | 19 | [sbt-paradox][sbt-paradox-link] generates documentation using standard [Markdown](https://www.markdownguide.org/). The 20 | documentation can be found in the @github[docs-folder](/docs). Note that this folder also corresponds to an sbt module 21 | named `docs`, which means that commands related to documentation are run in that sbt sub-project 22 | (i.e. `docs/makeSite` generates the documentation site). 23 | 24 | Guardian also uses [scaladoc][scaladoc-link] which is already included within the Scala compiler/SBT to generate Scala API 25 | documentation. [scaladoc][scaladoc-link] is analogous to Java's own [javadoc](https://en.wikipedia.org/wiki/Javadoc) 26 | which generates API documentation that is written within the code itself. 27 | 28 | One advantage of using [sbt-paradox][sbt-paradox-link] and its various plugins as the main driver for documentation 29 | generation is that it checks at document generation (i.e. compile time) that the docs are well-formed. This checking 30 | includes 31 | 32 | * references to other links 33 | * references to specific Scala API documentation directly using Scala classes/objects/traits 34 | * the TOC (table of contents) is well-formed (e.g.
you don't have markdown files in `docs` which aren't referenced 35 | anywhere) 36 | * references to versions from Guardians various Scala submodules are always up-to-date 37 | * references to code snippets 38 | 39 | [sbt-paradox-link]: https://github.com/lightbend/paradox 40 | [github-pages-link]: https://pages.github.com/ 41 | [scaladoc-link]: https://docs.scala-lang.org/style/scaladoc.html 42 | -------------------------------------------------------------------------------- /docs/src/main/paradox/general-architecture/index.md: -------------------------------------------------------------------------------- 1 | # General Architecture 2 | 3 | General documentation about how Guardian for Apache Kafka is architected lives here. 4 | 5 | @@toc { depth=2 } 6 | 7 | @@@ index 8 | 9 | * [logging](logging.md) 10 | 11 | @@@ 12 | -------------------------------------------------------------------------------- /docs/src/main/paradox/general-architecture/logging.md: -------------------------------------------------------------------------------- 1 | # Logging 2 | 3 | Guardian for Apache Kafka uses [logback](https://logback.qos.ch/index.html) to perform logging. This means if you are 4 | using the modules as libraries you need to provide a `logback.xml` in your classpath (typically this is done by putting 5 | the `logback.xml` in your `/src/main/resources` folder). Note that the Guardian modules do not provide a default 6 | `logback.xml` for deployed artifacts since this is typically the responsibility of an application to configure and 7 | provide. 8 | 9 | If you want examples of `logback.xml` configuration you can have a look at the 10 | official [logback page](https://logback.qos.ch/manual/configuration.html) but you can also use existing `logback.xml`'s 11 | from either the @github[cli](/core-cli/src/main/resources/logback.xml) or the 12 | @github[tests](/core/src/test/resources/logback.xml) as a reference. 13 | 14 | @@@ warning 15 | 16 | As documented at @extref:[pekko logback configuration](pekko-docs:logging.html#logback-configuration) it is highly recommended 17 | to use an `AsyncAppender` in your configuration as this offsets the logging to a background thread otherwise you will 18 | end up blocking the core pekko/pekko-streams library whenever a log is made. 19 | 20 | @@@ 21 | 22 | ## Logback adapter for pekko/pekko-streams 23 | 24 | By default, pekko/pekko-streams uses its own asynchronous logger however they provide a 25 | @extref:[logging adapter](pekko-docs:logging.html#slf4j) which has already been preconfigured for use in Guardian. 26 | 27 | ## CLI/Application 28 | 29 | Note that unlike the core libraries, the CLI application does provide a default `logback.xml`. For more details read 30 | @ref:[application logging](../application/logging.md). 31 | -------------------------------------------------------------------------------- /docs/src/main/paradox/index.md: -------------------------------------------------------------------------------- 1 | # Guardian for Apache Kafka Documentation 2 | 3 | Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters. 4 | It is built using [Scala](https://www.scala-lang.org/) entirely 5 | with [Pekko-Streams](https://pekko.apache.org/docs/pekko/current/stream/index.html) 6 | to ensure that the tool runs reliably and as desired with large datasets in different scenarios. 
7 | 8 | @@toc { depth=2 } 9 | 10 | @@@ index 11 | 12 | * [overview](overview.md) 13 | * [security](security.md) 14 | * [license-report](license-report.md) 15 | * [ci](ci.md) 16 | * [doc-generation](doc-generation.md) 17 | * [general-architecture](general-architecture/index.md) 18 | * [testing](testing/index.md) 19 | * [application](application/index.md) 20 | * [backup](backup/index.md) 21 | * [persistence](persistence/index.md) 22 | * [restore](restore/index.md) 23 | 24 | @@@ 25 | 26 | ## Trademarks 27 | 28 | Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or 29 | other countries. 30 | -------------------------------------------------------------------------------- /docs/src/main/paradox/overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters. 4 | It is built using [Scala](https://www.scala-lang.org/) entirely 5 | with [Pekko-Streams](https://pekko.apache.org/docs/pekko/current/stream/index.html) 6 | to ensure that the tool runs as desired with large datasets in different scenarios. 7 | 8 | ## Versions 9 | 10 | The core modules are compiled against: 11 | 12 | * Pekko Streams $pekko.version$+ (@extref:[Reference](pekko-docs:stream/index.html), [Github](https://github.com/apache/incubator-pekko)) 13 | * Pekko Streams Circe $pekko-stream-circe.version$+ ([Github](https://github.com/mdedetrich/pekko-streams-circe)) 14 | * PureConfig $pure-config.version$+ ([Reference](https://pureconfig.github.io/docs/), [Github](https://github.com/pureconfig/pureconfig)) 15 | * ScalaLogging $scala-logging.version$+ ([Github](https://github.com/lightbend/scala-logging)) 16 | 17 | The cli modules are compiled against: 18 | 19 | * Decline $decline.version$+ ([Reference](https://ben.kirw.in/decline/), [Github](https://github.com/bkirwi/decline)) 20 | -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/design.md: -------------------------------------------------------------------------------- 1 | # Design 2 | 3 | Storage mechanisms are implemented via the @apidoc[BackupClientInterface] and @apidoc[RestoreClientInterface]. To add 4 | custom storage mechanisms you need to implement these methods. These interfaces are designed to be as simple as possible 5 | while being completely abstract to allow for any theoretical storage mechanism. 6 | 7 | ## BackupClientInterface 8 | 9 | The @apidoc[BackupClientInterface] implements the entire backup flow including the resuming from a previously terminated 10 | backup. Of note is the @apidoc[BackupClientInterface.State](BackupClientInterface) which is the data structure that is 11 | returned when any previously existing backup for that key exists. This is provided to 12 | @apidoc[BackupClientInterface.backupToStorageSink](BackupClientInterface) indicating whether the backup being performed 13 | is a new backup or resuming from a previous one with the retrieval of the current state being defined by 14 | @apidoc[BackupClientInterface.getCurrentUploadState](BackupClientInterface). 15 | 16 | Note that when implementing @apidoc[BackupClientInterface] you do not need to handle the corner cases regarding the 17 | contents of the byte string when resuming/suspending/terminating, this is automatically handled for you. 
Essentially you 18 | just need to handle how to store/push `ByteString` into the storage of your choice. 19 | 20 | ## RestoreClientInterface 21 | 22 | The @apidoc[RestoreClientInterface] implements restoration from an existing backup. Implementing this is quite simple, 23 | you need to define @apidoc[RestoreClientInterface.retrieveBackupKeys](RestoreClientInterface) which returns all valid 24 | keys to restore (i.e. don't include currently in progress backup keys) and 25 | @apidoc[RestoreClientInterface.downloadFlow](RestoreClientInterface) which is a pekko-stream `Flow` that takes 26 | a `String` which is the key and outputs the content of that key. 27 | -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/index.md: -------------------------------------------------------------------------------- 1 | # Persistence Modules 2 | 3 | Guardian for Apache Kafka has a modular architecture that provides support for different persistence backups. 4 | 5 | @@toc { depth=2 } 6 | 7 | @@@ index 8 | 9 | * [design](design.md) 10 | * [S3](s3/index.md) 11 | 12 | @@@ -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/s3/configuration.md: -------------------------------------------------------------------------------- 1 | # S3 2 | 3 | ## Reference 4 | 5 | @@snip (/core-s3/src/main/resources/reference.conf) 6 | 7 | Scala API doc @apidoc[kafka.s3.configs.S3] 8 | 9 | ## Explanation 10 | 11 | * `s3-headers`: See @extref:[documentation](pekko-connectors:org/apache/pekko/stream/connectors/s3/headers/index.html) 12 | * `pekko.connectors.s3`: See @extref:[documentation](pekko-connectors-docs:s3.html#configuration) 13 | * `s3-config`: Core S3 configuration 14 | * `data-bucket`: The main S3 bucket where data is backed up and where to restore data from 15 | * `data-bucket-prefix`: S3 prefix configuration to be used when searching for the bucket 16 | * `error-restart-settings`: Specific retry settings when recovering from known errors in S3. See @extref:[apidoc](pekko:org/apache/pekko/stream/RestartSettings.html) 17 | -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/s3/index.md: -------------------------------------------------------------------------------- 1 | # S3 2 | 3 | The S3 persistence module allows you to store kafka backups on [AWS S3 Cloud Storage](https://aws.amazon.com/s3/). 4 | 5 | @@project-info { projectId="coreS3" } 6 | @@project-info { projectId="backupS3" } 7 | @@project-info { projectId="restoreS3" } 8 | 9 | @@toc { depth=2 } 10 | 11 | @@@ index 12 | 13 | * [configuration](configuration.md) 14 | 15 | @@@ 16 | -------------------------------------------------------------------------------- /docs/src/main/paradox/restore/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Reference 4 | 5 | @@snip (/core-restore/src/main/resources/reference.conf) 6 | 7 | Scala API doc @apidoc[kafka.restore.configs.Restore] 8 | 9 | ## Explanation 10 | 11 | * `pekko.kafka.producer`: See @extref:[documentation](pekko-connectors-kafka-docs:producer.html#settings) 12 | * `pekko.kafka.producer.kafka-clients`: See @extref:[documentation](kafka-docs:documentation.html#producerconfigs) 13 | * `restore`: 14 | * `from-when`: An `ISO-8601` time that specifies from when topics need to be restored. 
Note that the time used is 15 | based on the original Kafka timestamp and **NOT** the current time. 16 | * `override-topics`: A mapping of currently backed up topics to a new topic in the destination Kafka cluster 17 | -------------------------------------------------------------------------------- /docs/src/main/paradox/restore/index.md: -------------------------------------------------------------------------------- 1 | # Restore 2 | 3 | The restore module is responsible for streaming data from a backup storage location into a fresh new cluster in the 4 | event of a disaster recovery. The restore is able to work with any format of backed up files created by Guardian's 5 | backup. 6 | 7 | @@project-info { projectId="coreRestore" } 8 | 9 | @@toc { depth=2 } 10 | 11 | @@@ index 12 | 13 | * [configuration](configuration.md) 14 | 15 | @@@ 16 | -------------------------------------------------------------------------------- /docs/src/main/paradox/security.md: -------------------------------------------------------------------------------- 1 | # Security 2 | 3 | ## OWASP Report 4 | 5 | Guardian uses [sbt-dependency-check](https://github.com/albuch/sbt-dependency-check) to generate 6 | a [dependency-check-report][dependency-check-report-link] which checks direct and transitive dependencies for 7 | vulnerabilities against [NVD](https://nvd.nist.gov/) in the form of an HTML file that can be viewed in a standard 8 | browser. 9 | 10 | ### Generating a report 11 | 12 | You can use the sbt shell to generate a report at any time using 13 | 14 | ``` 15 | dependencyCheckAggregate 16 | ``` 17 | 18 | This will overwrite the @github[current report file](/dependency-check/dependency-check-report.html) 19 | 20 | ### Suppressing false positives 21 | 22 | Sometimes it is possible that a false positive gets generated in the report. To suppress a false positive, first you need to 23 | open the @github[report file](/dependency-check/dependency-check-report.html) in a supported browser. In the list of found vulnerabilities there 24 | should be a suppress button which when clicked displays a popup containing an `XML` suppression entry. You then add 25 | that `<suppress>` tag entry to the 26 | existing [suppression-file](https://github.com/aiven/guardian-for-apache-kafka/edit/main/dependency-check/suppression.xml). 27 | Finally, regenerate the report using sbt's `dependencyCheckAggregate` 28 | -------------------------------------------------------------------------------- /docs/src/main/paradox/testing/index.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | Guardian for Apache Kafka aims to provide as little friction as possible to run tests (ideally you 4 | should be able to run tests directly and only in SBT). As an example this means avoiding handwritten shell scripts to 5 | set up environments since this typically doesn't play well with IDE integrations such 6 | as [Intellij IDEA](https://www.jetbrains.com/idea/) or [Metals](https://scalameta.org/metals/) integrated SBT test 7 | runner. 8 | 9 | ## ScalaTest 10 | 11 | Guardian for Apache Kafka uses [scalatest](https://www.scalatest.org/) as its testing framework.
The primary reasons for 12 | using this testing framework are 13 | 14 | * It's the most supported testing framework in Scala, so much so that it's considered a critical dependency whenever a 15 | new Scala release is made 16 | * It provides very handy utilities for testing asynchronous code, for example a 17 | @extref:[PatienceConfig](scalatest:concurrent/AbstractPatienceConfiguration$PatienceConfig.html) 18 | that provides efficient polling of Scala futures with configurable scalable timeouts and intervals. 19 | * Pekko provides @extref:[Testkit](pekko-docs:testing.html#asynchronous-testing-testkit) with direct integration into 20 | ScalaTest for easy testing of pekko-streams. 21 | 22 | ### Property based tests 23 | 24 | Guardian for Apache Kafka emphasises using property based testing over unit based tests. This is mainly due 25 | to the fact that property based tests often reveal more problems due to covering more cases compared to unit 26 | based tests. Here are more [details](https://www.scalatest.org/user_guide/generator_driven_property_checks) 27 | on how property based testing works with Scala. 28 | 29 | Like most random data generation, ScalaTest/ScalaCheck relies on an initial seed to deterministically generate 30 | the data. When a test fails the seed for the failing test is automatically shown (search for `Init Seed: `). 31 | If you want to specify the seed to regenerate the exact same data that caused the test to fail, you need to 32 | specify it as a test argument in `sbt` 33 | 34 | ```sbt 35 | Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-S", "7832168009826873070") 36 | ``` 37 | 38 | where `7832168009826873070` happens to be the seed 39 | 40 | This argument can be put into any of the projects within the @github[build](/build.sbt). For example if you 41 | want to only specify the seed in the `core` project you can place it like so 42 | 43 | ```sbt 44 | lazy val core = project 45 | .in(file("core")) 46 | .settings( 47 | librarySettings, 48 | name := s"$baseName-core", 49 | Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-S", "7832168009826873070"), 50 | ``` 51 | 52 | Whereas if you want it to apply globally you can just place it in the `guardian` project. 53 | 54 | ## Running test/s until failure 55 | 56 | When diagnosing flaky tests it's very useful to be able to run a test until it fails which sbt allows you to 57 | do with [commands](https://www.scala-sbt.org/1.x/docs/Commands.html). Doing this using an sbt command 58 | is far quicker than other options such as a shell script since you don't have to deal with startup time cost for 59 | every test run. 60 | 61 | This is what the base command looks like 62 | 63 | ```sbt 64 | commands += Command.command("testUntilFailed") { state => 65 | "test" :: "testUntilFailed" :: state 66 | } 67 | ``` 68 | 69 | The command will recursively call a specific task (in this case `test`) until it fails. For it to work with 70 | Guardian for Apache Kafka's @github[build](/build.sbt), you need to place it as a setting 71 | within the `guardian` project. 72 | 73 | Note that this works with any command, not just `test`.
For example if you want to only run a single test 74 | suite until failure you can do 75 | 76 | ```sbt 77 | commands += Command.command("testUntilFailed") { state => 78 | "backupS3/testOnly io.aiven.guardian.kafka.backup.s3.MockedKafkaClientBackupConsumerSpec" :: "testUntilFailed" :: state 79 | } 80 | ``` 81 | 82 | Once specified in the @github[build](/build.sbt) file you can then run `testUntilFailed` within the sbt shell. 83 | 84 | ## TestContainers 85 | 86 | [testcontainers](https://www.testcontainers.org/) along with the Scala 87 | wrapper [testcontainers-scala](https://github.com/testcontainers/testcontainers-scala) is used to automate the spinning 88 | up of [docker](https://www.docker.com/) containers whenever the relevant test is run. As long as you have docker installed on your 89 | system you shouldn't have to worry about anything. 90 | 91 | @@toc { depth=2 } 92 | 93 | @@@ index 94 | 95 | * [s3](s3.md) 96 | 97 | @@@ 98 | -------------------------------------------------------------------------------- /docs/src/main/paradox/testing/s3.md: -------------------------------------------------------------------------------- 1 | # S3 - Testing 2 | 3 | For tests that run against the [AWS S3 service](https://aws.amazon.com/s3/) you need to provide the relevant credentials 4 | to S3. The most typical way to provide these credentials is with the usage of environment variables, e.g. 5 | 6 | ```shell 7 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER=static 8 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_ACCESS_KEY_ID="my key" 9 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_SECRET_ACCESS_KEY="my secret" 10 | export PEKKO_CONNECTORS_S3_REGION_PROVIDER=static 11 | export PEKKO_CONNECTORS_S3_REGION_DEFAULT_REGION=eu-central-1 12 | ``` 13 | 14 | ## Utilities 15 | 16 | Guardian provides a utility to help deal with running S3 related tests. Due to the possibility of this tool 17 | having unintended consequences for your S3 account, it needs to be manually run in sbt. To run the tool 18 | without any parameters do this 19 | 20 | ```sh 21 | sbt "coreS3/test:runMain io.aiven.guardian.kafka.s3.Main" 22 | ``` 23 | 24 | Current commands 25 | 26 | * `cleanup-buckets`: Helps in cleaning up S3 buckets that have been inadvertently left over by tests. 27 | 28 | ## Tagging S3 Tests 29 | 30 | Due to a current limitation where there is no way to expose Github secrets to PRs made from external forks, tests which 31 | run against S3 need to be @extref:[Tagged](scalatest:Tag.html) 32 | using @github[RealS3Available](/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/S3Spec.scala#L45-L48).
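As a rough sketch of what such tagging looks like in ScalaTest (the spec and tag below are illustrative stand-ins; the actual `RealS3Available` tag lives in `S3Spec.scala`):

```scala
import org.scalatest.Tag
import org.scalatest.propspec.AnyPropSpec

// Illustrative stand-in for the tag object defined in S3Spec.scala
object RealS3Available extends Tag("RealS3Available")

class ExampleRealS3Spec extends AnyPropSpec {
  // Tagged tests can then be excluded when S3 credentials are unavailable,
  // e.g. by passing ScalaTest's `-l RealS3Available` runner argument
  property("talks to a real S3 bucket", RealS3Available) {
    // ... exercise the real S3 client here ...
  }
}
```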
33 | -------------------------------------------------------------------------------- /project/LicenseReport.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbtlicensereport.SbtLicenseReport 3 | import sbtlicensereport.SbtLicenseReport.autoImportImpl._ 4 | import sbtlicensereport.license.{DepModuleInfo, MarkDown} 5 | 6 | object LicenseReport extends AutoPlugin { 7 | 8 | override lazy val projectSettings = Seq( 9 | licenseReportTypes := Seq(MarkDown), 10 | licenseReportMakeHeader := (language => language.header1("License Report")), 11 | licenseConfigurations := Set("compile", "test", "provided"), 12 | licenseDepExclusions := { 13 | case dep: DepModuleInfo if dep.organization == "io.aiven" && dep.name.contains("guardian") => 14 | true // Inter guardian project dependencies are pointless 15 | case DepModuleInfo(_, "scala-library", _) => true // Scala library is part of Scala language 16 | case DepModuleInfo(_, "scala-reflect", _) => true // Scala reflect is part of Scala language 17 | }, 18 | licenseReportColumns := Seq(Column.Category, Column.License, Column.Dependency, Column.Configuration) 19 | ) 20 | 21 | override def requires = plugins.JvmPlugin && SbtLicenseReport 22 | 23 | override def trigger = allRequirements 24 | 25 | } 26 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.0 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2") 2 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox" % "0.10.7") 3 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox-apidoc" % "1.1.0") 4 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox-project-info" % "3.0.1") 5 | addSbtPlugin("com.github.sbt" % "sbt-unidoc" % "0.5.0") 6 | addSbtPlugin("com.github.sbt" % "sbt-ghpages" % "0.8.0") 7 | addSbtPlugin("com.thoughtworks.sbt-api-mappings" % "sbt-api-mappings" % "3.0.2") 8 | addSbtPlugin("com.github.sbt" % "sbt-site-paradox" % "1.7.0") 9 | addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.10.0") 10 | addSbtPlugin("com.github.sbt" % "sbt-github-actions" % "0.23.0") 11 | addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1") 12 | addSbtPlugin("com.github.sbt" % "sbt-release" % "1.4.0") 13 | addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.12.1") 14 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.11") 15 | addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.3.11") 16 | addSbtPlugin("net.vonbuchholtz" % "sbt-dependency-check" % "5.1.0") 17 | addSbtPlugin("com.github.sbt" % "sbt-license-report" % "1.5.0") 18 | 19 | // This is here to bump dependencies for sbt-paradox/sbt-site, see 20 | // https://github.com/sirthias/parboiled/issues/175, https://github.com/sirthias/parboiled/issues/128 and 21 | // https://github.com/sirthias/parboiled/pull/195 22 | libraryDependencies ++= Seq( 23 | "org.parboiled" %% "parboiled-scala" % "1.4.1", 24 | "org.parboiled" % "parboiled-java" % "1.4.1" 25 | ) 26 | 27 | // See https://github.com/akka/akka-http/pull/3995 and https://github.com/akka/akka-http/pull/3995#issuecomment-1026978593 28 | libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % "always" 29 | 
-------------------------------------------------------------------------------- /project/project-info.conf: -------------------------------------------------------------------------------- 1 | project-info { 2 | version: "current" 3 | labels: "https://github.com/aiven/guardian-for-apache-kafka/labels/p%3A" 4 | scaladoc: "https://aiven.github.io/guardian-for-apache-kafka/api/"${project-info.version}"/io/aiven/guardian/" 5 | shared-info { 6 | jdk-versions: ["Adopt OpenJDK 11", "Adopt OpenJDK 17"] 7 | issues: { 8 | url: "https://github.com/aiven/guardian-for-apache-kafka/issues" 9 | text: "Github issues" 10 | } 11 | release-notes: { 12 | url: "https://github.com/aiven/guardian-for-apache-kafka/releases" 13 | text: "GitHub releases" 14 | } 15 | } 16 | backupS3: ${project-info.shared-info} { 17 | title: "Backup S3" 18 | jpms-name: "io.aiven.guardian.kafka.backup.s3" 19 | api-docs: [ 20 | { 21 | url: ${project-info.scaladoc}"kafka/backup/s3/index.html" 22 | text: "API (Scaladoc)" 23 | } 24 | ] 25 | } 26 | cliBackup: ${project-info.shared-info} { 27 | title: "CLI Backup" 28 | jpms-name: "io.aiven.guardian.kafka.backup" 29 | } 30 | cliRestore: ${project-info.shared-info} { 31 | title: "CLI Restore" 32 | jpms-name: "io.aiven.guardian.kafka.restore" 33 | } 34 | core: ${project-info.shared-info} { 35 | title: "Core" 36 | jpms-name: "io.aiven.guardian.kafka" 37 | api-docs: [ 38 | { 39 | url: ${project-info.scaladoc}"kafka/index.html" 40 | text: "API (Scaladoc)" 41 | } 42 | ] 43 | } 44 | coreBackup: ${project-info.shared-info} { 45 | title: "Core Backup" 46 | jpms-name: "io.aiven.guardian.kafka.backup" 47 | api-docs: [ 48 | { 49 | url: ${project-info.scaladoc}"kafka/backup/index.html" 50 | text: "API (Scaladoc)" 51 | } 52 | ] 53 | } 54 | coreCli: ${project-info.shared-info} { 55 | title: "Core CLI" 56 | jpms-name: "io.aiven.guardian.cli" 57 | } 58 | coreRestore: ${project-info.shared-info} { 59 | title: "Core Restore" 60 | jpms-name: "io.aiven.guardian.kafka.restore" 61 | api-docs: [ 62 | { 63 | url: ${project-info.scaladoc}"kafka/restore/index.html" 64 | text: "API (Scaladoc)" 65 | } 66 | ] 67 | } 68 | coreS3: ${project-info.shared-info} { 69 | title: "Core S3" 70 | jpms-name: "io.aiven.guardian.kafka.restore" 71 | api-docs: [ 72 | { 73 | url: ${project-info.scaladoc}"kafka/s3/index.html" 74 | text: "API (Scaladoc)" 75 | } 76 | ] 77 | } 78 | restoreS3: ${project-info.shared-info} { 79 | title: "Restore S3" 80 | jpms-name: "io.aiven.guardian.kafka.restore.s3" 81 | api-docs: [ 82 | { 83 | url: ${project-info.scaladoc}"kafka/restore/s3/index.html" 84 | text: "API (Scaladoc)" 85 | } 86 | ] 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /restore-gcs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/guardian-for-apache-kafka/9fadf3388140820b161cf28744d1587b91bf0776/restore-gcs/.gitkeep -------------------------------------------------------------------------------- /restore-s3/src/main/scala/io/aiven/guardian/kafka/restore/s3/RestoreClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.s3 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import io.aiven.guardian.kafka.restore.KafkaProducerInterface 5 | import io.aiven.guardian.kafka.restore.RestoreClientInterface 6 | import io.aiven.guardian.kafka.restore.configs.Restore 7 | import io.aiven.guardian.kafka.s3.configs.{S3 => 
S3Config} 8 | import org.apache.pekko 9 | 10 | import scala.concurrent.ExecutionContext 11 | import scala.concurrent.Future 12 | 13 | import pekko.NotUsed 14 | import pekko.actor.ActorSystem 15 | import pekko.stream.connectors.s3.S3Attributes 16 | import pekko.stream.connectors.s3.S3Headers 17 | import pekko.stream.connectors.s3.S3Settings 18 | import pekko.stream.connectors.s3.scaladsl.S3 19 | import pekko.stream.scaladsl.Flow 20 | import pekko.stream.scaladsl.Sink 21 | import pekko.util.ByteString 22 | 23 | class RestoreClient[T <: KafkaProducerInterface](maybeS3Settings: Option[S3Settings])(implicit 24 | override val kafkaProducerInterface: T, 25 | override val restoreConfig: Restore, 26 | override val kafkaClusterConfig: KafkaCluster, 27 | override val system: ActorSystem, 28 | s3Config: S3Config, 29 | s3Headers: S3Headers 30 | ) extends RestoreClientInterface[T] { 31 | 32 | override def retrieveBackupKeys: Future[List[String]] = { 33 | implicit val ec: ExecutionContext = system.dispatcher 34 | 35 | val base = S3.listBucket(s3Config.dataBucket, s3Config.dataBucketPrefix, s3Headers) 36 | for { 37 | bucketContents <- maybeS3Settings 38 | .fold(base)(s3Settings => base.withAttributes(S3Attributes.settings(s3Settings))) 39 | .runWith(Sink.collection) 40 | } yield bucketContents.map(_.key).toList 41 | } 42 | 43 | override def downloadFlow: Flow[String, ByteString, NotUsed] = 44 | Flow[String] 45 | .flatMapConcat { key => 46 | val base = S3.getObject(s3Config.dataBucket, key, None, None, s3Headers) 47 | maybeS3Settings 48 | .fold(base)(s3Settings => base.withAttributes(S3Attributes.settings(s3Settings))) 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /restore-s3/src/test/scala/io/aiven/guardian/kafka/restore/s3/RealS3GzipCompressionRestoreClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class RealS3GzipCompressionRestoreClientSpec 9 | extends AnyPropTestKit(ActorSystem("RealS3GzipCompressionRestoreClientSpec")) 10 | with RealS3RestoreClientTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /restore-s3/src/test/scala/io/aiven/guardian/kafka/restore/s3/RealS3RestoreClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class RealS3RestoreClientSpec 8 | extends AnyPropTestKit(ActorSystem("RealS3RestoreClientSpec")) 9 | with RealS3RestoreClientTest { 10 | override val compression: Option[Compression] = None 11 | } 12 | --------------------------------------------------------------------------------