├── .github ├── ISSUE_TEMPLATE │ ├── 01_question.md │ ├── 02_bug.md │ └── 03_feature.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci.yml │ ├── clean.yml │ ├── dependency-graph.yml │ ├── format.yml │ └── scala-steward.yml ├── .gitignore ├── .scala-steward.conf ├── .scalafix.conf ├── .scalafmt.conf ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── backup-gcs └── src │ └── main │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ └── gcs │ └── BackupClient.scala ├── backup-s3 └── src │ ├── main │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── backup │ │ └── s3 │ │ └── BackupClient.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ └── s3 │ ├── BackupClientChunkState.scala │ ├── BackupClientSpec.scala │ ├── KafkaConsumerWithKillSwitch.scala │ ├── MinioBackupClientSpec.scala │ ├── MockedKafkaClientBackupConsumerSpec.scala │ ├── MockedS3BackupClientInterface.scala │ ├── RealS3BackupClientSpec.scala │ ├── RealS3BackupClientTest.scala │ └── RealS3GzipCompressionBackupClientSpec.scala ├── build.sbt ├── cli-backup └── src │ ├── main │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── backup │ │ ├── App.scala │ │ ├── BackupApp.scala │ │ ├── Main.scala │ │ └── S3App.scala │ └── test │ ├── resources │ └── logback.xml │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ └── CliSpec.scala ├── cli-compaction └── .gitkeep ├── cli-restore └── src │ ├── main │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── restore │ │ ├── App.scala │ │ ├── Main.scala │ │ ├── RestoreApp.scala │ │ └── S3App.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── restore │ └── CliSpec.scala ├── compaction-gcs └── src │ └── main │ ├── resources │ └── reference.conf │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── compaction │ └── gcs │ ├── Config.scala │ ├── StorageClient.scala │ └── models │ └── StorageConfig.scala ├── compaction-s3 └── src │ └── main │ ├── resources │ └── reference.conf │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── compaction │ └── s3 │ ├── Config.scala │ ├── StorageClient.scala │ └── models │ └── StorageConfig.scala ├── core-backup └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── backup │ │ ├── BackupClientInterface.scala │ │ ├── Config.scala │ │ ├── KafkaConsumer.scala │ │ ├── KafkaConsumerInterface.scala │ │ └── configs │ │ ├── Backup.scala │ │ ├── Compression.scala │ │ └── TimeConfiguration.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── backup │ ├── BackupClientControlWrapper.scala │ ├── BackupClientInterfaceSpec.scala │ ├── BackupClientInterfaceTest.scala │ ├── CompressionSpec.scala │ ├── ConfigSpec.scala │ ├── ConfigurationChangeRestartSpec.scala │ ├── GzipCompressionBackupClientInterfaceSpec.scala │ ├── MockedBackupClientInterface.scala │ └── MockedKafkaConsumerInterface.scala ├── core-cli └── src │ └── main │ ├── resources │ ├── application.conf │ └── logback.xml │ └── scala │ └── io │ └── aiven │ └── guardian │ └── cli │ ├── MainUtils.scala │ ├── PekkoSettings.scala │ ├── arguments │ ├── PropertiesOpt.scala │ └── StorageOpt.scala │ └── options │ └── Options.scala ├── core-compaction └── src │ └── main │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── compaction │ ├── DatabaseInterface.scala │ ├── 
PostgresJDBCDatabase.scala │ └── StorageInterface.scala ├── core-gcs └── src │ └── main │ ├── resources │ └── reference.conf │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── gcs │ ├── Config.scala │ ├── configs │ └── GCS.scala │ └── errors │ └── GCSErrors.scala ├── core-restore └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── restore │ │ ├── Config.scala │ │ ├── KafkaProducer.scala │ │ ├── KafkaProducerInterface.scala │ │ ├── RestoreClientInterface.scala │ │ └── configs │ │ └── Restore.scala │ └── test │ └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── restore │ ├── ConfigSpec.scala │ ├── GzipCompressionRestoreClientInterfaceSpec.scala │ ├── MockedKafkaProducerInterface.scala │ ├── MockedRestoreClientInterface.scala │ ├── RestoreClientInterfaceSpec.scala │ └── RestoreClientInterfaceTest.scala ├── core-s3 └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── s3 │ │ ├── Config.scala │ │ ├── configs │ │ └── S3.scala │ │ └── errors │ │ └── S3Errors.scala │ └── test │ ├── resources │ └── logback.xml │ └── scala │ ├── io │ └── aiven │ │ └── guardian │ │ └── kafka │ │ └── s3 │ │ ├── Generators.scala │ │ ├── Main.scala │ │ ├── MinioContainer.scala │ │ ├── MinioS3Test.scala │ │ ├── PureConfigS3HeadersSpec.scala │ │ ├── S3Spec.scala │ │ └── S3TestUtils.scala │ └── org │ └── apache │ └── pekko │ └── stream │ └── connectors │ └── s3 │ └── GeneratorsSpec.scala ├── core ├── README.md └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── io │ │ └── aiven │ │ └── guardian │ │ └── kafka │ │ ├── Config.scala │ │ ├── Errors.scala │ │ ├── ExtensionsMethods.scala │ │ ├── PureConfigUtils.scala │ │ ├── Utils.scala │ │ ├── codecs │ │ └── Circe.scala │ │ ├── configs │ │ └── KafkaCluster.scala │ │ └── models │ │ ├── BackupObjectMetadata.scala │ │ ├── CompressionType.scala │ │ └── ReducedConsumerRecord.scala │ └── test │ ├── resources │ ├── application.conf │ └── logback.xml │ └── scala │ └── io │ └── aiven │ └── guardian │ ├── kafka │ ├── ConfigSpec.scala │ ├── Generators.scala │ ├── KafkaClusterTest.scala │ └── TestUtils.scala │ └── pekko │ ├── AnyPropTestKit.scala │ ├── PekkoHttpTestKit.scala │ └── PekkoStreamTestKit.scala ├── dependency-check ├── dependency-check-report.html └── suppression.xml ├── docs └── src │ └── main │ └── paradox │ ├── application │ ├── design.md │ ├── index.md │ ├── logging.md │ └── packaging.md │ ├── backup │ ├── configuration.md │ ├── design.md │ └── index.md │ ├── ci.md │ ├── doc-generation.md │ ├── general-architecture │ ├── index.md │ └── logging.md │ ├── index.md │ ├── overview.md │ ├── persistence │ ├── design.md │ ├── index.md │ └── s3 │ │ ├── configuration.md │ │ └── index.md │ ├── restore │ ├── configuration.md │ └── index.md │ ├── security.md │ └── testing │ ├── index.md │ └── s3.md ├── project ├── LicenseReport.scala ├── build.properties ├── plugins.sbt └── project-info.conf ├── restore-gcs └── .gitkeep └── restore-s3 └── src ├── main └── scala │ └── io │ └── aiven │ └── guardian │ └── kafka │ └── restore │ └── s3 │ └── RestoreClient.scala └── test └── scala └── io └── aiven └── guardian └── kafka └── restore └── s3 ├── RealS3GzipCompressionRestoreClientSpec.scala ├── RealS3RestoreClientSpec.scala └── RealS3RestoreClientTest.scala /.github/ISSUE_TEMPLATE/01_question.md: -------------------------------------------------------------------------------- 1 | --- 2 
| name: ❓ Ask a question 3 | about: Got stuck or missing something from the docs? Ask away! 4 | --- 5 | 6 | # What can we help you with? 7 | 8 | 9 | 10 | # Where would you expect to find this information? 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02_bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐜 Report a bug 3 | about: Spotted a problem? Let us know 4 | --- 5 | 6 | # What happened? 7 | 8 | 9 | 10 | # What did you expect to happen? 11 | 12 | 13 | 14 | # What else do we need to know? 15 | 16 | 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/03_feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💡 Feature suggestion 3 | about: What would make this even better? 4 | --- 5 | 6 | # What is currently missing? 7 | 8 | 9 | 10 | # How could this be improved? 11 | 12 | 13 | 14 | # Is this a feature you would work on yourself? 15 | 16 | * [ ] I plan to open a pull request for this feature 17 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | # About this change - What it does 3 | 4 | 5 | 6 | 7 | Resolves: #xxxxx 8 | 9 | # Why this way 10 | 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 
7 | 8 | name: Continuous Integration 9 | 10 | on: 11 | pull_request: 12 | branches: [main] 13 | push: 14 | branches: [main] 15 | 16 | permissions: 17 | id-token: write 18 | 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | 22 | jobs: 23 | build: 24 | name: Build and Test 25 | strategy: 26 | matrix: 27 | os: [ubuntu-latest] 28 | scala: [2.13.13] 29 | java: [temurin@11] 30 | runs-on: ${{ matrix.os }} 31 | steps: 32 | - name: Checkout current branch (full) 33 | uses: actions/checkout@v4 34 | with: 35 | fetch-depth: 0 36 | 37 | - name: Setup Java (temurin@11) 38 | if: matrix.java == 'temurin@11' 39 | uses: actions/setup-java@v4 40 | with: 41 | distribution: temurin 42 | java-version: 11 43 | cache: sbt 44 | 45 | - name: 'Linter: Scalafix checks' 46 | run: sbt '++ ${{ matrix.scala }}' 'scalafixAll --check' 47 | 48 | - name: Configure AWS credentials 49 | uses: aws-actions/configure-aws-credentials@v2 50 | with: 51 | role-to-assume: 'arn:aws:iam::310017459104:role/aiven-guardian-github-action' 52 | aws-region: us-west-2 53 | role-duration-seconds: 7200 54 | 55 | - name: Check that workflows are up to date 56 | run: sbt '++ ${{ matrix.scala }}' githubWorkflowCheck 57 | 58 | - name: Build project 59 | env: 60 | PEKKO_CONNECTORS_S3_REGION_PROVIDER: default 61 | PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER: default 62 | run: sbt '++ ${{ matrix.scala }}' clean coverage test 63 | 64 | - name: Compile docs 65 | run: sbt '++ ${{ matrix.scala }}' docs/makeSite 66 | 67 | - name: Upload coverage data to Coveralls 68 | env: 69 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} 70 | COVERALLS_FLAG_NAME: Scala ${{ matrix.scala }} 71 | run: sbt '++ ${{ matrix.scala }}' coverageReport coverageAggregate coveralls 72 | 73 | - name: Compress target directories 74 | run: tar cf targets.tar target cli-compaction/target compaction-gcs/target backup-s3/target compaction-s3/target docs/target cli-backup/target core-restore/target restore-s3/target core-gcs/target core-compaction/target core-s3/target core-backup/target core-cli/target cli-restore/target core/target restore-gcs/target backup-gcs/target project/target 75 | 76 | - name: Upload target directories 77 | uses: actions/upload-artifact@v4 78 | with: 79 | name: target-${{ matrix.os }}-${{ matrix.scala }}-${{ matrix.java }} 80 | path: targets.tar 81 | 82 | publish: 83 | name: Publish Artifacts 84 | needs: [build] 85 | if: github.event_name != 'pull_request' && (github.ref == 'refs/heads/main') 86 | strategy: 87 | matrix: 88 | os: [ubuntu-latest] 89 | scala: [2.13.13] 90 | java: [temurin@11] 91 | runs-on: ${{ matrix.os }} 92 | steps: 93 | - name: Checkout current branch (full) 94 | uses: actions/checkout@v4 95 | with: 96 | fetch-depth: 0 97 | 98 | - name: Setup Java (temurin@11) 99 | if: matrix.java == 'temurin@11' 100 | uses: actions/setup-java@v4 101 | with: 102 | distribution: temurin 103 | java-version: 11 104 | cache: sbt 105 | 106 | - name: Download target directories (2.13.13) 107 | uses: actions/download-artifact@v4 108 | with: 109 | name: target-${{ matrix.os }}-2.13.13-${{ matrix.java }} 110 | 111 | - name: Inflate target directories (2.13.13) 112 | run: | 113 | tar xf targets.tar 114 | rm targets.tar 115 | 116 | - run: | 117 | git config --global user.name "$(git --no-pager log --format=format:'%an' -n 1)" 118 | git config --global user.email "$(git --no-pager log --format=format:'%ae' -n 1)" 119 | 120 | - uses: webfactory/ssh-agent@v0.5.4 121 | with: 122 | ssh-private-key: ${{ secrets.GH_PAGES_SSH_PRIVATE_KEY }} 123 | 124 | - run: sbt 
docs/ghpagesPushSite 125 | -------------------------------------------------------------------------------- /.github/workflows/clean.yml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by sbt-github-actions using the 2 | # githubWorkflowGenerate task. You should add and commit this file to 3 | # your git repository. It goes without saying that you shouldn't edit 4 | # this file by hand! Instead, if you wish to make changes, you should 5 | # change your sbt build configuration to revise the workflow description 6 | # to meet your needs, then regenerate this file. 7 | 8 | name: Clean 9 | 10 | on: push 11 | 12 | jobs: 13 | delete-artifacts: 14 | name: Delete Artifacts 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - name: Delete artifacts 20 | shell: bash {0} 21 | run: | 22 | # Customize those three lines with your repository and credentials: 23 | REPO=${GITHUB_API_URL}/repos/${{ github.repository }} 24 | 25 | # A shortcut to call GitHub API. 26 | ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; } 27 | 28 | # A temporary file which receives HTTP response headers. 29 | TMPFILE=$(mktemp) 30 | 31 | # An associative array, key: artifact name, value: number of artifacts of that name. 32 | declare -A ARTCOUNT 33 | 34 | # Process all artifacts on this repository, loop on returned "pages". 35 | URL=$REPO/actions/artifacts 36 | while [[ -n "$URL" ]]; do 37 | 38 | # Get current page, get response headers in a temporary file. 39 | JSON=$(ghapi --dump-header $TMPFILE "$URL") 40 | 41 | # Get URL of next page. Will be empty if we are at the last page. 42 | URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<\(.*\)>.*/\1/') 43 | rm -f $TMPFILE 44 | 45 | # Number of artifacts on this page: 46 | COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') )) 47 | 48 | # Loop on all artifacts on this page. 49 | for ((i=0; $i < $COUNT; i++)); do 50 | 51 | # Get name of artifact and count instances of this name.
52 | name=$(jq <<<$JSON -r ".artifacts[$i].name?") 53 | ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1)) 54 | 55 | id=$(jq <<<$JSON -r ".artifacts[$i].id?") 56 | size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") )) 57 | printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size 58 | ghapi -X DELETE $REPO/actions/artifacts/$id 59 | done 60 | done 61 | -------------------------------------------------------------------------------- /.github/workflows/dependency-graph.yml: -------------------------------------------------------------------------------- 1 | name: Update Dependency Graph 2 | on: 3 | push: 4 | branches: 5 | - main # default branch of the project 6 | permissions: 7 | contents: write 8 | jobs: 9 | dependency-graph: 10 | name: Update Dependency Graph 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: scalacenter/sbt-dependency-submission@v2 15 | -------------------------------------------------------------------------------- /.github/workflows/format.yml: -------------------------------------------------------------------------------- 1 | name: Scalafmt 2 | 3 | permissions: read-all 4 | 5 | on: 6 | pull_request: 7 | branches: ['**'] 8 | 9 | jobs: 10 | build: 11 | name: Code is formatted 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout current branch (full) 15 | uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | persist-credentials: false 19 | 20 | - name: Check project is formatted 21 | uses: jrouly/scalafmt-native-action@v3 22 | with: 23 | arguments: '--list --mode diff-ref=origin/main' 24 | -------------------------------------------------------------------------------- /.github/workflows/scala-steward.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | schedule: 4 | - cron: '0 0 * * 0' 5 | 6 | name: Launch Scala Steward 7 | 8 | jobs: 9 | scala-steward: 10 | runs-on: ubuntu-22.04 11 | name: Launch Scala Steward 12 | steps: 13 | - name: Launch Scala Steward 14 | uses: scala-steward-org/scala-steward-action@v2 15 | with: 16 | github-app-id: ${{ secrets.APP_ID }} 17 | github-app-installation-id: ${{ secrets.APP_INSTALLATION_ID }} 18 | github-app-key: ${{ secrets.APP_PRIVATE_KEY }} 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### SBT template 2 | # Simple Build Tool 3 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control 4 | 5 | dist/* 6 | target/ 7 | lib_managed/ 8 | src_managed/ 9 | project/boot/ 10 | project/plugins/project/ 11 | .history 12 | .cache 13 | .lib/ 14 | 15 | ### Scala template 16 | *.class 17 | *.log 18 | -------------------------------------------------------------------------------- /.scala-steward.conf: -------------------------------------------------------------------------------- 1 | updatePullRequests = "always" 2 | -------------------------------------------------------------------------------- /.scalafix.conf: -------------------------------------------------------------------------------- 1 | rules = [ 2 | DisableSyntax, # Disables some constructs that make no semantic sense like `final val` 3 | ProcedureSyntax, # Procedure syntax in Scala is always discouraged 4 | ExplicitResultTypes, # To avoid public API breakages by mistake is good to always annotate the return types of public methods 5 | NoValInForComprehension, # `val` 
in for comprehensions are deprecated and shouldn't be used 6 | NoAutoTupling, # Avoids the automatic tupling in parameters 7 | RemoveUnused, # Removes unused elements 8 | LeakingImplicitClassVal, # This rule adds the private access modifier on the field of implicit value classes in order to prevent direct access. 9 | OrganizeImports # Organizes imports and removes unused ones 10 | ] 11 | 12 | ExplicitResultTypes.memberKind = [Def, Val, Var] 13 | ExplicitResultTypes.memberVisibility = [Public, Protected] 14 | ExplicitResultTypes.skipSimpleDefinitions = ['Lit', 'Term.New', 'Term.Ref'] 15 | ExplicitResultTypes.fatalWarnings = true 16 | DisableSyntax.noNulls = true 17 | DisableSyntax.noReturns = true 18 | DisableSyntax.noWhileLoops = true 19 | DisableSyntax.noIsInstanceOf = true 20 | DisableSyntax.noXml = true 21 | DisableSyntax.noFinalVal = true 22 | DisableSyntax.noFinalize = true 23 | DisableSyntax.noValPatterns = true 24 | RemoveUnused.imports = false # The plugin organize imports removes unused and clashes with this 25 | OrganizeImports.groups = [ 26 | "*" 27 | "scala." 28 | "re:javax?\\." 29 | ] # Reasoning for this config is to keep the more business related imports at the top, while language imports are on the bottom 30 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = 3.8.2 2 | runner.dialect = scala213 3 | preset = default 4 | align.preset = more 5 | maxColumn = 120 6 | project.git = true 7 | align.openParenDefnSite = true 8 | align.openParenCallSite = true 9 | align.arrowEnumeratorGenerator = true 10 | danglingParentheses.preset = true 11 | rewrite.rules = [RedundantBraces, RedundantParens] 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | opensource@aiven.io. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome! 2 | 3 | Guardian for Apache Kafka follows the [fork and pull](https://help.github.com/articles/using-pull-requests/#fork--pull) 4 | development model. You can simply fork the repository, create and check out a new branch, commit changes to that 5 | branch and then create a pull request once you are done. 6 | 7 | Feel free to submit a PR earlier rather than later; this is recommended as it can spur discussion to see if you are on 8 | the right track. If you create a PR before it's ready, we recommend using GitHub's 9 | [draft](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-stage-of-a-pull-request) 10 | feature to clearly indicate that a PR is still being worked on. 11 | 12 | ## Setting up a development environment 13 | 14 | If you haven't already done so, before you get started you need to set up your machine for development. Guardian for 15 | Apache Kafka is written in [Scala](https://www.scala-lang.org/), so a few steps are needed. 16 | 17 | ## JDK 18 | 19 | Guardian for Apache Kafka is developed on the latest stable branch of OpenJDK. For Windows and macOS we recommend 20 | using [AdoptOpenJDK][adopt-openjdk-link] to download the latest installer. For Linux it's recommended to install 21 | OpenJDK through your distribution (but you can also use [AdoptOpenJDK][adopt-openjdk-link] as a last resort). 22 | 23 | ## Scala and sbt 24 | Once you have installed a JDK, having [Scala](https://www.scala-lang.org) and [sbt][sbt-link] installed is recommended.
25 | Although some IDEs (such as IntelliJ) automatically handle Scala and sbt installation for you, it's still recommended 26 | to have a standalone version so you can compile/test/run the project without an IDE/editor. The Scala installation also 27 | comes with its own REPL, which can aid in development. 28 | 29 | We recommend following the official [Scala 2 documentation](https://www.scala-lang.org/download/scala2.html) on how to 30 | install Scala. 31 | 32 | ## Editors/IDEs 33 | The following editors are recommended for development with Scala. Although it's possible to use other environments, 34 | Scala is a strongly typed language, so using a well-supported editor is beneficial. 35 | 36 | ### IntelliJ IDEA 37 | 38 | [IntelliJ IDEA](https://www.jetbrains.com/idea/) is one of the most used editors for Scala development. After installing 39 | IDEA you need to install the [Scala plugin](https://plugins.jetbrains.com/plugin/1347-scala) so it can recognize sbt 40 | projects. After installation of the plugin you can simply open the cloned `guardian-for-apache-kafka` repository and it should 41 | set up everything for you. 42 | 43 | ### Metals 44 | 45 | [Metals][metals-link] is a Scala [LSP](https://en.wikipedia.org/wiki/Language_Server_Protocol) implementation that 46 | supports various editors. The primary supported editor for [Metals][metals-link] is 47 | [Visual Studio Code](https://code.visualstudio.com/) along with the relevant 48 | [marketplace plugin](https://marketplace.visualstudio.com/items?itemName=scalameta.metals). 49 | 50 | Note that other editors can also be used with Metals; documentation can be found 51 | [here](https://scalameta.org/metals/docs/). [Spacemacs](https://www.spacemacs.org/), an 52 | [Emacs](https://www.gnu.org/software/emacs/) distribution, also supports [Metals][metals-link] via the 53 | [Scala layer](https://develop.spacemacs.org/layers/+lang/scala/README.html). 54 | 55 | ## Formatting 56 | 57 | The codebase is formatted with [scalafmt](https://scalameta.org/scalafmt/); as such, the code needs to be formatted 58 | before submitting a PR. 59 | 60 | Various runners for scalafmt exist, such as: 61 | * An [sbt scalafmt plugin](https://github.com/scalameta/sbt-scalafmt) that lets you run scalafmt directly within sbt using 62 | * `scalafmt` to format the main Scala sources 63 | * `test:scalafmt` to format the test Scala sources 64 | * `scalafmtSbt` to format the `build.sbt` file 65 | * IntelliJ IDEA and VSCode will automatically detect projects with scalafmt and prompt you whether to use scalafmt. See 66 | the [scalafmt installation guide][scalafmt-installation-link] for more details 67 | * There are native builds of scalafmt that let you run `scalafmt` as a CLI tool; see the CLI section in the 68 | [scalafmt installation guide][scalafmt-installation-link] 69 | 70 | Note that a GitHub Action exists which will check that your code is formatted whenever you create a PR. For more details 71 | read the [documentation](https://aiven.github.io/guardian-for-apache-kafka/ci.html#scalafmt). 72 | 73 | ## sbt - Compiling, Building and Testing 74 | 75 | We use [sbt][sbt-link] as the primary build tool for the project. When you run [sbt][sbt-link] by itself 76 | it will start an interactive shell where you can type in commands, e.g. 77 | 78 | * `compile` will compile the entire project 79 | * `test:compile` will only compile the test sources 80 | * `test` will run the tests for the entire project 81 | * `core/compile` will only compile the `core` project.
See [build.sbt](build.sbt) for a reference of how the projects 82 | are named 83 | * `publishLocal` will publish the project into the local Ivy repository (`~/.ivy2/local`) 84 | * `clean` will clean all build targets (including documentation) from the project. Note that sbt stores build output 85 | in sub-directories named `target` 86 | * `reload` will reload sbt, which is needed when the [sbt][sbt-link] build definition is changed 87 | 88 | ## Testing 89 | 90 | As mentioned before, testing is completely handled using sbt; there are no custom shell scripts required to set 91 | up environments unless otherwise noted in 92 | the [testing docs](https://aiven.github.io/guardian-for-apache-kafka/testing/index.html) (typically when tests run 93 | against actual services such as S3). 94 | 95 | ### Docker 96 | 97 | For integration tests Guardian for Apache Kafka uses Docker to spin up services. For macOS the best way to install 98 | Docker is from the [official website](https://www.docker.com/products/docker-desktop/), whereas if you are running Linux 99 | you should consult your distribution's package manager/repository. 100 | 101 | Since Guardian for Apache Kafka uses [testcontainers](https://www.testcontainers.org/) you don't need to worry about 102 | starting/stopping the Docker instances manually; this is automatically handled when you run the relevant test(s). 103 | 104 | ## sbt - documentation 105 | 106 | Documentation is also built within sbt, e.g. 107 | 108 | * `docs/makeSite` will compile the documentation 109 | * `docs/previewSite` will compile the documentation (if needed) and open the result in your system's default browser 110 | 111 | For details about how the documentation generation works, go 112 | [here](https://aiven.github.io/guardian-for-apache-kafka/doc-generation.html) 113 | 114 | [adopt-openjdk-link]: https://adoptopenjdk.net/ 115 | [metals-link]: https://scalameta.org/metals/ 116 | [scalafmt-installation-link]: https://scalameta.org/scalafmt/docs/installation.html 117 | [sbt-link]: https://www.scala-sbt.org/ 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/aiven/guardian-for-apache-kafka/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/aiven/guardian-for-apache-kafka/actions/workflows/ci.yml?query=branch%3Amain) 2 | [![Apache License](https://img.shields.io/badge/license-APACHE_2-green.svg)](https://www.apache.org/licenses/LICENSE-2.0) 3 | [![Coverage](https://coveralls.io/repos/github/aiven/guardian-for-apache-kafka/badge.svg?branch=main)](https://coveralls.io/github/aiven/guardian-for-apache-kafka?branch=main) 4 | 5 | # Guardian for Apache Kafka® 6 | 7 | Guardian is a backup and restore tool for Apache Kafka clusters. It is designed to continuously stream Kafka topics into 8 | persistent/object storage such as S3, and it also provides tools for restoring said backups. 9 | 10 | ## Documentation 11 | 12 | * [Guardian reference](https://aiven-open.github.io/guardian-for-apache-kafka/) documentation. 13 | 14 | ## Trademarks 15 | 16 | Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or 17 | other countries.
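The CONTRIBUTING guide above explains that tests are compiled and run entirely through sbt (`test:compile`, `test`, or per-project commands such as `core/test`), with ScalaTest suites living under each module's `src/test/scala`. As a minimal sketch of what such a suite looks like, the example below uses the same ScalaTest property-style specs and `must` matchers that the project's own tests (e.g. `CliSpec`) already rely on; the package placement, class name and assertion here are hypothetical and purely illustrative, not part of the repository.

```scala
package io.aiven.guardian.kafka

import org.scalatest.matchers.must.Matchers
import org.scalatest.propspec.AnyPropSpec

// Hypothetical example spec: if placed under core/src/test/scala it would be picked up
// by `sbt test` (or `sbt core/test` to run only the tests of the core project).
class ExampleSpec extends AnyPropSpec with Matchers {
  property("reversing a string twice returns the original string") {
    val original = "guardian-for-apache-kafka"
    original.reverse.reverse mustEqual original
  }
}
```

A single suite like this can also be run on its own from the sbt shell with `testOnly io.aiven.guardian.kafka.ExampleSpec`, which is often faster during development than running the whole `test` task.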
18 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities. Which versions are eligible 6 | to receive such patches depend on the CVSS v3.0 Rating: 7 | 8 | | CVSS v3.0 | Supported Versions | 9 | | --------- | ----------------------------------------- | 10 | | 4.0-10.0 | Most recent release | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please report (suspected) security vulnerabilities to our **[bug bounty 15 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from 16 | us within 2 working days. If the issue is confirmed, we will release a patch as 17 | soon as possible depending on impact and complexity. 18 | 19 | ## Qualifying Vulnerabilities 20 | 21 | Any reproducible vulnerability that has a severe effect on the security or 22 | privacy of our users is likely to be in scope for the program. 23 | 24 | We generally **aren't** interested in the following issues: 25 | * Social engineering (e.g. phishing, vishing, smishing) attacks 26 | * Brute force, DoS, text injection 27 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.), 28 | email (SPF/DKIM/DMARC records), SSL/TLS configuration. 29 | * Software version disclosure / Banner identification issues / Descriptive 30 | error messages or headers (e.g. stack traces, application or server errors). 31 | * Clickjacking on pages with no sensitive actions 32 | * Theoretical vulnerabilities where you can't demonstrate a significant 33 | security impact with a proof of concept. 34 | -------------------------------------------------------------------------------- /backup-gcs/src/main/scala/io/aiven/guardian/kafka/backup/gcs/BackupClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.gcs 2 | 3 | import io.aiven.guardian.kafka.backup.BackupClientInterface 4 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface 5 | import io.aiven.guardian.kafka.backup.configs.Backup 6 | import io.aiven.guardian.kafka.gcs.configs.{GCS => GCSConfig} 7 | import org.apache.pekko 8 | 9 | import scala.concurrent.ExecutionContext 10 | import scala.concurrent.Future 11 | 12 | import pekko.actor.ActorSystem 13 | import pekko.http.scaladsl.model.ContentTypes 14 | import pekko.stream.connectors.google.GoogleAttributes 15 | import pekko.stream.connectors.google.GoogleSettings 16 | import pekko.stream.connectors.googlecloud.storage.StorageObject 17 | import pekko.stream.connectors.googlecloud.storage.scaladsl.GCStorage 18 | import pekko.stream.scaladsl.Sink 19 | import pekko.util.ByteString 20 | 21 | // TODO: GCS implementation currently does not work correctly because of inability of current GCS implementation in 22 | // Pekko Connectors to allow us to commit Kafka cursor whenever chunks are uploaded 23 | class BackupClient[T <: KafkaConsumerInterface](maybeGoogleSettings: Option[GoogleSettings])(implicit 24 | override val kafkaClientInterface: T, 25 | override val backupConfig: Backup, 26 | override val system: ActorSystem, 27 | gcsConfig: GCSConfig 28 | ) extends BackupClientInterface[T] { 29 | 30 | override def empty: () => Future[Option[StorageObject]] = () => Future.successful(None) 31 | 32 | override type BackupResult = Option[StorageObject] 33 | 34 | override type State = Nothing 35 | 36 | override def 
getCurrentUploadState(key: String): Future[UploadStateResult] = 37 | Future.successful(UploadStateResult.empty) 38 | 39 | override def backupToStorageTerminateSink( 40 | previousState: PreviousState 41 | ): Sink[ByteString, Future[Option[StorageObject]]] = { 42 | val base = GCStorage 43 | .resumableUpload(gcsConfig.dataBucket, previousState.previousKey, ContentTypes.`application/json`) 44 | .mapMaterializedValue(future => future.map(result => Some(result))(ExecutionContext.parasitic)) 45 | 46 | maybeGoogleSettings 47 | .fold(base)(googleSettings => base.withAttributes(GoogleAttributes.settings(googleSettings))) 48 | } 49 | 50 | override def backupToStorageSink(key: String, 51 | currentState: Option[Nothing] 52 | ): Sink[(ByteString, kafkaClientInterface.CursorContext), Future[BackupResult]] = { 53 | val base = GCStorage 54 | .resumableUpload(gcsConfig.dataBucket, key, ContentTypes.`application/json`) 55 | .mapMaterializedValue(future => future.map(result => Some(result))(ExecutionContext.parasitic)) 56 | 57 | maybeGoogleSettings 58 | .fold(base)(googleSettings => base.withAttributes(GoogleAttributes.settings(googleSettings))) 59 | .contramap[(ByteString, kafkaClientInterface.CursorContext)] { case (byteString, _) => 60 | byteString 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/BackupClientChunkState.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface 4 | import io.aiven.guardian.kafka.backup.configs.Backup 5 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 6 | import org.apache.pekko 7 | 8 | import scala.collection.immutable 9 | import scala.concurrent.Future 10 | 11 | import java.util.concurrent.ConcurrentLinkedQueue 12 | 13 | import pekko.Done 14 | import pekko.actor.ActorSystem 15 | import pekko.stream.connectors.s3.S3Headers 16 | import pekko.stream.connectors.s3.S3Settings 17 | import pekko.stream.connectors.s3.SuccessfulUploadPart 18 | import pekko.stream.scaladsl.Flow 19 | import pekko.stream.scaladsl.Sink 20 | 21 | class BackupClientChunkState[T <: KafkaConsumerInterface](maybeS3Settings: Option[S3Settings])(implicit 22 | override val kafkaClientInterface: T, 23 | override val backupConfig: Backup, 24 | override val system: ActorSystem, 25 | s3Config: S3Config, 26 | s3Headers: S3Headers 27 | ) extends BackupClient[T](maybeS3Settings) { 28 | val processedChunks: ConcurrentLinkedQueue[SuccessfulUploadPart] = new ConcurrentLinkedQueue[SuccessfulUploadPart]() 29 | 30 | override def successSink 31 | : Sink[(SuccessfulUploadPart, immutable.Iterable[kafkaClientInterface.CursorContext]), Future[Done]] = 32 | Flow[(SuccessfulUploadPart, immutable.Iterable[kafkaClientInterface.CursorContext])] 33 | .alsoTo(Sink.foreach { case (part, _) => 34 | processedChunks.add(part) 35 | }) 36 | .to(super.successSink) 37 | .mapMaterializedValue(_ => Future.successful(Done)) 38 | } 39 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/BackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import com.softwaremill.diffx.generic.auto._ 4 | import com.softwaremill.diffx.scalatest.DiffMustMatcher._ 5 | import 
io.aiven.guardian.kafka.Generators._ 6 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 7 | import io.aiven.guardian.kafka.codecs.Circe._ 8 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 9 | import io.aiven.guardian.kafka.s3.Generators._ 10 | import io.aiven.guardian.kafka.s3.S3Spec 11 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 12 | import org.apache.pekko 13 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport 14 | import org.scalatest.BeforeAndAfterAll 15 | import org.scalatest.TestData 16 | import org.scalatest.matchers.must.Matchers 17 | 18 | import scala.concurrent.ExecutionContext 19 | import scala.concurrent.Future 20 | import scala.concurrent.duration._ 21 | import scala.language.postfixOps 22 | 23 | import java.time.OffsetDateTime 24 | 25 | import pekko.stream.connectors.s3.scaladsl.S3 26 | import pekko.stream.scaladsl.Keep 27 | import pekko.stream.scaladsl.Sink 28 | import pekko.stream.scaladsl.Source 29 | 30 | trait BackupClientSpec extends S3Spec with Matchers with BeforeAndAfterAll { 31 | 32 | val ThrottleElements: Int = 100 33 | val ThrottleAmount: FiniteDuration = 1 millis 34 | 35 | property("backup method completes flow correctly for all valid Kafka events") { implicit td: TestData => 36 | forAll(kafkaDataWithTimePeriodsGen(), s3ConfigGen(useVirtualDotHost, bucketPrefix)) { 37 | (kafkaDataWithTimePeriod: KafkaDataWithTimePeriod, s3Config: S3Config) => 38 | logger.info(s"Data bucket is ${s3Config.dataBucket}") 39 | val backupClient = new MockedS3BackupClientInterface( 40 | Source(kafkaDataWithTimePeriod.data).throttle(ThrottleElements, ThrottleAmount), 41 | PeriodFromFirst(kafkaDataWithTimePeriod.periodSlice), 42 | s3Config, 43 | Some(s3Settings) 44 | ) 45 | 46 | val delay = 47 | (ThrottleAmount * (kafkaDataWithTimePeriod.data.size / ThrottleElements) * 1.2) + (10 millis) match { 48 | case fd: FiniteDuration => fd 49 | case _: Duration.Infinite => throw new Exception("Expected Finite Duration") 50 | } 51 | 52 | val calculatedFuture = for { 53 | _ <- createBucket(s3Config.dataBucket) 54 | _ <- backupClient.backup.run() 55 | _ <- pekko.pattern.after(delay)(Future.successful(())) 56 | bucketContents <- 57 | S3.listBucket(s3Config.dataBucket, None, s3Headers) 58 | .withAttributes(s3Attrs) 59 | .toMat(Sink.collection)(Keep.right) 60 | .run() 61 | keysWithRecords <- Future.sequence(bucketContents.map { bucketContents => 62 | S3.getObject(s3Config.dataBucket, bucketContents.key) 63 | .withAttributes(s3Attrs) 64 | .via(CirceStreamSupport.decode[List[Option[ReducedConsumerRecord]]]) 65 | .toMat(Sink.collection)(Keep.right) 66 | .run() 67 | .map(list => (bucketContents.key, list.flatten))(ExecutionContext.parasitic) 68 | }) 69 | sorted = keysWithRecords.toList.sortBy { case (key, _) => 70 | val date = key.replace(".json", "") 71 | OffsetDateTime.parse(date).toEpochSecond 72 | }(Ordering[Long].reverse) 73 | flattened = sorted.flatMap { case (_, records) => records } 74 | } yield flattened.collect { case Some(reducedConsumerRecord) => 75 | reducedConsumerRecord 76 | } 77 | val observed = calculatedFuture.futureValue 78 | 79 | kafkaDataWithTimePeriod.data.containsSlice(observed) mustEqual true 80 | if (observed.nonEmpty) { 81 | observed.head mustMatchTo (kafkaDataWithTimePeriod.data.head) 82 | } 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/KafkaConsumerWithKillSwitch.scala: 
-------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.KafkaConsumer 4 | import io.aiven.guardian.kafka.backup.configs.Backup 5 | import io.aiven.guardian.kafka.configs.KafkaCluster 6 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 7 | import org.apache.pekko 8 | 9 | import pekko.actor.ActorSystem 10 | import pekko.kafka.CommitterSettings 11 | import pekko.kafka.ConsumerMessage 12 | import pekko.kafka.ConsumerSettings 13 | import pekko.kafka.scaladsl.Consumer 14 | import pekko.stream.SharedKillSwitch 15 | import pekko.stream.scaladsl.SourceWithContext 16 | 17 | class KafkaConsumerWithKillSwitch( 18 | configureConsumer: Option[ 19 | ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]] 20 | ] = None, 21 | configureCommitter: Option[ 22 | CommitterSettings => CommitterSettings 23 | ] = None, 24 | killSwitch: SharedKillSwitch 25 | )(implicit system: ActorSystem, kafkaClusterConfig: KafkaCluster, backupConfig: Backup) 26 | extends KafkaConsumer(configureConsumer, configureCommitter) { 27 | override def getSource 28 | : SourceWithContext[ReducedConsumerRecord, ConsumerMessage.CommittableOffset, Consumer.Control] = 29 | super.getSource.via(killSwitch.flow) 30 | } 31 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MinioBackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.s3.MinioS3Test 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class MinioBackupClientSpec 8 | extends AnyPropTestKit(ActorSystem("MinioS3BackupClientSpec")) 9 | with BackupClientSpec 10 | with MinioS3Test { 11 | 12 | /** Since Minio doesn't do DNS name verification we can enable this 13 | */ 14 | override lazy val useVirtualDotHost: Boolean = true 15 | } 16 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MockedKafkaClientBackupConsumerSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import com.softwaremill.diffx.scalatest.DiffMustMatcher._ 4 | import io.aiven.guardian.kafka.Generators._ 5 | import io.aiven.guardian.kafka.Utils 6 | import io.aiven.guardian.kafka.backup.MockedBackupClientInterface 7 | import io.aiven.guardian.kafka.backup.MockedKafkaConsumerInterface 8 | import io.aiven.guardian.kafka.backup.configs.Backup 9 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 10 | import io.aiven.guardian.kafka.codecs.Circe._ 11 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 12 | import io.aiven.guardian.kafka.s3.Generators._ 13 | import io.aiven.guardian.kafka.s3.S3Spec 14 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 15 | import io.aiven.guardian.pekko.AnyPropTestKit 16 | import org.apache.pekko 17 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport 18 | import org.scalatest.TestData 19 | import org.scalatest.matchers.must.Matchers 20 | 21 | import scala.concurrent.ExecutionContext 22 | import scala.concurrent.Future 23 | import scala.concurrent.duration.FiniteDuration 24 | import scala.concurrent.duration._ 25 | import 
scala.language.postfixOps 26 | 27 | import pekko.actor.ActorSystem 28 | import pekko.stream.connectors.s3.S3Settings 29 | import pekko.stream.connectors.s3.scaladsl.S3 30 | import pekko.stream.scaladsl.Sink 31 | import pekko.stream.scaladsl.Source 32 | 33 | class MockedKafkaClientBackupConsumerSpec 34 | extends AnyPropTestKit(ActorSystem("MockedKafkaClientBackupClientSpec")) 35 | with S3Spec 36 | with Matchers { 37 | override lazy val s3Settings: S3Settings = S3Settings() 38 | 39 | /** Virtual Dot Host in bucket names are disabled because you need an actual DNS certificate otherwise AWS will fail 40 | * on bucket creation 41 | */ 42 | override lazy val useVirtualDotHost: Boolean = false 43 | override lazy val bucketPrefix: Option[String] = Some("guardian-") 44 | override lazy val enableCleanup: Option[FiniteDuration] = Some(5 seconds) 45 | 46 | property( 47 | "Creating many objects in a small period of time works despite S3's in progress multipart upload eventual consistency issues" 48 | ) { implicit td: TestData => 49 | forAll( 50 | kafkaDataWithTimePeriodsGen(20, 51 | 20, 52 | padTimestampsMillis = Range.inclusive(1000, 1000), 53 | trailingSentinelValue = true 54 | ), 55 | s3ConfigGen(useVirtualDotHost, bucketPrefix) 56 | ) { (kafkaDataWithTimePeriod: KafkaDataWithTimePeriod, s3Config: S3Config) => 57 | logger.info(s"Data bucket is ${s3Config.dataBucket}") 58 | val data = kafkaDataWithTimePeriod.data 59 | 60 | implicit val config: S3Config = s3Config 61 | implicit val backupConfig: Backup = 62 | Backup(MockedBackupClientInterface.KafkaGroupId, PeriodFromFirst(1 second), 10 seconds, None) 63 | 64 | val backupClient = 65 | new BackupClient(Some(s3Settings))(new MockedKafkaConsumerInterface(Source(data)), 66 | implicitly, 67 | implicitly, 68 | implicitly, 69 | implicitly 70 | ) 71 | 72 | val calculatedFuture = for { 73 | _ <- createBucket(s3Config.dataBucket) 74 | _ = backupClient.backup.run() 75 | bucketContents <- pekko.pattern.after(10 seconds)( 76 | S3.listBucket(s3Config.dataBucket, None).withAttributes(s3Attrs).runWith(Sink.seq) 77 | ) 78 | keysSorted = bucketContents.map(_.key).sortBy(Utils.keyToOffsetDateTime) 79 | downloaded <- 80 | Future 81 | .sequence(keysSorted.map { key => 82 | S3.getObject(s3Config.dataBucket, key) 83 | .withAttributes(s3Attrs) 84 | .via(CirceStreamSupport.decode[List[Option[ReducedConsumerRecord]]]) 85 | .runWith(Sink.seq) 86 | }) 87 | .map(_.flatten)(ExecutionContext.parasitic) 88 | 89 | } yield downloaded.flatten.collect { case Some(reducedConsumerRecord) => 90 | reducedConsumerRecord 91 | } 92 | 93 | val downloaded = calculatedFuture.futureValue 94 | 95 | // Only care about ordering when it comes to key 96 | val downloadedGroupedAsKey = downloaded 97 | .groupBy(_.key) 98 | .view 99 | .mapValues { reducedConsumerRecords => 100 | reducedConsumerRecords.map(_.value) 101 | } 102 | .toMap 103 | 104 | val inputAsKey = data 105 | .dropRight(1) // Drop the generated sentinel value which we don't care about 106 | .groupBy(_.key) 107 | .view 108 | .mapValues { reducedConsumerRecords => 109 | reducedConsumerRecords.map(_.value) 110 | } 111 | .toMap 112 | 113 | downloadedGroupedAsKey mustMatchTo inputAsKey 114 | } 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MockedS3BackupClientInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import 
io.aiven.guardian.kafka.backup.MockedBackupClientInterface 4 | import io.aiven.guardian.kafka.backup.MockedKafkaConsumerInterface 5 | import io.aiven.guardian.kafka.backup.configs.Backup 6 | import io.aiven.guardian.kafka.backup.configs.TimeConfiguration 7 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 8 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 9 | import org.apache.pekko 10 | 11 | import scala.concurrent.duration._ 12 | import scala.language.postfixOps 13 | 14 | import pekko.NotUsed 15 | import pekko.actor.ActorSystem 16 | import pekko.stream.connectors.s3.S3Headers 17 | import pekko.stream.connectors.s3.S3Settings 18 | import pekko.stream.scaladsl.Source 19 | 20 | class MockedS3BackupClientInterface( 21 | kafkaData: Source[ReducedConsumerRecord, NotUsed], 22 | timeConfiguration: TimeConfiguration, 23 | s3Config: S3Config, 24 | maybeS3Settings: Option[S3Settings] 25 | )(implicit val s3Headers: S3Headers, system: ActorSystem) 26 | extends BackupClient( 27 | maybeS3Settings 28 | )( 29 | new MockedKafkaConsumerInterface(kafkaData), 30 | Backup(MockedBackupClientInterface.KafkaGroupId, timeConfiguration, 10 seconds, None), 31 | implicitly, 32 | s3Config, 33 | implicitly 34 | ) 35 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/RealS3BackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class RealS3BackupClientSpec extends AnyPropTestKit(ActorSystem("RealS3BackupClientSpec")) with RealS3BackupClientTest { 8 | override val compression: Option[Compression] = None 9 | } 10 | -------------------------------------------------------------------------------- /backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/RealS3GzipCompressionBackupClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class RealS3GzipCompressionBackupClientSpec 9 | extends AnyPropTestKit(ActorSystem("RealS3GzipCompressionBackupClientSpec")) 10 | with RealS3BackupClientTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.backup.BackupClientInterface 5 | import io.aiven.guardian.kafka.backup.KafkaConsumer 6 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface 7 | import org.apache.pekko 8 | 9 | import scala.concurrent.ExecutionContext 10 | import scala.concurrent.Future 11 | 12 | import pekko.Done 13 | import pekko.actor.ActorSystem 14 | import pekko.kafka.scaladsl.Consumer 15 | import pekko.stream.ActorAttributes 16 | import pekko.stream.Supervision 17 | 18 | trait App[T <: KafkaConsumerInterface] extends LazyLogging { 
19 | implicit val kafkaClient: T 20 | implicit val backupClient: BackupClientInterface[KafkaConsumer] 21 | implicit val actorSystem: ActorSystem 22 | implicit val executionContext: ExecutionContext 23 | 24 | def run(): Consumer.Control = { 25 | val decider: Supervision.Decider = { e => 26 | logger.error("Unhandled exception in stream", e) 27 | Supervision.Stop 28 | } 29 | 30 | backupClient.backup.withAttributes(ActorAttributes.supervisionStrategy(decider)).run() 31 | } 32 | 33 | def shutdown(control: Consumer.Control): Future[Done] = { 34 | logger.info("Shutdown of Guardian detected") 35 | val future = control.shutdown() 36 | future.onComplete(_ => logger.info("Guardian shut down")) 37 | future 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/BackupApp.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.cli.PekkoSettings 4 | import io.aiven.guardian.kafka.backup.KafkaConsumer 5 | import io.aiven.guardian.kafka.backup.{Config => BackupConfig} 6 | import io.aiven.guardian.kafka.{Config => KafkaConfig} 7 | 8 | trait BackupApp extends BackupConfig with KafkaConfig with PekkoSettings { 9 | implicit lazy val kafkaClient: KafkaConsumer = new KafkaConsumer() 10 | } 11 | -------------------------------------------------------------------------------- /cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/S3App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.KafkaConsumer 4 | import io.aiven.guardian.kafka.backup.s3.BackupClient 5 | import io.aiven.guardian.kafka.s3.{Config => S3Config} 6 | import org.apache.pekko 7 | 8 | import pekko.stream.connectors.s3.S3Settings 9 | 10 | trait S3App extends S3Config with BackupApp with App[KafkaConsumer] { 11 | lazy val s3Settings: S3Settings = S3Settings() 12 | implicit lazy val backupClient: BackupClient[KafkaConsumer] = new BackupClient[KafkaConsumer](Some(s3Settings)) 13 | } 14 | -------------------------------------------------------------------------------- /cli-backup/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | [%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /cli-backup/src/test/scala/io/aiven/guardian/kafka/backup/CliSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.scalalogging.StrictLogging 4 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice 5 | import io.aiven.guardian.kafka.backup.configs.Compression 6 | import io.aiven.guardian.kafka.backup.configs.{Backup => BackupConfig} 7 | import io.aiven.guardian.kafka.configs.{KafkaCluster => KafkaClusterConfig} 8 | import io.aiven.guardian.kafka.models.Gzip 9 | import markatta.futiles.CancellableFuture 10 | import org.apache.pekko 11 | import org.scalatest.concurrent.ScalaFutures 12 | import org.scalatest.matchers.must.Matchers 13 | import org.scalatest.propspec.AnyPropSpecLike 14 | 15 | import scala.annotation.nowarn 16 | import scala.concurrent.ExecutionContext 17 | import 
scala.concurrent.Future 18 | import scala.concurrent.Promise 19 | import scala.concurrent.duration._ 20 | import scala.language.postfixOps 21 | 22 | import java.time.temporal.ChronoUnit 23 | import java.util.concurrent.TimeUnit 24 | 25 | import pekko.actor.ActorSystem 26 | import pekko.testkit.TestKit 27 | 28 | @nowarn("msg=method main in class CommandApp is deprecated") 29 | class CliSpec 30 | extends TestKit(ActorSystem("BackupCliSpec")) 31 | with AnyPropSpecLike 32 | with Matchers 33 | with ScalaFutures 34 | with StrictLogging { 35 | implicit val ec: ExecutionContext = system.dispatcher 36 | implicit override val patienceConfig: PatienceConfig = PatienceConfig(5 minutes, 100 millis) 37 | 38 | property("Command line args are properly passed into application") { 39 | val groupId = "my-consumer-group" 40 | val topic = "topic" 41 | val bootstrapServer = "localhost:9092" 42 | val dataBucket = "backup-bucket" 43 | 44 | val args = List( 45 | "--storage", 46 | "s3", 47 | "--kafka-topics", 48 | topic, 49 | "--kafka-bootstrap-servers", 50 | bootstrapServer, 51 | "--s3-data-bucket", 52 | dataBucket, 53 | "--kafka-group-id", 54 | groupId, 55 | "--chrono-unit-slice", 56 | "hours", 57 | "--commit-timeout-buffer-window", 58 | "1 second", 59 | "gzip", 60 | "--compression-level", 61 | "5" 62 | ) 63 | 64 | val cancellable = CancellableFuture { 65 | Main.main(args.toArray) 66 | } 67 | 68 | def checkUntilMainInitialized(main: io.aiven.guardian.kafka.backup.Entry): Future[(App[_], Promise[Unit])] = 69 | main.initializedApp.get() match { 70 | case Some((app, promise)) => Future.successful((app, promise)) 71 | case None => pekko.pattern.after(100 millis)(checkUntilMainInitialized(main)) 72 | } 73 | 74 | val (app, promise) = checkUntilMainInitialized(Main).futureValue 75 | 76 | cancellable.cancel() 77 | promise.success(()) 78 | 79 | app match { 80 | case s3App: S3App => 81 | s3App.backupConfig mustEqual BackupConfig(groupId, 82 | ChronoUnitSlice(ChronoUnit.HOURS), 83 | FiniteDuration(1, TimeUnit.SECONDS), 84 | Some(Compression(Gzip, Some(5))) 85 | ) 86 | s3App.kafkaClusterConfig mustEqual KafkaClusterConfig(Set(topic)) 87 | s3App.kafkaClient.consumerSettings.getProperty("bootstrap.servers") mustEqual bootstrapServer 88 | s3App.s3Config.dataBucket mustEqual dataBucket 89 | case _ => 90 | fail("Expected an App to be initialized") 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /cli-compaction/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/guardian-for-apache-kafka/9fadf3388140820b161cf28744d1587b91bf0776/cli-compaction/.gitkeep -------------------------------------------------------------------------------- /cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.restore.KafkaProducer 5 | import io.aiven.guardian.kafka.restore.s3.RestoreClient 6 | import org.apache.pekko 7 | 8 | import scala.concurrent.Future 9 | 10 | import pekko.Done 11 | import pekko.actor.ActorSystem 12 | import pekko.stream.ActorAttributes 13 | import pekko.stream.KillSwitch 14 | import pekko.stream.Supervision 15 | import pekko.stream.UniqueKillSwitch 16 | 17 | trait App extends LazyLogging { 18 | implicit val kafkaProducer: KafkaProducer 19 | implicit val restoreClient: 
RestoreClient[KafkaProducer] 20 | implicit val actorSystem: ActorSystem 21 | 22 | val decider: Supervision.Decider = { e => 23 | logger.error("Unhandled exception in stream", e) 24 | Supervision.Stop 25 | } 26 | 27 | def run(): (UniqueKillSwitch, Future[Done]) = 28 | restoreClient.restore.withAttributes(ActorAttributes.supervisionStrategy(decider)).run() 29 | 30 | def shutdown(killSwitch: KillSwitch): Unit = { 31 | logger.info("Shutdown of Guardian detected") 32 | killSwitch.shutdown() 33 | logger.info("Guardian shut down") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/RestoreApp.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.cli.PekkoSettings 4 | import io.aiven.guardian.kafka.restore.KafkaProducer 5 | import io.aiven.guardian.kafka.restore.{Config => RestoreConfig} 6 | import io.aiven.guardian.kafka.{Config => KafkaConfig} 7 | 8 | trait RestoreApp extends RestoreConfig with KafkaConfig with PekkoSettings { 9 | implicit lazy val kafkaProducer: KafkaProducer = new KafkaProducer() 10 | } 11 | -------------------------------------------------------------------------------- /cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/S3App.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.restore.s3.RestoreClient 5 | import io.aiven.guardian.kafka.s3.{Config => S3Config} 6 | import org.apache.pekko 7 | 8 | import pekko.stream.ActorAttributes 9 | import pekko.stream.Attributes 10 | import pekko.stream.Supervision 11 | import pekko.stream.connectors.s3.S3Settings 12 | 13 | trait S3App extends S3Config with RestoreApp with App with LazyLogging { 14 | lazy val s3Settings: S3Settings = S3Settings() 15 | implicit lazy val restoreClient: RestoreClient[KafkaProducer] = 16 | new RestoreClient[KafkaProducer](Some(s3Settings)) { 17 | override val maybeAttributes: Some[Attributes] = { 18 | val decider: Supervision.Decider = { e => 19 | logger.error("Unhandled exception in stream", e) 20 | Supervision.Stop 21 | } 22 | 23 | Some(ActorAttributes.supervisionStrategy(decider)) 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /cli-restore/src/test/scala/io/aiven/guardian/kafka/restore/CliSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.configs.{KafkaCluster => KafkaClusterConfig} 4 | import io.aiven.guardian.kafka.restore.configs.{Restore => RestoreConfig} 5 | import org.apache.kafka.clients.producer.ProducerConfig 6 | import org.scalatest.matchers.must.Matchers 7 | import org.scalatest.propspec.AnyPropSpec 8 | 9 | import scala.annotation.nowarn 10 | import scala.jdk.CollectionConverters._ 11 | 12 | import java.time.Instant 13 | import java.time.ZoneId 14 | 15 | @nowarn("msg=method main in class CommandApp is deprecated") 16 | class CliSpec extends AnyPropSpec with Matchers { 17 | 18 | property("Command line args are properly passed into application") { 19 | val bootstrapServer = "localhost:9092" 20 | val fromWhen = Instant.ofEpochMilli(0).atZone(ZoneId.of("UTC")).toOffsetDateTime 21 | val topic1 = "topic-1" 22 | val topic2 = "topic-2" 
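    // Editor's note (illustrative, not part of the original test): each `--override-topics` argument
    // below is passed in `<source-topic>:<destination-topic>` form and ends up as the override-topics
    // mapping of `RestoreConfig`, which the assertions at the end of this property verify. A minimal
    // sketch of that shape, using hypothetical helper code:
    //
    //   val overrides = List("topic-1:restore-topic-1", "topic-2:restore-topic-2")
    //   val overrideMap = overrides.map { arg =>
    //     val Array(src, dst) = arg.split(":", 2)
    //     src -> dst
    //   }.toMap
    //   // overrideMap == Map("topic-1" -> "restore-topic-1", "topic-2" -> "restore-topic-2")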
23 | val restoreTopicOne = s"restore-$topic1" 24 | val restoreTopicTwo = s"restore-$topic2" 25 | val overrideTopicOne = s"$topic1:$restoreTopicOne" 26 | val overrideTopicTwo = s"$topic2:$restoreTopicTwo" 27 | val dataBucket = "backup-bucket" 28 | 29 | val args = List( 30 | "--storage", 31 | "s3", 32 | "--kafka-topics", 33 | topic1, 34 | "--kafka-topics", 35 | topic2, 36 | "--kafka-bootstrap-servers", 37 | bootstrapServer, 38 | "--s3-data-bucket", 39 | dataBucket, 40 | "--from-when", 41 | fromWhen.toString, 42 | "--override-topics", 43 | overrideTopicOne, 44 | "--override-topics", 45 | overrideTopicTwo, 46 | "--single-message-per-kafka-request" 47 | ) 48 | 49 | try Main.main(args.toArray) 50 | catch { 51 | case _: Throwable => 52 | } 53 | Main.initializedApp.get() match { 54 | case Some(s3App: S3App) => 55 | s3App.restoreConfig mustEqual RestoreConfig(Some(fromWhen), 56 | Some( 57 | Map( 58 | topic1 -> restoreTopicOne, 59 | topic2 -> restoreTopicTwo 60 | ) 61 | ) 62 | ) 63 | s3App.kafkaClusterConfig mustEqual KafkaClusterConfig(Set(topic1, topic2)) 64 | s3App.kafkaProducer.producerSettings.getProperties.get("bootstrap.servers") mustEqual bootstrapServer 65 | s3App.s3Config.dataBucket mustEqual dataBucket 66 | (Map( 67 | ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG -> true.toString, 68 | ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION -> 1.toString, 69 | ProducerConfig.BATCH_SIZE_CONFIG -> 0.toString 70 | ): Map[String, AnyRef]).toSet 71 | .subsetOf(s3App.kafkaProducer.producerSettings.getProperties.asScala.toMap.toSet) mustEqual true 72 | s3App.kafkaProducer.producerSettings.parallelism mustEqual 1 73 | case _ => 74 | fail("Expected an App to be initialized") 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | storage-config-gcs { 2 | parallel-object-download-limit = 10 3 | parallel-object-download-limit = ${?STORAGE_CONFIG_GCS_PARALLEL_OBJECT_DOWNLOAD_LIMIT} 4 | } 5 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.gcs 2 | 3 | import io.aiven.guardian.kafka.compaction.gcs.models.StorageConfig 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | @nowarn("cat=lint-byname-implicit") 11 | implicit lazy val storageConfigGCS: StorageConfig = 12 | ConfigSource.default.at("storage-config-gcs").loadOrThrow[StorageConfig] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/StorageClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.gcs 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.compaction.StorageInterface 5 | import io.aiven.guardian.kafka.compaction.gcs.models.StorageConfig 6 | import io.aiven.guardian.kafka.gcs.errors.GCSErrors 7 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 8 | import org.apache.pekko 9 | 10 | import scala.annotation.nowarn 11 | 12 | import pekko.NotUsed 13 | import 
pekko.stream.connectors.googlecloud.storage.scaladsl.GCStorage 14 | import pekko.stream.scaladsl.Source 15 | 16 | class StorageClient(bucketName: String, maybePrefix: Option[String])(implicit storageConfig: StorageConfig) 17 | extends StorageInterface 18 | with LazyLogging { 19 | 20 | /** Retrieve Kafka data from a given storage source 21 | * 22 | * @return 23 | */ 24 | @throws(classOf[GCSErrors.ExpectedObjectToExist]) 25 | override def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] = { 26 | 27 | @nowarn("msg=is never used") 28 | // TODO filter the correct buckets to retrieve 29 | val byteStringSource = GCStorage 30 | .listBucket(bucketName, maybePrefix, versions = false) 31 | .flatMapMerge( 32 | storageConfig.parallelObjectDownloadLimit, 33 | storageObject => 34 | GCStorage 35 | .download(bucketName, storageObject.name) 36 | .map( 37 | _.getOrElse( 38 | throw GCSErrors.ExpectedObjectToExist(bucketName, maybePrefix) 39 | ) 40 | ) 41 | ) 42 | 43 | // TODO serialization from raw bytes to Kafka Topic Format 44 | ??? 45 | } 46 | 47 | /** Checks whether the storage exists and is accessible 48 | */ 49 | def checkStorageAccessible: Source[Boolean, NotUsed] = 50 | GCStorage.getBucketSource(bucketName).map(_.isDefined).map { 51 | case false => 52 | logger.error(s"Failed accessing GCS $bucketName") 53 | false 54 | case true => 55 | logger.info(s"Successfully accessed GCS $bucketName") 56 | true 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/models/StorageConfig.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.gcs.models 2 | 3 | final case class StorageConfig(parallelObjectDownloadLimit: Int) 4 | -------------------------------------------------------------------------------- /compaction-s3/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | storage-config-s3 { 2 | parallel-object-download-limit = 10 3 | parallel-object-download-limit = ${?STORAGE_CONFIG_S3_PARALLEL_OBJECT_DOWNLOAD_LIMIT} 4 | } 5 | -------------------------------------------------------------------------------- /compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.s3 2 | 3 | import io.aiven.guardian.kafka.compaction.s3.models.StorageConfig 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | @nowarn("cat=lint-byname-implicit") 11 | implicit lazy val storageConfigS3: StorageConfig = 12 | ConfigSource.default.at("storage-config-s3").loadOrThrow[StorageConfig] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/StorageClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.s3 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.compaction.StorageInterface 5 | import io.aiven.guardian.kafka.compaction.s3.models.StorageConfig 6 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 7 | import io.aiven.guardian.kafka.s3.errors.S3Errors 8 | import 
org.apache.pekko 9 | 10 | import scala.annotation.nowarn 11 | 12 | import pekko.NotUsed 13 | import pekko.stream.connectors.s3.BucketAccess 14 | import pekko.stream.connectors.s3.S3Headers 15 | import pekko.stream.connectors.s3.scaladsl.S3 16 | import pekko.stream.scaladsl.Source 17 | 18 | class StorageClient(bucketName: String, prefix: Option[String], s3Headers: S3Headers)(implicit 19 | storageConfig: StorageConfig 20 | ) extends StorageInterface 21 | with LazyLogging { 22 | 23 | /** Retrieve Kafka data from a given storage source 24 | * 25 | * @return 26 | */ 27 | @throws(classOf[S3Errors.ExpectedObjectToExist]) 28 | override def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] = { 29 | // TODO filter the correct buckets to retrieve 30 | @nowarn("msg=is never used") 31 | val byteStringSource = S3 32 | .listBucket(bucketName, prefix, s3Headers) 33 | .flatMapMerge( 34 | storageConfig.parallelObjectDownloadLimit, 35 | bucketDetails => S3.getObject(bucketName, bucketDetails.key, None, None, s3Headers) 36 | ) 37 | 38 | // TODO serialization from raw bytes to Kafka Topic Format 39 | ??? 40 | } 41 | 42 | /** Checks whether the storage exists and is accessible 43 | */ 44 | def checkStorageAccessible: Source[Boolean, NotUsed] = 45 | S3.checkIfBucketExistsSource(bucketName, s3Headers).map { 46 | case e @ (BucketAccess.AccessDenied | BucketAccess.NotExists) => 47 | logger.error(s"Accessing S3 $bucketName gave ${e.toString}") 48 | false 49 | case BucketAccess.AccessGranted => 50 | logger.info(s"Successfully accessed S3 $bucketName") 51 | true 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/models/StorageConfig.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction.s3.models 2 | 3 | final case class StorageConfig(parallelObjectDownloadLimit: Int) 4 | -------------------------------------------------------------------------------- /core-backup/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | pekko.kafka.consumer = { 2 | poll-interval = ${?PEKKO_KAFKA_CONSUMER_POLL_INTERVAL} 3 | poll-timeout = ${?PEKKO_KAFKA_CONSUMER_POLL_TIMEOUT} 4 | stop-timeout = ${?PEKKO_KAFKA_CONSUMER_STOP_TIMEOUT} 5 | close-timeout = ${?PEKKO_KAFKA_CONSUMER_CLOSE_TIMEOUT} 6 | commit-time-warning = ${?PEKKO_KAFKA_CONSUMER_COMMIT_TIME_WARNING} 7 | commit-refresh-interval = ${?PEKKO_KAFKA_CONSUMER_COMMIT_REFRESH_INTERVAL} 8 | use-dispatcher = ${?PEKKO_KAFKA_CONSUMER_USE_DISPATCHER} 9 | wait-close-partition = ${?PEKKO_KAFKA_CONSUMER_WAIT_CLOSE_PARTITION} 10 | position-timeout = ${?PEKKO_KAFKA_CONSUMER_POSITION_TIMEOUT} 11 | offset-for-times-timeout = ${?PEKKO_KAFKA_OFFSET_FOR_TIMES_TIMEOUT} 12 | metadata-request-timeout = ${?PEKKO_KAFKA_METADATA_REQUEST_TIMEOUT} 13 | eos-draining-check-interval = ${?PEKKO_KAFKA_CONSUMER_EOS_DRAINING_CHECK_INTERVAL} 14 | connection-checker = { 15 | enable = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_ENABLE} 16 | max-retries = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_MAX_RETRIES} 17 | backoff-factor = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_BACKOFF_FACTOR} 18 | check-interval = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_CHECK_INTERVAL} 19 | } 20 | partition-handler-warning = ${?PEKKO_KAFKA_CONSUMER_PARTITION_HANDLER_WARNING} 21 | offset-reset-protection = { 22 | enable = 
${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_ENABLE} 23 | offset-threshold = ${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_OFFSET_THRESHOLD} 24 | time-threshold = ${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_TIME_THRESHOLD} 25 | } 26 | } 27 | 28 | pekko.kafka.committer = { 29 | max-batch = 100000 30 | max-batch = ${?PEKKO_KAFKA_COMMITTER_MAX_BATCH} 31 | max-interval = 1 hour 32 | max-interval = ${?PEKKO_KAFKA_COMMITTER_MAX_INTERVAL} 33 | parallelism = ${?PEKKO_KAFKA_COMMITTER_PARALLELISM} 34 | parallelism = 10000 35 | } 36 | 37 | backup { 38 | kafka-group-id = ${?BACKUP_KAFKA_GROUP_ID} 39 | time-configuration = { 40 | type = chrono-unit-slice 41 | type = ${?BACKUP_TIME_CONFIGURATION_TYPE} 42 | chrono-unit = hours 43 | chrono-unit = ${?BACKUP_TIME_CONFIGURATION_CHRONO_UNIT} 44 | duration = 1 hour 45 | duration = ${?BACKUP_TIME_CONFIGURATION_DURATION} 46 | } 47 | commit-timeout-buffer-window = 10 seconds 48 | commit-timeout-buffer-window = ${?BACKUP_COMMIT_TIMEOUT_BUFFER} 49 | } 50 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Backup 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | 11 | @nowarn("cat=lint-byname-implicit") 12 | implicit lazy val backupConfig: Backup = ConfigSource.default.at("backup").loadOrThrow[Backup] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/KafkaConsumer.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.backup.configs.Backup 5 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice 6 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 7 | import io.aiven.guardian.kafka.configs.KafkaCluster 8 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 9 | import org.apache.kafka.clients.consumer.ConsumerConfig 10 | import org.apache.kafka.clients.consumer.ConsumerRecord 11 | import org.apache.kafka.common.serialization.ByteArrayDeserializer 12 | import org.apache.pekko 13 | 14 | import scala.collection.immutable 15 | import scala.concurrent.Future 16 | import scala.jdk.DurationConverters._ 17 | 18 | import java.util.Base64 19 | 20 | import pekko.Done 21 | import pekko.actor.ActorSystem 22 | import pekko.kafka.CommitDelivery 23 | import pekko.kafka.CommitterSettings 24 | import pekko.kafka.ConsumerMessage.CommittableOffset 25 | import pekko.kafka.ConsumerMessage.CommittableOffsetBatch 26 | import pekko.kafka.ConsumerSettings 27 | import pekko.kafka.Subscriptions 28 | import pekko.kafka.scaladsl.Committer 29 | import pekko.kafka.scaladsl.Consumer 30 | import pekko.stream.scaladsl.Sink 31 | import pekko.stream.scaladsl.SourceWithContext 32 | 33 | /** A Kafka Client that uses Pekko Connectors Kafka Consumer under the hood to create a stream of events from a Kafka 34 | * cluster. To configure the Pekko Connectors Kafka Consumer use the standard typesafe configuration i.e. 
35 | * pekko.kafka.consumer (note that the `keySerializer` and `valueSerializer` are hardcoded so you cannot override 36 | * this). 37 | * @param configureConsumer 38 | * A way to configure the underlying Kafka consumer settings 39 | * @param configureCommitter 40 | * A way to configure the underlying kafka committer settings 41 | * @param system 42 | * A classic `ActorSystem` 43 | * @param kafkaClusterConfig 44 | * Additional cluster configuration that is needed 45 | */ 46 | class KafkaConsumer( 47 | configureConsumer: Option[ 48 | ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]] 49 | ] = None, 50 | configureCommitter: Option[ 51 | CommitterSettings => CommitterSettings 52 | ] = None 53 | )(implicit system: ActorSystem, kafkaClusterConfig: KafkaCluster, backupConfig: Backup) 54 | extends KafkaConsumerInterface 55 | with LazyLogging { 56 | override type CursorContext = CommittableOffset 57 | override type Control = Consumer.Control 58 | override type MatCombineResult = Consumer.DrainingControl[Done] 59 | override type BatchedCursorContext = CommittableOffsetBatch 60 | 61 | import KafkaConsumer._ 62 | 63 | if (kafkaClusterConfig.topics.isEmpty) 64 | logger.warn("Kafka Cluster configuration has no topics set") 65 | 66 | private[kafka] val consumerSettings = { 67 | val base = ConsumerSettings(system, new ByteArrayDeserializer, new ByteArrayDeserializer) 68 | configureConsumer 69 | .fold(base)(block => block(base)) 70 | .withProperties( 71 | ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest" 72 | ) 73 | .withCommitTimeout { 74 | val baseDuration = backupConfig.timeConfiguration match { 75 | case PeriodFromFirst(duration) => duration 76 | case ChronoUnitSlice(chronoUnit) => 77 | chronoUnit.getDuration.toScala 78 | } 79 | 80 | baseDuration + backupConfig.commitTimeoutBufferWindow 81 | } 82 | .withGroupId( 83 | backupConfig.kafkaGroupId 84 | ) 85 | } 86 | 87 | private[kafka] val subscriptions = Subscriptions.topics(kafkaClusterConfig.topics) 88 | 89 | /** @return 90 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors 91 | */ 92 | override def getSource: SourceWithContext[ReducedConsumerRecord, CommittableOffset, Consumer.Control] = 93 | Consumer 94 | .sourceWithOffsetContext(consumerSettings, subscriptions) 95 | .map(consumerRecordToReducedConsumerRecord) 96 | 97 | private[kafka] val committerSettings: CommitterSettings = { 98 | val base = CommitterSettings(system) 99 | configureCommitter 100 | .fold(base)(block => block(base)) 101 | .withDelivery(CommitDelivery.waitForAck) 102 | } 103 | 104 | /** @return 105 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message 106 | */ 107 | override def commitCursor: Sink[CommittableOffsetBatch, Future[Done]] = Committer.sink(committerSettings) 108 | 109 | /** @return 110 | * The result of this function gets directly passed into the `combine` parameter of 111 | * [[pekko.stream.scaladsl.Source.toMat]] 112 | */ 113 | override def matCombine: (Consumer.Control, Future[Done]) => Consumer.DrainingControl[Done] = 114 | Consumer.DrainingControl[Done].apply 115 | 116 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext` 117 | * @param cursors 118 | * The cursors that need to be batched 119 | * @return 120 | * A collection data structure that represents the batched cursors 121 | */ 122 | override def batchCursorContext(cursors: immutable.Iterable[CommittableOffset]): 
CommittableOffsetBatch = 123 | CommittableOffsetBatch(cursors.toSeq) 124 | } 125 | 126 | object KafkaConsumer { 127 | def consumerRecordToReducedConsumerRecord( 128 | consumerRecord: ConsumerRecord[Array[Byte], Array[Byte]] 129 | ): ReducedConsumerRecord = 130 | ReducedConsumerRecord( 131 | consumerRecord.topic(), 132 | consumerRecord.partition(), 133 | consumerRecord.offset(), 134 | Option(consumerRecord.key()).map(byteArray => Base64.getEncoder.encodeToString(byteArray)), 135 | Base64.getEncoder.encodeToString(consumerRecord.value()), 136 | consumerRecord.timestamp(), 137 | consumerRecord.timestampType() 138 | ) 139 | } 140 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/KafkaConsumerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.collection.immutable 7 | import scala.concurrent.Future 8 | 9 | import pekko.Done 10 | import pekko.stream.scaladsl.Sink 11 | import pekko.stream.scaladsl.SourceWithContext 12 | 13 | trait KafkaConsumerInterface { 14 | 15 | /** The type of the context to pass around. In context of a Kafka consumer, this typically holds offset data to be 16 | * automatically committed 17 | */ 18 | type CursorContext 19 | 20 | /** The type that represents how to control the given stream, i.e. if you want to shut it down or add metrics 21 | */ 22 | type Control 23 | 24 | /** The type that represents the result of the `combine` parameter that is supplied to 25 | * [[pekko.stream.scaladsl.Source.toMat]] 26 | */ 27 | type MatCombineResult 28 | 29 | /** The type that represents the result of batching a `CursorContext` 30 | */ 31 | type BatchedCursorContext 32 | 33 | /** @return 34 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors 35 | */ 36 | def getSource: SourceWithContext[ReducedConsumerRecord, CursorContext, Control] 37 | 38 | /** @return 39 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message 40 | */ 41 | def commitCursor: Sink[BatchedCursorContext, Future[Done]] 42 | 43 | /** @return 44 | * The result of this function gets directly passed into the `combine` parameter of 45 | * [[pekko.stream.scaladsl.Source.toMat]] 46 | */ 47 | def matCombine: (Control, Future[Done]) => MatCombineResult 48 | 49 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext` 50 | * @param cursors 51 | * The cursors that need to be batched 52 | * @return 53 | * A collection data structure that represents the batched cursors 54 | */ 55 | def batchCursorContext(cursors: immutable.Iterable[CursorContext]): BatchedCursorContext 56 | } 57 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/Backup.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.configs 2 | 3 | import scala.concurrent.duration.FiniteDuration 4 | 5 | /** @param kafkaGroupId 6 | * The group-id that the Kafka consumer will use 7 | * @param timeConfiguration 8 | * Determines how the backed up objects/files are segregated depending on a time configuration 9 | * @param commitTimeoutBufferWindow 10 | * A buffer that is added ontop of the 
`timeConfiguration` when setting the Kafka Consumer commit timeout. 11 | * @param compression 12 | * Which compression to use for the backed up data 13 | */ 14 | final case class Backup(kafkaGroupId: String, 15 | timeConfiguration: TimeConfiguration, 16 | commitTimeoutBufferWindow: FiniteDuration, 17 | compression: Option[Compression] 18 | ) 19 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/Compression.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.configs 2 | 3 | import io.aiven.guardian.kafka.models.CompressionType 4 | 5 | final case class Compression(`type`: CompressionType, level: Option[Int]) 6 | -------------------------------------------------------------------------------- /core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/TimeConfiguration.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup.configs 2 | 3 | import scala.concurrent.duration.FiniteDuration 4 | 5 | import java.time.temporal.ChronoUnit 6 | 7 | sealed trait TimeConfiguration 8 | 9 | /** Backs up objects/files depending on the timestamp fo the first received Kafka message. Suspending/resuming the 10 | * backup client will always create a new object/file 11 | * @param duration 12 | * The maximum span of time for each object/file, when this duration is exceeded a new file is created 13 | */ 14 | final case class PeriodFromFirst(duration: FiniteDuration) extends TimeConfiguration 15 | 16 | /** Backs up objects/files by collecting received Kafka messages into a single time slice based on a 17 | * [[java.time.temporal.ChronoUnit]]. When suspending/resuming the backup client, this option will reuse existing 18 | * objects/files if they fall into the currently configured `chronoUnit`. 19 | * @param chronoUnit 20 | * Timestamps for kafka messages that are contained within the configured [[java.time.temporal.ChronoUnit]] will be 21 | * placed into the same object/file. 
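 * Example (illustrative, not from the original sources):
 * {{{
 * import java.time.temporal.ChronoUnit
 *
 * // every record whose timestamp falls inside the same clock hour is appended to the same object/file
 * val hourly: TimeConfiguration = ChronoUnitSlice(ChronoUnit.HOURS)
 * }}}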
22 | */ 23 | final case class ChronoUnitSlice(chronoUnit: ChronoUnit) extends TimeConfiguration 24 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/BackupClientControlWrapper.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import org.apache.pekko 4 | 5 | import scala.concurrent.ExecutionContext 6 | import scala.concurrent.Future 7 | 8 | import pekko.Done 9 | import pekko.kafka.scaladsl.Consumer 10 | import pekko.stream.Materializer 11 | 12 | /** A wrapper that is designed to make it easier to cleanly shutdown resources in tests 13 | */ 14 | class BackupClientControlWrapper[T <: KafkaConsumer](backupClient: BackupClientInterface[T])(implicit 15 | materializer: Materializer, 16 | ec: ExecutionContext 17 | ) { 18 | 19 | private var control: Consumer.DrainingControl[Done] = _ 20 | 21 | def run(): Unit = 22 | control = backupClient.backup.run() 23 | 24 | @SuppressWarnings(Array("DisableSyntax.null")) 25 | def shutdown(): Future[Done] = 26 | if (control != null) 27 | control.drainAndShutdown() 28 | else 29 | Future.successful(Done) 30 | } 31 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/BackupClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class BackupClientInterfaceSpec 8 | extends AnyPropTestKit(ActorSystem("BackupClientInterfaceSpec")) 9 | with BackupClientInterfaceTest { 10 | override val compression: Option[Compression] = None 11 | } 12 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/CompressionSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.{Compression => CompressionModel} 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import io.aiven.guardian.pekko.PekkoStreamTestKit 7 | import org.apache.pekko 8 | import org.scalatest.concurrent.ScalaFutures 9 | import org.scalatest.matchers.must.Matchers 10 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 11 | 12 | import scala.concurrent.ExecutionContext 13 | import scala.concurrent.duration._ 14 | import scala.language.postfixOps 15 | 16 | import pekko.actor.ActorSystem 17 | import pekko.stream.scaladsl.Compression 18 | import pekko.stream.scaladsl.Source 19 | import pekko.stream.scaladsl.SourceWithContext 20 | import pekko.util.ByteString 21 | 22 | class CompressionSpec 23 | extends AnyPropTestKit(ActorSystem("CompressionSpec")) 24 | with Matchers 25 | with ScalaFutures 26 | with ScalaCheckPropertyChecks 27 | with PekkoStreamTestKit { 28 | 29 | implicit val ec: ExecutionContext = system.dispatcher 30 | 31 | // Due to pekko-streams taking a while to initialize for the first time we need a longer 32 | // increase in the timeout 33 | implicit override val patienceConfig: PatienceConfig = PatienceConfig(10 seconds, 15 millis) 34 | 35 | property("GZip compression works with a SourceWithContext/FlowWithContext") { 
_ => 36 | forAll { data: List[String] => 37 | val asByteString = data.map(ByteString.fromString) 38 | val zippedWithIndex = asByteString.zipWithIndex 39 | val sourceWithContext = SourceWithContext.fromTuples( 40 | Source(zippedWithIndex) 41 | ) 42 | 43 | val calculatedFuture = for { 44 | compressed <- sourceWithContext 45 | .unsafeDataVia(BackupClientInterface.compressionFlow(CompressionModel(Gzip, None))) 46 | .asSource 47 | .map { case (byteString, _) => byteString } 48 | .runFold(ByteString.empty)(_ ++ _) 49 | decompressed <- Source.single(compressed).via(Compression.gunzip()).runFold(ByteString.empty)(_ ++ _) 50 | } yield decompressed 51 | 52 | val decompressed = calculatedFuture.futureValue 53 | data.mkString mustEqual decompressed.utf8String 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/ConfigSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import com.typesafe.config.ConfigValueFactory 5 | import io.aiven.guardian.kafka.backup.configs.Backup 6 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice 7 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst 8 | import io.aiven.guardian.kafka.backup.configs.TimeConfiguration 9 | import org.scalacheck.Arbitrary 10 | import org.scalacheck.Gen 11 | import org.scalatest.matchers.must.Matchers 12 | import org.scalatest.propspec.AnyPropSpec 13 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 14 | import pureconfig.ConfigSource 15 | import pureconfig.generic.auto._ 16 | 17 | import scala.annotation.nowarn 18 | import scala.concurrent.duration.FiniteDuration 19 | 20 | import java.time.temporal.ChronoUnit 21 | 22 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 23 | implicit val chronoUnitArb: Arbitrary[ChronoUnit] = Arbitrary( 24 | Gen.oneOf(ChronoUnit.values().toList) 25 | ) 26 | 27 | property("Valid TimeConfiguration chrono-unit-slice configs should parse correctly") { 28 | forAll { (chronoUnit: ChronoUnit) => 29 | val conf = 30 | s""" 31 | |time-configuration = { 32 | | type = chrono-unit-slice 33 | | chrono-unit = ${chronoUnit.name.toLowerCase} 34 | |} 35 | |""".stripMargin 36 | 37 | @nowarn("cat=lint-byname-implicit") 38 | val backup = ConfigSource.string(conf).at("time-configuration").loadOrThrow[TimeConfiguration] 39 | backup mustEqual ChronoUnitSlice(chronoUnit) 40 | } 41 | } 42 | 43 | property("Valid TimeConfiguration period-from-first configs should parse correctly") { 44 | forAll { (finiteDuration: FiniteDuration) => 45 | val conf = 46 | s""" 47 | |time-configuration = { 48 | | type = period-from-first 49 | | duration = ${finiteDuration.toString()} 50 | |} 51 | |""".stripMargin 52 | 53 | @nowarn("cat=lint-byname-implicit") 54 | val backup = ConfigSource.string(conf).at("time-configuration").loadOrThrow[TimeConfiguration] 55 | backup mustEqual PeriodFromFirst(finiteDuration) 56 | } 57 | } 58 | 59 | property("Default Backup configuration loads") { 60 | val config = ConfigFactory.load() 61 | 62 | // Inject mandatory values that have no default into the configuration 63 | val configWithMandatoryValues = 64 | config.withValue("backup.kafka-group-id", ConfigValueFactory.fromAnyRef(MockedBackupClientInterface.KafkaGroupId)) 65 | 66 | @nowarn("cat=lint-byname-implicit") 67 | def readConfiguration = 
ConfigSource.fromConfig(configWithMandatoryValues).at("backup").loadOrThrow[Backup] 68 | 69 | noException should be thrownBy readConfiguration 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/GzipCompressionBackupClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class GzipCompressionBackupClientInterfaceSpec 9 | extends AnyPropTestKit(ActorSystem("GzipCompressionBackupClientInterfaceSpec")) 10 | with BackupClientInterfaceTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /core-backup/src/test/scala/io/aiven/guardian/kafka/backup/MockedKafkaConsumerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.backup 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.collection.immutable 7 | import scala.concurrent.Future 8 | import scala.concurrent.duration.FiniteDuration 9 | 10 | import java.time.Instant 11 | import java.time.temporal.ChronoUnit 12 | import java.util.concurrent.ConcurrentLinkedDeque 13 | import java.util.concurrent.atomic.AtomicReference 14 | 15 | import pekko.Done 16 | import pekko.NotUsed 17 | import pekko.stream.scaladsl._ 18 | 19 | /** A mocked `KafkaClientInterface` that returns a specific data as its source 20 | * 21 | * @param kafkaData 22 | * The data which the mock will output 23 | * @param commitStorage 24 | * A collection that keeps track of whenever a cursor is committed 25 | * @param stopAfterDuration 26 | * Dont produce any data from `kafkaData` if its offset is after `stopAfterOffset` based off of the first committed 27 | * [[io.aiven.guardian.kafka.models.ReducedConsumerRecord.timestamp]]. Handy to simulate the premature closing of a 28 | * KafkaClient before its completed producing all source elements (i.e. suspend/resume and restart scenarios). 29 | * @param handleOffsets 30 | * Tells the MockedKafkaClientInterface to handleOffsets rather than just ignoring them. This means that the mock 31 | * will only add commits to the `commitStorage` if its later than any currently processed offsets. Furthermore it 32 | * will not replay source data if it has already been committed. 33 | */ 34 | class MockedKafkaConsumerInterface(kafkaData: Source[ReducedConsumerRecord, NotUsed], 35 | commitStorage: ConcurrentLinkedDeque[Long] = new ConcurrentLinkedDeque[Long](), 36 | stopAfterDuration: Option[FiniteDuration] = None, 37 | handleOffsets: Boolean = false 38 | ) extends KafkaConsumerInterface { 39 | 40 | /** The type of the context to pass around. In context of a Kafka consumer, this typically holds offset data to be 41 | * automatically committed 42 | */ 43 | override type CursorContext = Long 44 | 45 | /** The type that represents how to control the given stream, i.e. 
if you want to shut it down or add metrics 46 | */ 47 | override type Control = Future[NotUsed] 48 | 49 | /** The type that represents the result of the `combine` parameter that is supplied to 50 | * [[pekko.stream.scaladsl.Source.toMat]] 51 | */ 52 | override type MatCombineResult = Future[NotUsed] 53 | 54 | /** The type that represents the result of batching a `CursorContext` 55 | */ 56 | override type BatchedCursorContext = Long 57 | 58 | private val firstReducedConsumerRecord: AtomicReference[ReducedConsumerRecord] = 59 | new AtomicReference[ReducedConsumerRecord]() 60 | 61 | /** @return 62 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors 63 | */ 64 | override def getSource: SourceWithContext[ReducedConsumerRecord, Long, Future[NotUsed]] = { 65 | val source = kafkaData 66 | .prefixAndTail(1) 67 | .flatMapConcat { 68 | case (Seq(head), rest) => 69 | firstReducedConsumerRecord.set(head) 70 | Source.combine( 71 | Source.single(head), 72 | rest 73 | )(Concat(_)) 74 | case _ => Source.empty[ReducedConsumerRecord] 75 | } 76 | 77 | val finalSource = if (handleOffsets) { 78 | source.filter { reducedConsumerRecord => 79 | (commitStorage.isEmpty || { 80 | reducedConsumerRecord.offset > commitStorage.getLast 81 | }) && { 82 | (stopAfterDuration, Option(firstReducedConsumerRecord.get())) match { 83 | case (Some(afterDuration), Some(firstRecord)) => 84 | val difference = 85 | ChronoUnit.MILLIS.between(Instant.ofEpochMilli(firstRecord.timestamp), 86 | Instant.ofEpochMilli(reducedConsumerRecord.timestamp) 87 | ) 88 | afterDuration.toMillis > difference 89 | case _ => true 90 | } 91 | } 92 | } 93 | } else 94 | source 95 | 96 | SourceWithContext 97 | .fromTuples(finalSource.map { reducedConsumerRecord => 98 | (reducedConsumerRecord, reducedConsumerRecord.offset) 99 | }) 100 | .mapMaterializedValue(Future.successful) 101 | } 102 | 103 | /** @return 104 | * The result of this function gets directly passed into the `combine` parameter of 105 | * [[pekko.stream.scaladsl.Source.toMat]] 106 | */ 107 | override def matCombine: (Future[NotUsed], Future[Done]) => Future[NotUsed] = Keep.left 108 | 109 | /** @return 110 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message 111 | */ 112 | override def commitCursor: Sink[Long, Future[Done]] = Sink.foreach { cursor => 113 | if (handleOffsets && !commitStorage.isEmpty) { 114 | if (commitStorage.getLast < cursor) 115 | commitStorage.add(cursor) 116 | } else 117 | commitStorage.add(cursor) 118 | } 119 | 120 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext` 121 | * @param cursors 122 | * The cursors that need to be batched 123 | * @return 124 | * A collection data structure that represents the batched cursors 125 | */ 126 | override def batchCursorContext(cursors: immutable.Iterable[Long]): Long = cursors.max 127 | 128 | } 129 | -------------------------------------------------------------------------------- /core-cli/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | pekko { 2 | loggers = ["org.apache.pekko.event.slf4j.Slf4jLogger"] 3 | loglevel = "INFO" 4 | logging-filter = "org.apache.pekko.event.slf4j.Slf4jLoggingFilter" 5 | } 6 | -------------------------------------------------------------------------------- /core-cli/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 
[%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/MainUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli 2 | 3 | import ch.qos.logback.classic.joran.JoranConfigurator 4 | import ch.qos.logback.core.Context 5 | import org.slf4j.ILoggerFactory 6 | 7 | import scala.concurrent.ExecutionContext 8 | import scala.concurrent.Future 9 | import scala.concurrent.Promise 10 | import scala.concurrent.blocking 11 | import scala.io.StdIn 12 | import scala.util.Failure 13 | import scala.util.Success 14 | import scala.util.Using 15 | 16 | import java.nio.file.Files 17 | import java.nio.file.Path 18 | 19 | object MainUtils { 20 | 21 | /** Hook that lets the user specify the future that will signal the shutdown of the server whenever completed. Adapted 22 | * from 23 | * https://github.com/apache/incubator-pekko-http/blob/94d1b1c153cc39216dae4217fd0e927f04d53cd2/http/src/main/scala/org/apache/pekko/http/scaladsl/server/HttpApp.scala#L164-L176 24 | */ 25 | @SuppressWarnings( 26 | Array( 27 | "scalafix:DisableSyntax.null" 28 | ) 29 | ) 30 | def waitForShutdownSignal(promise: Promise[Unit] = Promise[Unit]())(implicit ec: ExecutionContext): Future[Unit] = { 31 | sys.addShutdownHook { 32 | promise.trySuccess(()) 33 | } 34 | Future { 35 | blocking { 36 | if (StdIn.readLine("Press RETURN to stop...\n") != null) 37 | promise.trySuccess(()) 38 | } 39 | } 40 | promise.future 41 | } 42 | 43 | /** Allows you to override the default logback.xml file with a custom one 44 | * @see 45 | * https://stackoverflow.com/a/21886322/1519631 46 | */ 47 | def setLogbackFile(path: Path, loggerContext: ILoggerFactory): Unit = 48 | Using(Files.newInputStream(path)) { inputStream => 49 | val configurator = new JoranConfigurator 50 | configurator.setContext(loggerContext.asInstanceOf[Context]) 51 | configurator.doConfigure(inputStream) 52 | } match { 53 | case Failure(exception) => throw exception 54 | case Success(value) => value 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/PekkoSettings.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli 2 | 3 | import org.apache.pekko.actor.ActorSystem 4 | 5 | import scala.concurrent.ExecutionContext 6 | 7 | trait PekkoSettings { 8 | implicit val actorSystem: ActorSystem = ActorSystem() 9 | implicit val executionContext: ExecutionContext = ExecutionContext.global 10 | } 11 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/arguments/PropertiesOpt.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli.arguments 2 | 3 | import cats.data.ValidatedNel 4 | import cats.implicits._ 5 | import com.monovore.decline.Argument 6 | 7 | import scala.util.Failure 8 | import scala.util.Success 9 | import scala.util.Using 10 | 11 | import java.io.BufferedReader 12 | import java.io.FileNotFoundException 13 | import java.io.FileReader 14 | import java.util.Properties 15 | 16 | object PropertiesOpt { 17 | implicit val propertiesArgument: Argument[Properties] = new Argument[Properties] { 18 | override def read(string: String): 
ValidatedNel[String, Properties] = { 19 | val prop = new Properties() 20 | Using(new BufferedReader(new FileReader(string))) { reader => 21 | prop.load(reader) 22 | } match { 23 | case Failure(_: FileNotFoundException) => 24 | s"Properties file at path $string does not exist".invalidNel 25 | case Failure(_) => 26 | s"Unable to read file at path $string".invalidNel 27 | case Success(_) => prop.validNel 28 | } 29 | } 30 | 31 | override def defaultMetavar: String = "path" 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/arguments/StorageOpt.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli.arguments 2 | 3 | import cats.data.ValidatedNel 4 | import cats.implicits._ 5 | import com.monovore.decline.Argument 6 | import enumeratum._ 7 | 8 | sealed trait StorageOpt extends EnumEntry with EnumEntry.Lowercase 9 | 10 | object StorageOpt extends Enum[StorageOpt] { 11 | case object S3 extends StorageOpt 12 | 13 | val values: IndexedSeq[StorageOpt] = findValues 14 | 15 | implicit val storageArgument: Argument[StorageOpt] = new Argument[StorageOpt] { 16 | override def read(string: String): ValidatedNel[String, StorageOpt] = 17 | StorageOpt.withNameOption(string).toValidNel("Invalid Storage Argument") 18 | 19 | override def defaultMetavar: String = "storage" 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /core-cli/src/main/scala/io/aiven/guardian/cli/options/Options.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.cli.options 2 | 3 | import cats.data.NonEmptyList 4 | import cats.implicits._ 5 | import com.monovore.decline.Opts 6 | import com.typesafe.config.ConfigException.Missing 7 | import com.typesafe.config.ConfigFactory 8 | import io.aiven.guardian.cli.arguments.StorageOpt 9 | import io.aiven.guardian.kafka.configs.KafkaCluster 10 | import pureconfig.error.ConfigReaderException 11 | 12 | import java.nio.file.Path 13 | 14 | trait Options { 15 | val storageOpt: Opts[StorageOpt] = 16 | Opts.option[StorageOpt]("storage", help = "Which type of storage to persist kafka topics") 17 | 18 | val dataBucketOpt: Opts[Option[String]] = 19 | Opts.option[String]("s3-data-bucket", help = "S3 Bucket for storage of main backup data").orNone 20 | 21 | val topicsOpt: Opts[Option[NonEmptyList[String]]] = 22 | Opts.options[String]("kafka-topics", help = "Kafka topics to operate on").orNone 23 | 24 | val bootstrapServersOpt: Opts[Option[NonEmptyList[String]]] = 25 | Opts.options[String]("kafka-bootstrap-servers", help = "Kafka bootstrap servers").orNone 26 | 27 | val logbackFileOpt: Opts[Option[Path]] = 28 | Opts.option[Path]("logback-file", help = "Specify logback.xml configuration to override default").orNone 29 | 30 | def optionalPureConfigValue[T](value: () => T): Option[T] = 31 | try Some(value()) 32 | catch { 33 | case _: ConfigReaderException[_] => 34 | None 35 | } 36 | 37 | @SuppressWarnings( 38 | Array( 39 | "scalafix:DisableSyntax.null" 40 | ) 41 | ) 42 | def checkConfigKeyIsDefined(path: String): Boolean = 43 | try ConfigFactory.load().getAnyRef(path) != null 44 | catch { 45 | case _: Missing => false 46 | } 47 | 48 | val kafkaClusterOpt: Opts[KafkaCluster] = topicsOpt.mapValidated { topics => 49 | import io.aiven.guardian.kafka.{Config => KafkaConfig} 50 | topics match { 51 | case Some(value) => 52 | 
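        // Editor's note (illustrative): `value` here is the NonEmptyList collected from repeated
        // `--kafka-topics` flags, e.g. NonEmptyList.of("topic-a", "topic-b"), and is collapsed into
        // KafkaCluster(Set("topic-a", "topic-b")) just below; when the flag is absent, the topics
        // already configured via pureconfig (KafkaConfig.kafkaClusterConfig) are used instead.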
KafkaCluster(value.toList.toSet).validNel 53 | case None if KafkaConfig.kafkaClusterConfig.topics.nonEmpty => KafkaConfig.kafkaClusterConfig.validNel 54 | case _ => 55 | "kafka-topics is a mandatory value that needs to be configured".invalidNel 56 | } 57 | } 58 | 59 | } 60 | 61 | object Options extends Options 62 | -------------------------------------------------------------------------------- /core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/DatabaseInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.concurrent.Future 7 | 8 | import pekko.NotUsed 9 | import pekko.stream.javadsl.Flow 10 | import pekko.stream.scaladsl.Source 11 | import pekko.util.ByteString 12 | 13 | trait DatabaseInterface { 14 | 15 | /** Given a source of storage where Kafka messages are contained, stream it into a database. 16 | * @param kafkaStorageSource 17 | * @param encodeKafkaRowToByteString 18 | * @return 19 | * Number of rows updated 20 | */ 21 | def streamInsert(kafkaStorageSource: Source[ReducedConsumerRecord, NotUsed], 22 | encodeKafkaRowToByteString: Flow[ReducedConsumerRecord, ByteString, NotUsed] 23 | ): Future[Long] 24 | } 25 | -------------------------------------------------------------------------------- /core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/PostgresJDBCDatabase.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | import org.postgresql.copy.CopyManager 6 | import org.postgresql.core.BaseConnection 7 | 8 | import scala.concurrent.ExecutionContext 9 | import scala.concurrent.Future 10 | import scala.concurrent.blocking 11 | 12 | import java.sql.Connection 13 | 14 | import pekko.NotUsed 15 | import pekko.stream.ActorAttributes 16 | import pekko.stream.Materializer 17 | import pekko.stream.javadsl.Flow 18 | import pekko.stream.scaladsl.Source 19 | import pekko.stream.scaladsl.StreamConverters 20 | import pekko.util.ByteString 21 | 22 | /** A Postgres Database backed by JDBC which uses the Postgres COPY command to insert data into the database. Note that 23 | * since this uses JDBC and CopyManager, its implementation is blocking under the hood. 24 | * @param scheduler 25 | * @param materializer 26 | * @param conn 27 | */ 28 | class PostgresJDBCDatabase()(implicit executionContext: ExecutionContext, materializer: Materializer, conn: Connection) 29 | extends DatabaseInterface { 30 | 31 | /** Inserts data into a Postgres Database using the COPY method (see 32 | * https://www.postgresql.org/docs/9.4/sql-copy.html). This means the data insertion is buffered and also extremely 33 | * fast since it bypasses internal parts of the Postgres engine which are not necessary. 34 | * 35 | * Since it uses JDBC plus `java.io.InputStream` under the hood, the operation is inherently blocking even though it 36 | * returns a `scala.concurrent.Future`. 
Due to this we have used blocking IO dispatchers to avoid problems that are 37 | * typical of blocking IO 38 | * 39 | * @return 40 | * Number of rows updated 41 | */ 42 | override def streamInsert(kafkaStorageSource: Source[ReducedConsumerRecord, NotUsed], 43 | encodeKafkaRowToByteString: Flow[ReducedConsumerRecord, ByteString, NotUsed] 44 | ): Future[Long] = { 45 | // TODO implement SQL query 46 | val sql = """""" 47 | 48 | // Since this is blocking IO we use a custom dispatcher dealt to handle with this 49 | val sink = StreamConverters 50 | .asInputStream() 51 | .withAttributes(ActorAttributes.dispatcher(ActorAttributes.IODispatcher.dispatcher)) 52 | 53 | val postgresSource = kafkaStorageSource.via(encodeKafkaRowToByteString) 54 | 55 | blocking(Future { 56 | postgresSource.runWith( 57 | sink.mapMaterializedValue(inputStream => 58 | new CopyManager(conn.asInstanceOf[BaseConnection]).copyIn( 59 | sql, 60 | inputStream 61 | ) 62 | ) 63 | ) 64 | }) 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/StorageInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.compaction 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import pekko.NotUsed 7 | import pekko.stream.scaladsl.Source 8 | 9 | trait StorageInterface { 10 | 11 | /** Retrieve Kafka data from a given storage source 12 | * @return 13 | */ 14 | def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] 15 | 16 | /** Checks whether the storage exists and is accessible 17 | */ 18 | def checkStorageAccessible: Source[Boolean, NotUsed] 19 | } 20 | -------------------------------------------------------------------------------- /core-gcs/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | gcs-config = { 2 | data-bucket = ${?GCS_CONFIG_DATA_BUCKET} 3 | compaction-bucket = ${?GCS_CONFIG_COMPACTION_BUCKET} 4 | } 5 | -------------------------------------------------------------------------------- /core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.gcs 2 | 3 | import io.aiven.guardian.kafka.gcs.configs.GCS 4 | import pureconfig._ 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | @nowarn("cat=lint-byname-implicit") 11 | implicit lazy val gcsConfig: GCS = 12 | ConfigSource.default.at("gcs-config").loadOrThrow[GCS] 13 | } 14 | 15 | object Config extends Config 16 | -------------------------------------------------------------------------------- /core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/configs/GCS.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.gcs.configs 2 | 3 | /** GCS specific configuration used when storing Kafka ConsumerRecords to a GCS bucket 4 | * @param dataBucket 5 | * The bucket where a Kafka Consumer directly streams data into as storage 6 | * @param compactionBucket 7 | * The bucket where compaction results are stored 8 | */ 9 | final case class GCS(dataBucket: String, compactionBucket: String) 10 | -------------------------------------------------------------------------------- 
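Editor's note: the `gcs-config` block in core-gcs/src/main/resources/reference.conf above only fills `data-bucket` and `compaction-bucket` through optional `${?...}` environment substitutions, so both values must be supplied at runtime. Below is a minimal, self-contained sketch of loading that section the same way the `Config` trait above does; the object name and bucket values are illustrative assumptions, not part of the project.

import io.aiven.guardian.kafka.gcs.configs.GCS
import pureconfig.ConfigSource
import pureconfig.generic.auto._

import scala.annotation.nowarn

object GcsConfigExample {
  // Assumes GCS_CONFIG_DATA_BUCKET and GCS_CONFIG_COMPACTION_BUCKET are exported
  // (e.g. GCS_CONFIG_DATA_BUCKET=example-data-bucket); loadOrThrow fails otherwise
  // because reference.conf provides no defaults for these keys.
  @nowarn("cat=lint-byname-implicit")
  def load(): GCS = ConfigSource.default.at("gcs-config").loadOrThrow[GCS]

  def main(args: Array[String]): Unit = {
    val gcs = load()
    println(s"data bucket: ${gcs.dataBucket}, compaction bucket: ${gcs.compactionBucket}")
  }
}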
/core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/errors/GCSErrors.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.gcs.errors 2 | 3 | import io.aiven.guardian.kafka.Errors 4 | 5 | sealed abstract class GCSErrors extends Errors 6 | 7 | object GCSErrors { 8 | final case class ExpectedObjectToExist(bucketName: String, maybePrefix: Option[String]) extends GCSErrors { 9 | override def getMessage: String = 10 | ??? 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /core-restore/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | pekko.kafka.producer { 2 | discovery-method = ${?PEKKO_KAFKA_PRODUCER_DISCOVERY_METHOD} 3 | service-name = ${?PEKKO_KAFKA_PRODUCER_SERVICE_NAME} 4 | resolve-timeout = ${?PEKKO_KAFKA_PRODUCER_RESOLVE_TIMEOUT} 5 | parallelism = ${?PEKKO_KAFKA_PRODUCER_PARALLELISM} 6 | close-timeout = ${?PEKKO_KAFKA_PRODUCER_CLOSE_TIMEOUT} 7 | close-on-producer-stop = ${?PEKKO_KAFKA_PRODUCER_CLOSE_ON_PRODUCER_STOP} 8 | use-dispatcher = ${?PEKKO_KAFKA_PRODUCER_USE_DISPATCHER} 9 | eos-commit-interval = ${?PEKKO_KAFKA_PRODUCER_EOS_COMMIT_INTERVAL} 10 | } 11 | 12 | restore { 13 | from-when = ${?RESTORE_FROM_WHEN} 14 | override-topics = ${?RESTORE_OVERRIDE_TOPICS} 15 | } 16 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.restore.configs.Restore 4 | import pureconfig._ 5 | import pureconfig.configurable._ 6 | import pureconfig.generic.auto._ 7 | 8 | import scala.annotation.nowarn 9 | 10 | import java.time.OffsetDateTime 11 | import java.time.format.DateTimeFormatter 12 | 13 | trait Config { 14 | implicit val localDateConvert: ConfigConvert[OffsetDateTime] = offsetDateTimeConfigConvert( 15 | DateTimeFormatter.ISO_OFFSET_DATE_TIME 16 | ) 17 | 18 | @nowarn("cat=lint-byname-implicit") 19 | implicit lazy val restoreConfig: Restore = ConfigSource.default.at("restore").loadOrThrow[Restore] 20 | } 21 | 22 | object Config extends Config 23 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/KafkaProducer.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import io.aiven.guardian.kafka.restore.configs.Restore 5 | import org.apache.kafka.clients.producer.ProducerRecord 6 | import org.apache.kafka.common.serialization.ByteArraySerializer 7 | import org.apache.pekko 8 | 9 | import scala.concurrent.Future 10 | 11 | import java.util.Base64 12 | 13 | import pekko.Done 14 | import pekko.actor.ActorSystem 15 | import pekko.kafka.ProducerSettings 16 | import pekko.kafka.scaladsl.Producer 17 | import pekko.stream.scaladsl.Sink 18 | 19 | class KafkaProducer( 20 | configureProducer: Option[ 21 | ProducerSettings[Array[Byte], Array[Byte]] => ProducerSettings[Array[Byte], Array[Byte]] 22 | ] = None 23 | )(implicit system: ActorSystem, restoreConfig: Restore) 24 | extends KafkaProducerInterface { 25 | 26 | private[kafka] val producerSettings = { 27 | val base = ProducerSettings(system, new ByteArraySerializer, new 
ByteArraySerializer) 28 | configureProducer 29 | .fold(base)(block => block(base)) 30 | } 31 | 32 | override def getSink: Sink[ReducedConsumerRecord, Future[Done]] = 33 | Producer.plainSink(producerSettings).contramap[ReducedConsumerRecord] { reducedConsumerRecord => 34 | val topic = restoreConfig.overrideTopics match { 35 | case Some(map) => 36 | map.getOrElse(reducedConsumerRecord.topic, reducedConsumerRecord.topic) 37 | case None => reducedConsumerRecord.topic 38 | } 39 | val valueAsByteArray = Base64.getDecoder.decode(reducedConsumerRecord.value) 40 | reducedConsumerRecord.key match { 41 | case Some(key) => 42 | new ProducerRecord[Array[Byte], Array[Byte]]( 43 | topic, 44 | Base64.getDecoder.decode(key), 45 | valueAsByteArray 46 | ) 47 | case None => 48 | new ProducerRecord[Array[Byte], Array[Byte]]( 49 | topic, 50 | valueAsByteArray 51 | ) 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/KafkaProducerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.concurrent.Future 7 | 8 | import pekko.Done 9 | import pekko.stream.scaladsl.Sink 10 | 11 | trait KafkaProducerInterface { 12 | def getSink: Sink[ReducedConsumerRecord, Future[Done]] 13 | } 14 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/RestoreClientInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import io.aiven.guardian.kafka.ExtensionsMethods._ 5 | import io.aiven.guardian.kafka.Utils 6 | import io.aiven.guardian.kafka.codecs.Circe._ 7 | import io.aiven.guardian.kafka.configs.KafkaCluster 8 | import io.aiven.guardian.kafka.models.BackupObjectMetadata 9 | import io.aiven.guardian.kafka.models.Gzip 10 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 11 | import io.aiven.guardian.kafka.restore.configs.Restore 12 | import org.apache.pekko 13 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport 14 | import org.typelevel.jawn.AsyncParser 15 | 16 | import scala.concurrent.ExecutionContext 17 | import scala.concurrent.Future 18 | 19 | import java.time.OffsetDateTime 20 | 21 | import pekko.Done 22 | import pekko.NotUsed 23 | import pekko.actor.ActorSystem 24 | import pekko.stream.Attributes 25 | import pekko.stream.KillSwitches 26 | import pekko.stream.UniqueKillSwitch 27 | import pekko.stream.scaladsl.Compression 28 | import pekko.stream.scaladsl.Concat 29 | import pekko.stream.scaladsl.Flow 30 | import pekko.stream.scaladsl.Keep 31 | import pekko.stream.scaladsl.RunnableGraph 32 | import pekko.stream.scaladsl.Source 33 | import pekko.util.ByteString 34 | 35 | trait RestoreClientInterface[T <: KafkaProducerInterface] extends LazyLogging { 36 | implicit val kafkaProducerInterface: T 37 | implicit val restoreConfig: Restore 38 | implicit val kafkaClusterConfig: KafkaCluster 39 | implicit val system: ActorSystem 40 | val maybeAttributes: Option[Attributes] = None 41 | 42 | def retrieveBackupKeys: Future[List[String]] 43 | 44 | def downloadFlow: Flow[String, ByteString, NotUsed] 45 | 46 | private[kafka] def keysWithOffsetDateTime(keys: List[String]): 
List[(String, OffsetDateTime)] = keys.map { key => 47 | (key, Utils.keyToOffsetDateTime(key)) 48 | } 49 | 50 | private[kafka] def finalKeys: Future[List[String]] = { 51 | implicit val ec: ExecutionContext = system.dispatcher 52 | 53 | for { 54 | backupKeys <- retrieveBackupKeys 55 | withTime = keysWithOffsetDateTime(backupKeys) 56 | sorted = withTime.sortBy { case (_, time) => 57 | time 58 | } 59 | 60 | latest = restoreConfig.fromWhen match { 61 | case Some(pickedDate) => 62 | val index = sorted.indexWhere { case (_, time) => 63 | time >= pickedDate 64 | } 65 | index match { 66 | case 0 => sorted 67 | case -1 => 68 | sorted.lastOption match { 69 | case Some((key, value)) => 70 | // Its still technically possible that the last key can contain a picked value. 71 | List((key, value)) 72 | case _ => List.empty 73 | } 74 | case index => 75 | val (_, rest) = sorted.splitAt(index - 1) 76 | rest 77 | } 78 | case None => sorted 79 | } 80 | } yield latest.map { case (key, _) => key } 81 | } 82 | 83 | private[kafka] def checkTopicInConfig(reducedConsumerRecord: ReducedConsumerRecord): Boolean = 84 | kafkaClusterConfig.topics.contains(reducedConsumerRecord.topic) 85 | 86 | private[kafka] def checkTopicGreaterThanTime(reducedConsumerRecord: ReducedConsumerRecord): Boolean = 87 | restoreConfig.fromWhen match { 88 | case Some(pickedDate) => 89 | reducedConsumerRecord.toOffsetDateTime >= pickedDate 90 | case None => true 91 | } 92 | 93 | private[kafka] def restoreKey(key: String): Source[ByteString, NotUsed] = { 94 | val source = Source 95 | .single(key) 96 | .via(downloadFlow) 97 | 98 | BackupObjectMetadata.fromKey(key).compression match { 99 | case Some(Gzip) => source.via(Compression.gunzip()) 100 | case None => source 101 | } 102 | } 103 | 104 | def restore: RunnableGraph[(UniqueKillSwitch, Future[Done])] = { 105 | val sourceWithCompression = Source.future(finalKeys).flatMapConcat { keys => 106 | keys.map(key => restoreKey(key)) match { 107 | case first :: Nil => first 108 | case first :: second :: Nil => Source.combine(first, second)(Concat(_)) 109 | case first :: second :: rest => Source.combine(first, second, rest: _*)(Concat(_)) 110 | case Nil => Source.empty[ByteString] 111 | } 112 | } 113 | 114 | val asReducedConsumerRecord = sourceWithCompression 115 | .via(CirceStreamSupport.decode[Option[ReducedConsumerRecord]](AsyncParser.UnwrapArray, multiValue = true)) 116 | .collect { 117 | case Some(reducedConsumerRecord) 118 | if checkTopicInConfig(reducedConsumerRecord) && checkTopicGreaterThanTime(reducedConsumerRecord) => 119 | reducedConsumerRecord 120 | } 121 | 122 | asReducedConsumerRecord.viaMat(KillSwitches.single)(Keep.right).toMat(kafkaProducerInterface.getSink)(Keep.both) 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /core-restore/src/main/scala/io/aiven/guardian/kafka/restore/configs/Restore.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.configs 2 | 3 | import java.time.OffsetDateTime 4 | 5 | /** @param fromWhen 6 | * An optional datetime which only restores topics are are after or equal to that date 7 | * @param overrideTopics 8 | * An optional map that allows you to translate topics that are backed up to a new topic name in the destination 9 | * Kafka cluster. The key is the backed up topic name and the value is the new topic name. If this map doesn't 10 | * contain a key for a topic then its backed up as normal. 
11 | */ 12 | final case class Restore(fromWhen: Option[OffsetDateTime], overrideTopics: Option[Map[String, String]]) 13 | 14 | object Restore { 15 | def empty: Restore = Restore(None, None) 16 | } 17 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/ConfigSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.Generators.kafkaTopic 4 | import io.aiven.guardian.kafka.restore.configs.Restore 5 | import org.scalacheck.Gen 6 | import org.scalacheck.ops.time.ImplicitJavaTimeGenerators._ 7 | import org.scalatest.matchers.must.Matchers 8 | import org.scalatest.propspec.AnyPropSpec 9 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 10 | import pureconfig._ 11 | import pureconfig.configurable._ 12 | import pureconfig.generic.auto._ 13 | 14 | import scala.annotation.nowarn 15 | 16 | import java.time.OffsetDateTime 17 | import java.time.format.DateTimeFormatter 18 | 19 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 20 | implicit val localDateConvert: ConfigConvert[OffsetDateTime] = offsetDateTimeConfigConvert( 21 | DateTimeFormatter.ISO_OFFSET_DATE_TIME 22 | ) 23 | 24 | property("Valid Restore configs should parse correctly") { 25 | val overrideMapGen = for { 26 | size <- Gen.choose(1, 10) 27 | keys <- Gen.containerOfN[Set, String](size, kafkaTopic) 28 | values <- Gen.containerOfN[Set, String](size, kafkaTopic) 29 | } yield keys.zip(values).toMap 30 | 31 | val offsetDateTimeGen = arbZonedDateTime.arbitrary.map(_.toOffsetDateTime) 32 | 33 | forAll(offsetDateTimeGen, overrideMapGen) { (fromWhen: OffsetDateTime, overrideTopics: Map[String, String]) => 34 | val topics = overrideTopics 35 | .map { case (key, value) => 36 | val k = "\"" + key + "\"" 37 | val v = "\"" + value + "\"" 38 | s"$k=$v" 39 | } 40 | .mkString("", "\n ", "") 41 | 42 | val conf = s""" 43 | |restore { 44 | | from-when = "${fromWhen.toString}" 45 | | override-topics = { 46 | | $topics 47 | | } 48 | |} 49 | |""".stripMargin 50 | 51 | @nowarn("cat=lint-byname-implicit") 52 | val restore = ConfigSource.string(conf).at("restore").loadOrThrow[Restore] 53 | restore mustEqual Restore(Some(fromWhen), Some(overrideTopics)) 54 | } 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/GzipCompressionRestoreClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class GzipCompressionRestoreClientInterfaceSpec 9 | extends AnyPropTestKit(ActorSystem("GzipCompressionRestoreClientInterfaceSpec")) 10 | with RestoreClientInterfaceTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/MockedKafkaProducerInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import 
io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import org.apache.pekko 5 | 6 | import scala.concurrent.Future 7 | 8 | import java.util.concurrent.ConcurrentLinkedQueue 9 | 10 | import pekko.Done 11 | import pekko.stream.scaladsl.Sink 12 | 13 | class MockedKafkaProducerInterface() extends KafkaProducerInterface { 14 | val producedData: ConcurrentLinkedQueue[ReducedConsumerRecord] = new ConcurrentLinkedQueue[ReducedConsumerRecord]() 15 | 16 | override def getSink: Sink[ReducedConsumerRecord, Future[Done]] = 17 | Sink.foreach[ReducedConsumerRecord] { reducedConsumerRecord => 18 | producedData.add(reducedConsumerRecord) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/MockedRestoreClientInterface.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import io.aiven.guardian.kafka.restore.configs.Restore 5 | import org.apache.pekko 6 | 7 | import scala.concurrent.Future 8 | 9 | import pekko.NotUsed 10 | import pekko.actor.ActorSystem 11 | import pekko.stream.scaladsl.Flow 12 | import pekko.util.ByteString 13 | 14 | class MockedRestoreClientInterface(backupData: Map[String, ByteString])(implicit 15 | override val kafkaProducerInterface: MockedKafkaProducerInterface, 16 | override val restoreConfig: Restore, 17 | override val kafkaClusterConfig: KafkaCluster, 18 | override val system: ActorSystem 19 | ) extends RestoreClientInterface[MockedKafkaProducerInterface] { 20 | 21 | override def retrieveBackupKeys: Future[List[String]] = Future.successful( 22 | backupData.keys.toList 23 | ) 24 | 25 | override def downloadFlow: Flow[String, ByteString, NotUsed] = Flow.fromFunction { key: String => 26 | backupData(key) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core-restore/src/test/scala/io/aiven/guardian/kafka/restore/RestoreClientInterfaceSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class RestoreClientInterfaceSpec 8 | extends AnyPropTestKit(ActorSystem("RestoreClientInterfaceSpec")) 9 | with RestoreClientInterfaceTest { 10 | override val compression: Option[Compression] = None 11 | } 12 | -------------------------------------------------------------------------------- /core-s3/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | pekko.connectors.s3 { 2 | buffer = ${?PEKKO_CONNECTORS_S3_BUFFER} 3 | disk-buffer-path = ${?PEKKO_CONNECTORS_S3_DISK_BUFFER_PATH} 4 | 5 | forward-proxy { 6 | scheme = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_SCHEME} 7 | host = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_HOST} 8 | port = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_PORT} 9 | 10 | credentials { 11 | username = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_CREDENTIALS_USERNAME} 12 | password = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_CREDENTIALS_PASSWORD} 13 | } 14 | } 15 | 16 | aws { 17 | credentials { 18 | access-key-id = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_ACCESS_KEY_ID} 19 | secret-access-key = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_SECRET_ACCESS_KEY} 20 | token = 
${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_TOKEN} 21 | provider = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER} 22 | } 23 | 24 | region { 25 | default-region = ${?PEKKO_CONNECTORS_S3_REGION_DEFAULT_REGION} 26 | provider = ${?PEKKO_CONNECTORS_S3_REGION_PROVIDER} 27 | } 28 | } 29 | 30 | path-style-access = ${?PEKKO_CONNECTORS_S3_PATH_STYLE_ACCESS} 31 | access-style = ${?PEKKO_CONNECTORS_S3_ACCESS_STYLE} 32 | endpoint-url = ${?PEKKO_CONNECTORS_S3_ENDPOINT_URL} 33 | list-bucket-api-version = ${?PEKKO_CONNECTORS_S3_LIST_BUCKET_API_VERSION} 34 | validate-object-key = ${?PEKKO_CONNECTORS_S3_VALIDATE_OBJECT_KEY} 35 | 36 | retry-settings { 37 | max-retries = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MAX_RETRIES} 38 | min-backoff = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MIN_BACKOFF} 39 | max-backoff = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MAX_BACKOFF} 40 | random-factor = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_RANDOM_FACTOR} 41 | } 42 | } 43 | 44 | s3-headers = { 45 | canned-acl = ${?S3_HEADERS_CANNED_ACL} 46 | storage-class = ${?S3_HEADERS_STORAGE_CLASS} 47 | server-side-encryption = ${?S3_HEADERS_SERVER_SIDE_ENCRYPTION} 48 | } 49 | 50 | s3-config = { 51 | data-bucket = ${?S3_CONFIG_DATA_BUCKET} 52 | data-bucket-prefix = ${?S3_CONFIG_DATA_BUCKET_PREFIX} 53 | error-restart-settings = { 54 | min-backoff = 5 millis 55 | min-backoff = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MIN_BACKOFF} 56 | max-backoff = 10 seconds 57 | max-backoff = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_BACKOFF} 58 | random-factor = 0.2 59 | random-factor = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_RANDOM_FACTOR} 60 | max-restarts = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_RESTARTS} 61 | max-restarts-within = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_RESTARTS_WITHIN} 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /core-s3/src/main/scala/io/aiven/guardian/kafka/s3/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | package s3 3 | 4 | import io.aiven.guardian.kafka.PureConfigUtils._ 5 | import io.aiven.guardian.kafka.s3.configs.S3 6 | import org.apache.pekko 7 | import pureconfig.ConfigCursor 8 | import pureconfig.ConfigReader 9 | import pureconfig.ConfigReader._ 10 | import pureconfig.ConfigSource 11 | import pureconfig.error.UserValidationFailed 12 | 13 | import scala.annotation.nowarn 14 | import scala.concurrent.duration.FiniteDuration 15 | 16 | import pekko.stream.RestartSettings 17 | import pekko.stream.connectors.s3.MetaHeaders 18 | import pekko.stream.connectors.s3.S3Headers 19 | import pekko.stream.connectors.s3.headers.CannedAcl 20 | import pekko.stream.connectors.s3.headers.ServerSideEncryption 21 | import pekko.stream.connectors.s3.headers.StorageClass 22 | 23 | trait Config { 24 | 25 | // TODO Unfortunately the following boilerplate is here because the S3 Pekko Connectors providers no public constructors 26 | // for S3Headers apart from the limited S3Headers(). 
This means we can't use pureconfig.generic.auto._ and hence 27 | // we have to write this out manually 28 | 29 | implicit val cannedACLConfigReader: ConfigReader[CannedAcl] = (cur: ConfigCursor) => 30 | cur.asString.flatMap { 31 | case CannedAcl.AuthenticatedRead.value => Right(CannedAcl.AuthenticatedRead) 32 | case CannedAcl.AwsExecRead.value => Right(CannedAcl.AwsExecRead) 33 | case CannedAcl.BucketOwnerFullControl.value => Right(CannedAcl.BucketOwnerFullControl) 34 | case CannedAcl.BucketOwnerRead.value => Right(CannedAcl.BucketOwnerRead) 35 | case CannedAcl.Private.value => Right(CannedAcl.Private) 36 | case CannedAcl.PublicRead.value => Right(CannedAcl.PublicRead) 37 | case CannedAcl.PublicReadWrite.value => Right(CannedAcl.PublicReadWrite) 38 | case rest => Left(failure(cur, rest, "CannedAcl")) 39 | } 40 | 41 | implicit val metaHeadersConfigReader: ConfigReader[MetaHeaders] = mapReader[String].map(MetaHeaders.apply) 42 | 43 | implicit val storageClassConfigReader: ConfigReader[StorageClass] = (cur: ConfigCursor) => 44 | cur.asString.flatMap { 45 | case StorageClass.Standard.storageClass => Right(StorageClass.Standard) 46 | case StorageClass.InfrequentAccess.storageClass => Right(StorageClass.InfrequentAccess) 47 | case StorageClass.Glacier.storageClass => Right(StorageClass.Glacier) 48 | case StorageClass.ReducedRedundancy.storageClass => Right(StorageClass.ReducedRedundancy) 49 | case rest => Left(failure(cur, rest, "StorageClass")) 50 | } 51 | 52 | implicit val serverSideEncryptionReader: ConfigReader[ServerSideEncryption] = (cur: ConfigCursor) => 53 | cur.fluent.at("type").asString.flatMap { 54 | case "aes256" => 55 | Right(ServerSideEncryption.aes256()) 56 | case "kms" => 57 | ConfigReader 58 | .forProduct2("key-id", "context") { (keyId: String, context: Option[String]) => 59 | val base = ServerSideEncryption.kms(keyId) 60 | context.fold(base)(base.withContext) 61 | } 62 | .from(cur) 63 | case "customer-keys" => 64 | ConfigReader 65 | .forProduct2("key", "md5") { (key: String, md5: Option[String]) => 66 | val base = ServerSideEncryption.customerKeys(key) 67 | md5.fold(base)(base.withMd5) 68 | } 69 | .from(cur) 70 | } 71 | 72 | implicit val s3HeadersConfigReader: ConfigReader[S3Headers] = 73 | ConfigReader.forProduct5("canned-acl", 74 | "meta-headers", 75 | "storage-class", 76 | "custom-headers", 77 | "server-side-encryption" 78 | ) { 79 | (cannedAcl: Option[CannedAcl], 80 | metaHeaders: Option[MetaHeaders], 81 | storageClass: Option[StorageClass], 82 | customHeaders: Option[Map[String, String]], 83 | serverSideEncryption: Option[ServerSideEncryption] 84 | ) => 85 | val base = S3Headers() 86 | val base2 = cannedAcl.fold(base)(base.withCannedAcl) 87 | val base3 = metaHeaders.fold(base2)(base2.withMetaHeaders) 88 | val base4 = storageClass.fold(base3)(base3.withStorageClass) 89 | val base5 = customHeaders.fold(base4)(base4.withCustomHeaders) 90 | serverSideEncryption.fold(base5)(base5.withServerSideEncryption) 91 | } 92 | 93 | implicit lazy val s3Headers: S3Headers = ConfigSource.default.at("s3-headers").loadOrThrow[S3Headers] 94 | 95 | // See https://pureconfig.github.io/docs/error-handling.html#validations-in-custom-readers for details 96 | // on custom validation 97 | private val restartSettingsBase = ConfigReader.forProduct5( 98 | "min-backoff", 99 | "max-backoff", 100 | "random-factor", 101 | "max-restarts", 102 | "max-restarts-within" 103 | ) { 104 | (minBackoff: FiniteDuration, 105 | maxBackoff: FiniteDuration, 106 | randomFactor: Double, 107 | maxRestarts: Option[Int], 108 | 
maxRestartsWithin: Option[FiniteDuration] 109 | ) => 110 | (minBackoff, maxBackoff, randomFactor, maxRestarts, maxRestartsWithin) 111 | } 112 | 113 | implicit val restartSettingsConfigReader: ConfigReader[RestartSettings] = 114 | ConfigReader.fromCursor[RestartSettings] { cur => 115 | restartSettingsBase.from(cur).flatMap { 116 | case (_, _, _, Some(_), None) => 117 | cur.failed( 118 | UserValidationFailed( 119 | "Both max-restarts and max-restarts-within need to exist if defining a maximum restarts configuration, max-restarts-within is missing" 120 | ) 121 | ) 122 | case (_, _, _, None, Some(_)) => 123 | cur.failed( 124 | UserValidationFailed( 125 | "Both max-restarts and max-restarts-within need to exist if defining a maximum restarts configuration, max-restarts is missing" 126 | ) 127 | ) 128 | case (minBackoff, maxBackoff, randomFactor, Some(maxRestarts), Some(maxRestartsWithin)) => 129 | Right(RestartSettings(minBackoff, maxBackoff, randomFactor).withMaxRestarts(maxRestarts, maxRestartsWithin)) 130 | case (minBackoff, maxBackoff, randomFactor, None, None) => 131 | Right(RestartSettings(minBackoff, maxBackoff, randomFactor)) 132 | } 133 | } 134 | 135 | @nowarn("cat=lint-byname-implicit") 136 | implicit lazy val s3Config: S3 = { 137 | import pureconfig.generic.auto._ 138 | ConfigSource.default.at("s3-config").loadOrThrow[S3] 139 | } 140 | } 141 | 142 | object Config extends Config 143 | -------------------------------------------------------------------------------- /core-s3/src/main/scala/io/aiven/guardian/kafka/s3/configs/S3.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3.configs 2 | 3 | import org.apache.pekko.stream.RestartSettings 4 | 5 | /** S3 specific configuration used when storing Kafka ConsumerRecords to a S3 bucket 6 | * 7 | * @param dataBucket 8 | * The bucket where a Kafka Consumer directly streams data into as storage 9 | * @param dataBucketPrefix 10 | * Prefix for the data bucket (if any) 11 | * @param errorRestartSettings 12 | * Restart settings that are used whenever an pekko-stream encounters an error 13 | */ 14 | final case class S3(dataBucket: String, dataBucketPrefix: Option[String], errorRestartSettings: RestartSettings) 15 | 16 | object S3 { 17 | def apply(dataBucket: String, errorRestartSettings: RestartSettings): S3 = S3(dataBucket, None, errorRestartSettings) 18 | } 19 | -------------------------------------------------------------------------------- /core-s3/src/main/scala/io/aiven/guardian/kafka/s3/errors/S3Errors.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3.errors 2 | 3 | import io.aiven.guardian.kafka.Errors 4 | import org.apache.pekko 5 | 6 | import pekko.http.scaladsl.model.headers.ByteRange 7 | import pekko.stream.connectors.s3.S3Headers 8 | 9 | sealed abstract class S3Errors extends Errors 10 | 11 | object S3Errors { 12 | final case class ExpectedObjectToExist(bucket: String, 13 | key: String, 14 | range: Option[ByteRange], 15 | versionId: Option[String], 16 | s3Headers: S3Headers 17 | ) extends S3Errors { 18 | override def getMessage: String = { 19 | val finalVersionId = versionId.getOrElse("latest") 20 | s"S3 object key:$key and version:$finalVersionId inside bucket:$bucket doesn't exist. 
S3 headers are ${s3Headers.toString()}" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /core-s3/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/Generators.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config} 4 | import org.apache.pekko.stream.RestartSettings 5 | import org.scalacheck.Gen 6 | 7 | import scala.annotation.nowarn 8 | import scala.concurrent.duration._ 9 | import scala.language.postfixOps 10 | 11 | object Generators { 12 | val MaxBucketLength: Int = 63 13 | 14 | // See https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html for valid 15 | // bucketnames 16 | 17 | lazy val bucketLetterOrNumberCharGen: Gen[Char] = Gen.frequency( 18 | (1, Gen.numChar), 19 | (1, Gen.alphaLowerChar) 20 | ) 21 | 22 | def bucketAllCharGen(useVirtualDotHost: Boolean): Gen[Char] = { 23 | val base = List( 24 | (10, Gen.alphaLowerChar), 25 | (1, Gen.const('-')), 26 | (1, Gen.numChar) 27 | ) 28 | 29 | val frequency = if (useVirtualDotHost) (1, Gen.const('.')) +: base else base 30 | 31 | Gen.frequency(frequency: _*) 32 | } 33 | 34 | @nowarn("msg=not.*?exhaustive") 35 | private def checkInvalidDuplicateChars(chars: List[Char]): Boolean = 36 | chars.sliding(2).forall { case Seq(before, after) => 37 | !(before == '.' && after == '.' || before == '-' && after == '.' || before == '.' 
&& after == '-') 38 | } 39 | 40 | private def checkAlphaChar(c: Char): Boolean = 41 | c >= 'a' && c <= 'z' 42 | 43 | private def allCharCheck(useVirtualDotHost: Boolean, string: String): Boolean = 44 | if (useVirtualDotHost) { 45 | string.forall(char => Character.isDigit(char) || checkAlphaChar(char) || char == '-' || char == '.') && 46 | checkInvalidDuplicateChars(string.toList) 47 | } else 48 | string.forall(char => Character.isDigit(char) || checkAlphaChar(char) || char == '-') 49 | 50 | def validatePrefix(useVirtualDotHost: Boolean, prefix: Option[String]): Option[String] = { 51 | val withoutWhitespace = prefix match { 52 | case Some(value) if value.trim == "" => None 53 | case Some(value) => Some(value) 54 | case None => None 55 | } 56 | 57 | withoutWhitespace match { 58 | case Some(value) if !(Character.isDigit(value.head) || checkAlphaChar(value.head)) => 59 | throw new IllegalArgumentException( 60 | s"Invalid starting digit for prefix $value, ${value.head} needs to be an alpha char or digit" 61 | ) 62 | case Some(value) if value.length > 1 => 63 | if (!allCharCheck(useVirtualDotHost, value.drop(1))) 64 | throw new IllegalArgumentException( 65 | s"Prefix $value contains invalid characters" 66 | ) 67 | case Some(value) if value.length > MaxBucketLength - 1 => 68 | throw new IllegalArgumentException( 69 | s"Prefix is too long, it has size ${value.length} where as the max bucket size is $MaxBucketLength" 70 | ) 71 | case _ => () 72 | } 73 | 74 | withoutWhitespace 75 | } 76 | 77 | def bucketNameGen(useVirtualDotHost: Boolean, prefix: Option[String] = None): Gen[String] = { 78 | val finalPrefix = validatePrefix(useVirtualDotHost, prefix) 79 | 80 | for { 81 | range <- { 82 | val maxLength = finalPrefix match { 83 | case Some(p) => MaxBucketLength - p.length 84 | case None => MaxBucketLength 85 | } 86 | 87 | if (maxLength > 3) 88 | Gen.choose(3, maxLength) 89 | else 90 | Gen.const(maxLength) 91 | } 92 | startString = finalPrefix.getOrElse("") 93 | 94 | bucketName <- range match { 95 | case 3 => 96 | for { 97 | first <- bucketLetterOrNumberCharGen 98 | second <- bucketAllCharGen(useVirtualDotHost) 99 | third <- bucketLetterOrNumberCharGen 100 | } yield startString ++ List(first, second, third).mkString 101 | case _ => 102 | for { 103 | first <- bucketLetterOrNumberCharGen 104 | last <- bucketLetterOrNumberCharGen 105 | middle <- { 106 | val gen = Gen.listOfN(range - 2, bucketAllCharGen(useVirtualDotHost)) 107 | if (useVirtualDotHost) gen.filter(checkInvalidDuplicateChars) else gen 108 | } 109 | } yield startString ++ first.toString ++ middle.mkString ++ last.toString 110 | } 111 | } yield bucketName 112 | } 113 | 114 | val restartSetting: RestartSettings = RestartSettings( 115 | 5 millis, 116 | 10 seconds, 117 | 0.2 118 | ) 119 | 120 | def s3ConfigGen(useVirtualDotHost: Boolean, prefix: Option[String] = None): Gen[S3Config] = for { 121 | dataBucket <- bucketNameGen(useVirtualDotHost, prefix) 122 | } yield S3Config(dataBucket, restartSetting) 123 | 124 | } 125 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/Main.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import cats.data.NonEmptyList 4 | import cats.implicits._ 5 | import com.monovore.decline.Command 6 | import com.monovore.decline.CommandApp 7 | import com.monovore.decline.Opts 8 | import com.typesafe.scalalogging.LazyLogging 9 | import 
io.aiven.guardian.kafka.s3.Entry.computeAndDeleteBuckets 10 | import org.apache.pekko 11 | 12 | import scala.concurrent._ 13 | import scala.concurrent.duration._ 14 | import scala.util.control.NonFatal 15 | 16 | import pekko.actor.ActorSystem 17 | import pekko.stream.Attributes 18 | import pekko.stream.connectors.s3.S3Attributes 19 | import pekko.stream.connectors.s3.S3Settings 20 | import pekko.stream.connectors.s3.scaladsl.S3 21 | import pekko.stream.scaladsl.Sink 22 | 23 | class Entry 24 | extends CommandApp( 25 | name = "guardian-s3-test-utils", 26 | header = "Guardian S3 Test Utilities", 27 | main = { 28 | val cleanBucketsCommand = Command( 29 | name = "clean-buckets", 30 | header = "Clean buckets left over by Guardian S3 tests" 31 | ) { 32 | val prefixOpt: Opts[String] = 33 | Opts 34 | .option[String]("prefix", help = "Only delete buckets with specified prefix") 35 | 36 | val excludeBucketsOpt: Opts[Option[NonEmptyList[String]]] = 37 | Opts 38 | .options[String]("exclude-buckets", 39 | help = "Buckets that will always be excluded from cleanup, irrespective of prefix" 40 | ) 41 | .orNone 42 | 43 | (prefixOpt, excludeBucketsOpt).tupled 44 | } 45 | 46 | Opts.subcommand(cleanBucketsCommand).map { case (bucketPrefix, excludeBuckets) => 47 | implicit val system: ActorSystem = ActorSystem() 48 | implicit val ec: ExecutionContext = system.dispatcher 49 | implicit val s3Settings: S3Settings = S3Settings() 50 | 51 | val excludeBucketsSet = excludeBuckets.map(_.toList.toSet).getOrElse(Set.empty) 52 | 53 | try { 54 | Await.result(computeAndDeleteBuckets(bucketPrefix, excludeBucketsSet), Duration.Inf) 55 | System.exit(0) 56 | } catch { 57 | case NonFatal(_) => 58 | System.exit(1) 59 | } 60 | } 61 | } 62 | ) 63 | 64 | object Entry extends LazyLogging { 65 | def computeAndDeleteBuckets(bucketPrefix: String, excludeBuckets: Set[String])(implicit 66 | executionContext: ExecutionContext, 67 | system: ActorSystem, 68 | s3Settings: S3Settings 69 | ): Future[Set[String]] = for { 70 | bucketsToDelete <- computeBucketsToDelete(bucketPrefix, excludeBuckets) 71 | _ <- if (bucketsToDelete.nonEmpty) { 72 | deleteBuckets(bucketsToDelete) 73 | } else 74 | Future { 75 | logger.info("No buckets to delete") 76 | } 77 | } yield bucketsToDelete 78 | 79 | def computeBucketsToDelete(bucketPrefix: String, excludeBuckets: Set[String])(implicit 80 | system: ActorSystem, 81 | s3Settings: S3Settings 82 | ): Future[Set[String]] = 83 | S3.listBuckets() 84 | .withAttributes(S3Attributes.settings(s3Settings)) 85 | .runWith(Sink.seq) 86 | .map { allBuckets => 87 | allBuckets.map(_.name).toSet.filter(fromS3Bucket => fromS3Bucket.startsWith(bucketPrefix)).diff(excludeBuckets) 88 | }(ExecutionContext.parasitic) 89 | 90 | def deleteBuckets( 91 | buckets: Set[String] 92 | )(implicit executionContext: ExecutionContext, system: ActorSystem, s3Settings: S3Settings): Future[Unit] = { 93 | implicit val s3Attrs: Attributes = S3Attributes.settings(s3Settings) 94 | val futures = buckets.map { bucket => 95 | logger.info(s"Deleting bucket $bucket") 96 | S3TestUtils.cleanAndDeleteBucket(bucket) 97 | } 98 | Future.sequence(futures).map(_ => ())(ExecutionContext.parasitic) 99 | } 100 | } 101 | 102 | object Main extends Entry 103 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/MinioContainer.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import 
com.dimafeng.testcontainers.GenericContainer 4 | import org.testcontainers.containers.wait.strategy.Wait 5 | 6 | import java.time.Duration 7 | 8 | class MinioContainer(accessKey: String, secretKey: String) 9 | extends GenericContainer( 10 | "minio/minio", 11 | exposedPorts = List(9000), 12 | waitStrategy = Some(Wait.forHttp("/minio/health/ready").forPort(9000).withStartupTimeout(Duration.ofSeconds(10))), 13 | command = List("server", "/data"), 14 | env = Map( 15 | "MINIO_ACCESS_KEY" -> accessKey, 16 | "MINIO_SECRET_KEY" -> secretKey 17 | ) 18 | ) { 19 | 20 | def getHostAddress: String = 21 | s"http://${container.getHost}:${container.getMappedPort(9000)}" 22 | } 23 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/MinioS3Test.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import com.dimafeng.testcontainers.ForAllTestContainer 4 | import org.apache.pekko 5 | import org.scalatest.Suite 6 | import software.amazon.awssdk.auth.credentials.AwsBasicCredentials 7 | import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider 8 | import software.amazon.awssdk.regions.Region 9 | import software.amazon.awssdk.regions.providers.AwsRegionProvider 10 | 11 | import pekko.stream.connectors.s3.AccessStyle 12 | import pekko.stream.connectors.s3.S3Settings 13 | import pekko.testkit.TestKitBase 14 | 15 | trait MinioS3Test extends ForAllTestContainer with TestKitBase { this: Suite => 16 | private val S3DummyAccessKey = "DUMMY_ACCESS_KEY" 17 | private val S3DummySecretKey = "DUMMY_SECRET_KEY" 18 | 19 | lazy val s3Settings: S3Settings = S3Settings() 20 | .withEndpointUrl(container.getHostAddress) 21 | .withCredentialsProvider( 22 | StaticCredentialsProvider.create(AwsBasicCredentials.create(S3DummyAccessKey, S3DummySecretKey)) 23 | ) 24 | .withS3RegionProvider(new AwsRegionProvider { 25 | lazy val getRegion: Region = Region.US_EAST_1 26 | }) 27 | .withAccessStyle(AccessStyle.PathAccessStyle) 28 | 29 | override lazy val container: MinioContainer = new MinioContainer(S3DummyAccessKey, S3DummySecretKey) 30 | } 31 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/io/aiven/guardian/kafka/s3/S3TestUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.s3 2 | 3 | import com.typesafe.scalalogging.StrictLogging 4 | import markatta.futiles.Retry 5 | import org.apache.pekko 6 | 7 | import scala.concurrent.ExecutionContext 8 | import scala.concurrent.Future 9 | import scala.concurrent.duration._ 10 | import scala.language.postfixOps 11 | 12 | import pekko.actor.ActorSystem 13 | import pekko.stream.Attributes 14 | import pekko.stream.connectors.s3.scaladsl.S3 15 | import pekko.stream.scaladsl.Sink 16 | 17 | object S3TestUtils extends StrictLogging { 18 | 19 | /** Completely cleans a bucket contents as well as deleting it afterwards. 
20 | */ 21 | def cleanAndDeleteBucket(bucket: String)(implicit system: ActorSystem, s3Attrs: Attributes): Future[Unit] = { 22 | implicit val ec: ExecutionContext = system.dispatcher 23 | for { 24 | _ <- S3.deleteBucketContents(bucket, deleteAllVersions = true).withAttributes(s3Attrs).runWith(Sink.ignore) 25 | multiParts <- 26 | S3.listMultipartUpload(bucket, None).withAttributes(s3Attrs).runWith(Sink.seq) 27 | _ <- Future.sequence(multiParts.map { part => 28 | S3.deleteUpload(bucket, part.key, part.uploadId) 29 | }) 30 | _ <- Retry.retryWithBackOff( 31 | 5, 32 | 100 millis, 33 | throwable => throwable.getMessage.contains("The bucket you tried to delete is not empty") 34 | )(S3.deleteBucket(bucket)) 35 | _ = logger.info(s"Completed deleting bucket $bucket") 36 | } yield () 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /core-s3/src/test/scala/org/apache/pekko/stream/connectors/s3/GeneratorsSpec.scala: -------------------------------------------------------------------------------- 1 | package org.apache.pekko.stream.connectors.s3 2 | 3 | import com.typesafe.config.Config 4 | import com.typesafe.config.ConfigFactory 5 | import com.typesafe.config.ConfigValueFactory 6 | import io.aiven.guardian.kafka.s3.Generators 7 | import org.scalacheck.Gen 8 | import org.scalatest.matchers.must.Matchers 9 | import org.scalatest.propspec.AnyPropSpec 10 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 11 | 12 | import scala.annotation.nowarn 13 | 14 | class GeneratorsSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 15 | 16 | def createBasicConfigFactory(virtualDotHost: Boolean): Config = { 17 | @nowarn("msg=possible missing interpolator: detected an interpolated expression") 18 | val baseS3SettingsConf = 19 | """ 20 | |buffer = "memory" 21 | |disk-buffer-path = "" 22 | | 23 | |aws { 24 | | credentials { 25 | | provider = default 26 | | } 27 | | region { 28 | | provider = default 29 | | } 30 | |} 31 | |access-style = virtual 32 | |list-bucket-api-version = 2 33 | |validate-object-key = true 34 | |retry-settings { 35 | | max-retries = 3 36 | | min-backoff = 200ms 37 | | max-backoff = 10s 38 | | random-factor = 0.0 39 | |} 40 | |multipart-upload { 41 | | retry-settings = ${retry-settings} 42 | |} 43 | |sign-anonymous-requests = true 44 | |""".stripMargin 45 | 46 | val config = ConfigFactory.parseString(baseS3SettingsConf).resolve() 47 | if (virtualDotHost) 48 | config.withValue("access-style", ConfigValueFactory.fromAnyRef("virtual")) 49 | else 50 | config.withValue("access-style", ConfigValueFactory.fromAnyRef("path")) 51 | } 52 | 53 | property("Bucket name generators generates valid bucket names according to S3Settings with virtualDotHost") { 54 | forAll(Generators.bucketNameGen(useVirtualDotHost = true)) { bucket => 55 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(true))) 56 | } 57 | } 58 | 59 | property("Bucket name generators generates valid bucket names according to S3Settings without virtualDotHost") { 60 | forAll(Generators.bucketNameGen(useVirtualDotHost = false)) { bucket => 61 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(false))) 62 | } 63 | } 64 | 65 | def withPrefixGen(useVirtualDotHost: Boolean): Gen[String] = for { 66 | range <- Gen.choose(2, Generators.MaxBucketLength - 3) 67 | firstChar <- Generators.bucketLetterOrNumberCharGen 68 | chars <- Gen.listOfN(range, 
Generators.bucketAllCharGen(useVirtualDotHost = false)) 69 | bucketName <- Generators.bucketNameGen(useVirtualDotHost, Some((firstChar +: chars).mkString)) 70 | } yield bucketName 71 | 72 | property( 73 | "Bucket name generators generates valid bucket names according to S3Settings with virtualDotHost and prefix" 74 | ) { 75 | forAll(withPrefixGen(useVirtualDotHost = true)) { bucket => 76 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(true))) 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /core/README.md: -------------------------------------------------------------------------------- 1 | # Guardian for Apache Kafka - Core 2 | 3 | This module contains the core configuration for setting up the Kafka Consumer. 4 | 5 | By default core uses [Alpakka Kafka][alpakka-kafka] to interact with a Kafka Cluster; however, you can also provide your 6 | own implementation by extending the `aiven.io.guardian.kafka.KafkaClientInterface`. Since Kafka consumers handle auto 7 | commit of cursors, the `KafkaClientInterface` uses a `SourceWithContext` so that it's possible for the `Source` 8 | to automatically commit cursors when successfully reading topics. 9 | 10 | ## Configuration 11 | 12 | Specification (including environment variable overrides) can be found [here](/src/main/resources/reference.conf). 13 | 14 | The primary `aiven.io.guardian.kafka.KafkaClient` is configured using [Alpakka Kafka][alpakka-kafka] [Consumer 15 | configuration](https://doc.akka.io/docs/alpakka-kafka/current/consumer.html), which also contains the default values. 16 | The committing of Kafka cursors also requires 17 | [CommitterSettings configuration](https://doc.akka.io/docs/alpakka-kafka/current/consumer.html#committer-sink). 18 | 19 | There is also a generic `aiven.io.guardian.kafka.configs.KafkaCluster` configuration at `"kafka-cluster"` for anything not specific 20 | to the Kafka consumer, i.e. which topics to backup/compact/restore.
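The following sketch is an editor-added illustration rather than part of the original README: assuming the core module is on the classpath, it shows how a `"kafka-cluster"` section could be resolved into `io.aiven.guardian.kafka.configs.KafkaCluster` with the same PureConfig `loadOrThrow` pattern used by `io.aiven.guardian.kafka.Config`. The topic names are hypothetical.

import io.aiven.guardian.kafka.configs.KafkaCluster
import pureconfig.ConfigSource
import pureconfig.generic.auto._

import scala.annotation.nowarn

object KafkaClusterConfigExample extends App {
  // Hypothetical topics; in a real deployment they come from reference.conf or
  // the KAFKA_CLUSTER_TOPICS environment variable override.
  val conf =
    """
      |kafka-cluster = {
      |  topics = ["orders", "payments"]
      |}
      |""".stripMargin

  @nowarn("cat=lint-byname-implicit")
  val kafkaCluster: KafkaCluster = ConfigSource.string(conf).at("kafka-cluster").loadOrThrow[KafkaCluster]

  assert(kafkaCluster == KafkaCluster(Set("orders", "payments")))
}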
21 | 22 | [alpakka-kafka]: https://doc.akka.io/docs/alpakka-kafka/current/home.html 23 | -------------------------------------------------------------------------------- /core/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | # See https://github.com/akka/akka-http/issues/3201 and https://discuss.lightbend.com/t/about-nomoreelementsneeded-exception/8599 2 | 3 | pekko.http.client.stream-cancellation-delay = 1000 millis 4 | pekko.http.client.stream-cancellation-delay = ${?PEKKO_HTTP_CLIENT_STREAM_CANCELLATION_DELAY} 5 | 6 | kafka-cluster = { 7 | topics = [] 8 | topics = ${?KAFKA_CLUSTER_TOPICS} 9 | } 10 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/Config.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import pureconfig.ConfigSource 5 | import pureconfig.generic.auto._ 6 | 7 | import scala.annotation.nowarn 8 | 9 | trait Config { 10 | 11 | @nowarn("cat=lint-byname-implicit") 12 | implicit lazy val kafkaClusterConfig: KafkaCluster = 13 | ConfigSource.default.at("kafka-cluster").loadOrThrow[KafkaCluster] 14 | } 15 | 16 | object Config extends Config 17 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/Errors.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | trait Errors extends Exception 4 | 5 | object Errors { 6 | case object ExpectedStartOfSource extends Errors { 7 | override def getMessage: String = "Always expect a single element at the start of a stream" 8 | } 9 | 10 | final case class UnhandledStreamCase[T](elems: Seq[T]) extends Errors { 11 | override def getMessage: String = s"Unhandled case for stream ${elems.map(_.toString).mkString(",")}" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/ExtensionsMethods.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import java.time.OffsetDateTime 4 | 5 | object ExtensionsMethods { 6 | 7 | implicit final class OffsetDateTimeMethods(value: OffsetDateTime) { 8 | def >(other: OffsetDateTime): Boolean = value.compareTo(other) > 0 9 | def >=(other: OffsetDateTime): Boolean = value.compareTo(other) > 0 || value == other 10 | def <(other: OffsetDateTime): Boolean = value.compareTo(other) < 0 11 | def <=(other: OffsetDateTime): Boolean = value.compareTo(other) < 0 || value == other 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/PureConfigUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import pureconfig.ConfigCursor 4 | import pureconfig.error.CannotConvert 5 | import pureconfig.error.ConfigReaderFailures 6 | import pureconfig.error.ConvertFailure 7 | 8 | object PureConfigUtils { 9 | private[kafka] def failure(cur: ConfigCursor, value: String, `type`: String) = ConfigReaderFailures( 10 | ConvertFailure( 11 | CannotConvert(value, `type`, s"Invalid ${`type`}"), 12 | cur 13 | ) 14 | ) 15 | } 16 | 
-------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/Utils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import scala.annotation.tailrec 4 | 5 | import java.time.OffsetDateTime 6 | import java.time.format.DateTimeParseException 7 | 8 | object Utils { 9 | 10 | private def parseToOffsetDateTime(string: String): Option[OffsetDateTime] = 11 | try 12 | Some(OffsetDateTime.parse(string)) 13 | catch { 14 | case _: DateTimeParseException => 15 | None 16 | } 17 | 18 | @tailrec 19 | def keyToOffsetDateTime(key: String): OffsetDateTime = { 20 | val withoutExtension = key.substring(0, key.lastIndexOf('.')) 21 | parseToOffsetDateTime(withoutExtension) match { 22 | case Some(offsetDateTime) => offsetDateTime 23 | case None => keyToOffsetDateTime(withoutExtension) 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/codecs/Circe.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.codecs 2 | 3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord 4 | import io.circe._ 5 | import io.circe.syntax._ 6 | import org.apache.kafka.common.record.TimestampType 7 | 8 | trait Circe { 9 | implicit val kafkaTimestampTypeDecoder: Decoder[TimestampType] = (c: HCursor) => 10 | c.as[Int].flatMap { id => 11 | TimestampType 12 | .values() 13 | .find(_.id == id) 14 | .toRight(DecodingFailure(s"No TimestampType with $id", c.history)) 15 | } 16 | 17 | implicit val kafkaTimestampTypeEncoder: Encoder[TimestampType] = Encoder.instance[TimestampType](_.id.asJson) 18 | 19 | implicit val reducedConsumerRecordDecoder: Decoder[ReducedConsumerRecord] = Decoder.forProduct7( 20 | "topic", 21 | "partition", 22 | "offset", 23 | "key", 24 | "value", 25 | "timestamp", 26 | "timestamp_type" 27 | )(ReducedConsumerRecord.apply) 28 | 29 | implicit val reducedConsumerRecordEncoder: Encoder[ReducedConsumerRecord] = Encoder.forProduct7( 30 | "topic", 31 | "partition", 32 | "offset", 33 | "key", 34 | "value", 35 | "timestamp", 36 | "timestamp_type" 37 | )(x => ReducedConsumerRecord.unapply(x).get) 38 | } 39 | 40 | object Circe extends Circe 41 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/configs/KafkaCluster.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.configs 2 | 3 | /** @param topics 4 | * The set of topics to subscribe to (and hence backup and restore) 5 | */ 6 | final case class KafkaCluster(topics: Set[String]) 7 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/models/BackupObjectMetadata.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.models 2 | 3 | final case class BackupObjectMetadata(compression: Option[CompressionType]) 4 | 5 | object BackupObjectMetadata { 6 | def fromKey(key: String): BackupObjectMetadata = 7 | if (key.endsWith(".gz")) 8 | BackupObjectMetadata(Some(Gzip)) 9 | else 10 | BackupObjectMetadata(None) 11 | } 12 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/models/CompressionType.scala: 
-------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.models 2 | 3 | sealed trait CompressionType { 4 | val pretty: String 5 | } 6 | 7 | case object Gzip extends CompressionType { 8 | override val pretty: String = "Gzip" 9 | } 10 | -------------------------------------------------------------------------------- /core/src/main/scala/io/aiven/guardian/kafka/models/ReducedConsumerRecord.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.models 2 | 3 | import org.apache.kafka.common.record.TimestampType 4 | 5 | import java.time.Instant 6 | import java.time.OffsetDateTime 7 | import java.time.ZoneId 8 | 9 | /** A `ConsumerRecord` that only contains the necessary data for guardian 10 | * 11 | * @param topic 12 | * The kafka topic (same as `ConsumerRecord` `topic`) 13 | * @param offset 14 | * The kafka offset (same as `ConsumerRecord` `offset`) 15 | * @param key 16 | * Base64 encoded version of the original ConsumerRecord key as a byte array 17 | * @param value 18 | * Base64 encoded version of the original ConsumerRecord value as a byte array 19 | * @param timestamp 20 | * The timestamp value (same as `ConsumerRecord` `timestamp`) 21 | * @param timestampType 22 | * The timestamp type (same as `ConsumerRecord` `timestampType`) 23 | */ 24 | final case class ReducedConsumerRecord(topic: String, 25 | partition: Int, 26 | offset: Long, 27 | key: Option[String], 28 | value: String, 29 | timestamp: Long, 30 | timestampType: TimestampType 31 | ) { 32 | def toOffsetDateTime: OffsetDateTime = 33 | Instant.ofEpochMilli(this.timestamp).atZone(ZoneId.of("UTC")).toOffsetDateTime 34 | } 35 | -------------------------------------------------------------------------------- /core/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | pekko { 2 | log-dead-letters-during-shutdown = false 3 | log-dead-letters = 0 4 | } 5 | -------------------------------------------------------------------------------- /core/src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | [%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/kafka/ConfigSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import org.scalacheck.Arbitrary 5 | import org.scalacheck.Gen 6 | import org.scalatest.matchers.must.Matchers 7 | import org.scalatest.propspec.AnyPropSpec 8 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks 9 | import pureconfig.ConfigSource 10 | 11 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks { 12 | implicit val kafkaClusterArb: Arbitrary[KafkaCluster] = Arbitrary( 13 | Gen.containerOf[Set, String](Gen.alphaStr).map(topics => KafkaCluster(topics)) 14 | ) 15 | 16 | property("Valid KafkaClusterConfig configs should parse correctly") { 17 | forAll { (kafkaClusterConfig: KafkaCluster) => 18 | val conf = 19 | s""" 20 | |kafka-cluster = { 21 | | topics = [${kafkaClusterConfig.topics.map(topic => s""""$topic"""").mkString(",")}] 22 | |} 23 | |""".stripMargin 24 | 25 | noException should be thrownBy 
ConfigSource.string(conf).at("kafka-cluster") 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/kafka/KafkaClusterTest.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import com.dimafeng.testcontainers.ForAllTestContainer 4 | import com.dimafeng.testcontainers.KafkaContainer 5 | import io.aiven.guardian.kafka.TestUtils.KafkaFutureToCompletableFuture 6 | import io.aiven.guardian.pekko.PekkoStreamTestKit 7 | import org.apache.kafka.clients.CommonClientConfigs 8 | import org.apache.kafka.clients.admin.AdminClient 9 | import org.apache.kafka.clients.admin.NewTopic 10 | import org.apache.kafka.clients.producer.ProducerConfig 11 | import org.apache.kafka.clients.producer.ProducerRecord 12 | import org.apache.kafka.common.serialization.ByteArraySerializer 13 | import org.apache.pekko 14 | import org.scalatest.Suite 15 | 16 | import scala.concurrent.ExecutionContext 17 | import scala.concurrent.Future 18 | import scala.concurrent.duration.FiniteDuration 19 | import scala.concurrent.duration._ 20 | import scala.jdk.CollectionConverters._ 21 | import scala.jdk.FutureConverters._ 22 | import scala.language.postfixOps 23 | 24 | import pekko.Done 25 | import pekko.kafka.ConsumerSettings 26 | import pekko.kafka.ProducerSettings 27 | import pekko.kafka.scaladsl.Producer 28 | import pekko.stream.scaladsl.Source 29 | 30 | trait KafkaClusterTest extends ForAllTestContainer with PekkoStreamTestKit { this: Suite => 31 | 32 | /** Timeout constant to wait for both Pekko Streams plus initialization of consumer/kafka cluster 33 | */ 34 | val KafkaInitializationTimeoutConstant: FiniteDuration = PekkoStreamInitializationConstant + (2.5 seconds) 35 | 36 | override lazy val container: KafkaContainer = new KafkaContainer() 37 | 38 | def baseKafkaConfig: Some[ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]]] = 39 | Some( 40 | _.withBootstrapServers( 41 | container.bootstrapServers 42 | ) 43 | ) 44 | 45 | /** This config ensures that our producer is atomic since we only ever send a single kafka topic per request and there 46 | * can only be a single request at a given time 47 | * @return 48 | */ 49 | def baseProducerConfig 50 | : Some[ProducerSettings[Array[Byte], Array[Byte]] => ProducerSettings[Array[Byte], Array[Byte]]] = 51 | Some( 52 | _.withBootstrapServers( 53 | container.bootstrapServers 54 | ).withProperties( 55 | Map( 56 | ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG -> true.toString, 57 | ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION -> 1.toString, 58 | ProducerConfig.BATCH_SIZE_CONFIG -> 0.toString 59 | ) 60 | ).withParallelism(1) 61 | ) 62 | 63 | def createProducer(): ProducerSettings[Array[Byte], Array[Byte]] = 64 | ProducerSettings(system, new ByteArraySerializer, new ByteArraySerializer) 65 | .withBootstrapServers(container.bootstrapServers) 66 | 67 | /** Call this function to send a message after the next step of configured time period to trigger a rollover so the 68 | * current object will finish processing 69 | * @param duration 70 | * @param producerSettings 71 | * @param topic 72 | * @return 73 | */ 74 | def sendTopicAfterTimePeriod(duration: FiniteDuration, 75 | producerSettings: ProducerSettings[Array[Byte], Array[Byte]], 76 | topic: String 77 | ): Future[Done] = pekko.pattern.after(duration) { 78 | Source( 79 | List( 80 | new ProducerRecord[Array[Byte], Array[Byte]](topic, 
"1".getBytes, "1".getBytes) 81 | ) 82 | ).runWith(Producer.plainSink(producerSettings)) 83 | } 84 | 85 | protected var adminClient: AdminClient = _ 86 | 87 | override def afterStart(): Unit = { 88 | super.afterStart() 89 | adminClient = AdminClient.create( 90 | Map[String, AnyRef]( 91 | CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> container.bootstrapServers 92 | ).asJava 93 | ) 94 | } 95 | 96 | override def beforeStop(): Unit = { 97 | adminClient.close() 98 | super.beforeStop() 99 | } 100 | 101 | def createTopics(topics: Set[String])(implicit executionContext: ExecutionContext): Future[Unit] = 102 | for { 103 | currentTopics <- adminClient.listTopics().names().toCompletableFuture.asScala 104 | topicsToCreate = topics.diff(currentTopics.asScala.toSet) 105 | _ <- adminClient 106 | .createTopics(topicsToCreate.map { topic => 107 | new NewTopic(topic, 1, 1.toShort) 108 | }.asJava) 109 | .all() 110 | .toCompletableFuture 111 | .asScala 112 | } yield () 113 | 114 | def cleanTopics(topics: Set[String])(implicit executionContext: ExecutionContext): Future[Unit] = 115 | for { 116 | currentTopics <- adminClient.listTopics().names().toCompletableFuture.asScala 117 | topicsToDelete = topics.intersect(currentTopics.asScala.toSet) 118 | _ <- adminClient.deleteTopics(topicsToDelete.asJava).all().toCompletableFuture.asScala 119 | } yield () 120 | 121 | case object TerminationException extends Exception("termination-exception") 122 | } 123 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/kafka/TestUtils.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka 2 | 3 | import com.typesafe.scalalogging.LazyLogging 4 | import org.apache.kafka.common.KafkaFuture 5 | import org.apache.pekko 6 | 7 | import scala.collection.immutable 8 | import scala.collection.mutable 9 | import scala.collection.mutable.ListBuffer 10 | import scala.concurrent.ExecutionContext 11 | import scala.concurrent.Future 12 | import scala.jdk.DurationConverters._ 13 | import scala.util.Failure 14 | import scala.util.Success 15 | 16 | import java.time.OffsetDateTime 17 | import java.time.temporal.ChronoUnit 18 | import java.util.concurrent.CompletableFuture 19 | 20 | import pekko.actor.ActorSystem 21 | 22 | object TestUtils { 23 | 24 | // Taken from https://stackoverflow.com/a/56763206/1519631 25 | implicit final class KafkaFutureToCompletableFuture[T](kafkaFuture: KafkaFuture[T]) { 26 | @SuppressWarnings(Array("DisableSyntax.null")) 27 | def toCompletableFuture: CompletableFuture[T] = { 28 | val wrappingFuture = new CompletableFuture[T] 29 | kafkaFuture.whenComplete { (value, throwable) => 30 | if (throwable != null) 31 | wrappingFuture.completeExceptionally(throwable) 32 | else 33 | wrappingFuture.complete(value) 34 | } 35 | wrappingFuture 36 | } 37 | } 38 | 39 | implicit final class ScalaFutureExtensionMethods[T](future: Future[T]) extends LazyLogging { 40 | def onCompleteLogError(f: () => Unit)(implicit executor: ExecutionContext): Unit = 41 | future.onComplete { result => 42 | result match { 43 | case Failure(exception) => logger.error("Future resulted in error", exception) 44 | case Success(_) => () 45 | } 46 | f() 47 | } 48 | } 49 | 50 | /** The standard Scala groupBy returns an `immutable.Map` which is unordered, this version returns an ordered 51 | * `ListMap` for when preserving insertion order is important 52 | */ 53 | implicit class GroupBy[A](val t: IterableOnce[A]) { 54 | def 
orderedGroupBy[K](f: A => K): immutable.ListMap[K, List[A]] = { 55 | var m = immutable.ListMap.empty[K, ListBuffer[A]] 56 | for (elem <- t.iterator) { 57 | val key = f(elem) 58 | m = m.updatedWith(key) { 59 | case Some(value) => Some(value.addOne(elem)) 60 | case None => Some(mutable.ListBuffer[A](elem)) 61 | } 62 | } 63 | m.map { case (k, v) => (k, v.toList) } 64 | } 65 | } 66 | 67 | final case class UnsupportedTimeUnit(chronoUnit: ChronoUnit) extends Exception(s"$chronoUnit not supported") 68 | 69 | private def recurseUntilHitTimeUnit(previousChronoUnit: ChronoUnit, buffer: BigDecimal)(implicit 70 | system: ActorSystem 71 | ): Future[Unit] = { 72 | val now = OffsetDateTime.now() 73 | val (current, max) = previousChronoUnit match { 74 | case ChronoUnit.SECONDS => 75 | (now.getSecond, 59) 76 | case ChronoUnit.MINUTES => 77 | (now.getMinute, 59) 78 | case ChronoUnit.HOURS => 79 | (now.getHour, 23) 80 | case ChronoUnit.DAYS => 81 | (now.getDayOfWeek.getValue - 1, 6) 82 | case ChronoUnit.MONTHS => 83 | (now.getMonth.getValue - 1, 11) 84 | case _ => throw UnsupportedTimeUnit(previousChronoUnit) 85 | } 86 | 87 | if (BigDecimal(current) / BigDecimal(max) * BigDecimal(100) <= buffer) 88 | Future.successful(()) 89 | else 90 | pekko.pattern.after(previousChronoUnit.getDuration.toScala)(recurseUntilHitTimeUnit(previousChronoUnit, buffer)) 91 | } 92 | 93 | def waitForStartOfTimeUnit(chronoUnit: ChronoUnit, buffer: BigDecimal = BigDecimal(5))(implicit 94 | system: ActorSystem 95 | ): Future[Unit] = { 96 | val allEnums = ChronoUnit.values() 97 | val previousEnum = allEnums(chronoUnit.ordinal - 1) 98 | recurseUntilHitTimeUnit(previousEnum, buffer) 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/pekko/AnyPropTestKit.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.pekko 2 | 3 | import org.apache.pekko 4 | import org.scalatest.fixture 5 | import org.scalatest.propspec.FixtureAnyPropSpecLike 6 | 7 | import pekko.actor.ActorSystem 8 | import pekko.testkit.TestKitBase 9 | 10 | class AnyPropTestKit(_system: ActorSystem) 11 | extends FixtureAnyPropSpecLike 12 | with TestKitBase 13 | with fixture.TestDataFixture { 14 | implicit val system: ActorSystem = _system 15 | } 16 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/pekko/PekkoHttpTestKit.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.pekko 2 | 3 | import org.apache.pekko 4 | import org.scalatest.Suite 5 | 6 | import pekko.actor.ActorSystem 7 | import pekko.http.scaladsl.Http 8 | 9 | trait PekkoHttpTestKit extends PekkoStreamTestKit { this: Suite => 10 | implicit val system: ActorSystem 11 | 12 | override protected def afterAll(): Unit = 13 | Http(system) 14 | .shutdownAllConnectionPools() 15 | .foreach { _ => 16 | super.afterAll() 17 | }(system.dispatcher) 18 | } 19 | -------------------------------------------------------------------------------- /core/src/test/scala/io/aiven/guardian/pekko/PekkoStreamTestKit.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.pekko 2 | 3 | import com.typesafe.scalalogging.CanLog 4 | import com.typesafe.scalalogging.Logger 5 | import com.typesafe.scalalogging.LoggerTakingImplicit 6 | import org.apache.pekko 7 | import org.scalatest.BeforeAndAfterAll 8 
| import org.scalatest.Suite 9 | import org.scalatest.TestData 10 | 11 | import scala.concurrent.duration._ 12 | import scala.language.postfixOps 13 | 14 | import pekko.actor.ActorSystem 15 | import pekko.testkit.TestKit 16 | import pekko.testkit.TestKitBase 17 | 18 | trait PekkoStreamTestKit extends TestKitBase with BeforeAndAfterAll { this: Suite => 19 | implicit val system: ActorSystem 20 | 21 | override protected def afterAll(): Unit = 22 | TestKit.shutdownActorSystem(system) 23 | 24 | /** If it's not possible to determine whether a Stream has finished in a test and instead you need to use a manual 25 | * wait, make sure you wait at least this period of time for pekko-streams to initialize properly. 26 | */ 27 | val PekkoStreamInitializationConstant: FiniteDuration = 1 second 28 | 29 | private implicit case object CanLogTestData extends CanLog[TestData] { 30 | override def logMessage(originalMsg: String, context: TestData): String = 31 | s"${context.name}: $originalMsg" 32 | } 33 | 34 | lazy val logger: LoggerTakingImplicit[TestData] = Logger.takingImplicit[TestData](getClass.getName) 35 | } 36 | -------------------------------------------------------------------------------- /dependency-check/suppression.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | ^pkg:maven/org\.mdedetrich/akka\-stream\-json_2\.13@.*$ 8 | cpe:/a:akka:akka 9 | 10 | 11 | 14 | ^pkg:maven/org\.mdedetrich/akka\-stream\-circe_2\.13@.*$ 15 | cpe:/a:akka:akka 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/design.md: -------------------------------------------------------------------------------- 1 | # Design 2 | 3 | Each application is contained within a corresponding sbt submodule, e.g. the application for `backup` is contained 4 | within the `cli-backup` sbt submodule. The `core-cli` sbt submodule contains common cli arguments (e.g. `kafka-topics`). 5 | 6 | Scala packaging has been disabled for these submodules which means that when publishing/packaging Guardian it won't push 7 | any built `.jar` files. This is because it's unnecessary since you are meant to run these applications as binaries and 8 | not include them as a library. By the same token this also means that the cli modules are built with global inlining 9 | using `"-opt-inline-from:**"`, see [here](https://www.lightbend.com/blog/scala-inliner-optimizer) for more info. 10 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/index.md: -------------------------------------------------------------------------------- 1 | # Application 2 | 3 | Guardian is also packaged as various applications that let you run it using a CLI interface. Currently, the 4 | binaries provided are 5 | 6 | * restore: A binary which when executed allows you to restore from an existing backup. 7 | * backup: A continuously running binary that performs the backup operation. 8 | 9 | The CLI follows POSIX guidelines which means you can use `--help` as an argument to provide information on all of the 10 | parameters.
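For example, once a package has been installed, printing all available options looks like the following (a minimal sketch; the executable name below is illustrative, the actual name depends on the packaging format you chose, see the packaging page):

```shell
# Hypothetical executable name; substitute the binary produced by your chosen package format
guardian-backup --help
```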
11 | 12 | @@toc { depth=2 } 13 | 14 | @@@ index 15 | 16 | * [design](design.md) 17 | * [packaging](packaging.md) 18 | * [logging](logging.md) 19 | 20 | @@@ 21 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/logging.md: -------------------------------------------------------------------------------- 1 | # Logging 2 | 3 | The CLI provides its own default 4 | logback `logback.xml` @github[logging file](/core-cli/src/main/resources/logback.xml) which has sane defaults for 5 | typical usage. It's also possible to provide a custom `logback.xml` configuration file using the `--logback-file` 6 | command line argument. 7 | 8 | For more details about logback and/or the `logback.xml` configuration format read the 9 | @ref:[general architecture section on logging](../general-architecture/logging.md). 10 | -------------------------------------------------------------------------------- /docs/src/main/paradox/application/packaging.md: -------------------------------------------------------------------------------- 1 | # Packaging 2 | 3 | Guardian is currently packaged using [sbt-native-packager](https://github.com/sbt/sbt-native-packager) to provide the 4 | following formats by using the sbt shell. 5 | 6 | * `rpm` 7 | * restore: `cliRestore/rpm:packageBin`. Created `rpm` file will be contained 8 | in `cli-restore/target/rpm/RPMS/noarch/` 9 | * backup: `cliBackup/rpm:packageBin`. Created `rpm` file will be contained in `cli-backup/target/rpm/RPMS/noarch/` 10 | NOTE: In order to build packages you need to have the [rpm-tools](https://rpm.org/) (specifically `rpmbuild`) 11 | installed and available on `PATH`. Please consult your Linux distribution for more info 12 | * `zip` 13 | * restore: `cliRestore/universal:packageBin`. Created `zip` file will be contained 14 | in `cli-restore/target/universal/` 15 | * backup: `cliBackup/universal:packageBin`. Created `zip` file will be contained in `cli-backup/target/universal/` 16 | * `tar` 17 | * restore: `cliRestore/universal:packageZipTarball`. Created `tar` file will be contained 18 | in `cli-restore/target/universal/` 19 | * backup: `cliBackup/universal:packageZipTarball`. Created `tar` file will be contained 20 | in `cli-backup/target/universal/` 21 | * `Xz` 22 | * restore: `cliRestore/universal:packageXzTarball`. Created `xz` file will be contained 23 | in `cli-restore/target/universal/` 24 | * backup: `cliBackup/universal:packageXzTarball`. Created `xz` file will be contained 25 | in `cli-backup/target/universal/` 26 | 27 | Note that for these packages formats you need to have JRE installed on your system to run the package. 
For more details 28 | about packaging read the [docs](https://sbt-native-packager.readthedocs.io/en/latest/) 29 | -------------------------------------------------------------------------------- /docs/src/main/paradox/backup/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Reference 4 | 5 | @@snip (/core-backup/src/main/resources/reference.conf) 6 | 7 | Scala API doc @apidoc[kafka.backup.configs.Backup] 8 | 9 | ## Explanation 10 | 11 | * `pekko.kafka.consumer`: See @extref:[documentation](pekko-connectors-kafka-docs:consumer.html#settings) 12 | * `pekko.kafka.consumer.kafka-clients`: See @extref:[documentation](kafka-docs:documentation.html#consumerconfigs) 13 | * `backup`: 14 | * `kafka-group-id`: The group id for the Kafka consumer that's used in the backup tool 15 | * `time-configuration`: How to slice the persisted keys/files based on time 16 | * `type`: The type of time configuration. Either `period-from-first` or `chrono-unit-slice` 17 | * `period-from-first`: Guardian will split up the backup keys/files determined by the `duration` specified. 18 | The key/filename will be determined by the timestamp of the first message received from the Kafka consumer 19 | with each further key/filename being incremented by the configured `duration`. If Guardian is shut down 20 | then it will terminate and complete the stream with the final element in the JSON array being a `null` 21 | * This is done so it's possible to determine if a backup has been terminated by a shutdown of Guardian 22 | and also because it's not really possible to resume using arbitrary durations. 23 | * `chrono-unit-slice`: Guardian will split up the backup keys/files determined by the `chrono-unit` which 24 | represents intervals such as days and weeks. As such when using this setting it's possible for Guardian to 25 | resume from a previous uncompleted backup. 26 | * `duration`: If configuration is `period-from-first` then this determines the max period of time for each time 27 | slice. 28 | * `chrono-unit`: If configuration is `chrono-unit-slice` then the `chrono-unit` determines the interval (e.g. days, weeks) used for each time slice. 29 | * `commit-timeout-buffer-window`: Guardian sets the commit timeout of the Kafka consumer based on the `time-configuration` 30 | since Guardian does manual committing of cursors. The buffer gets added onto the `time-configuration` to give 31 | some headroom for any theoretical delays. 32 | * `compression`: The compression format to use for the data being backed up. Note that changes in compression 33 | configuration will not apply for any currently existing backups that need to be completed, only for future 34 | backups. 35 | * `type`: Which compression to use. 36 | * `gzip`: Standard [Gzip](https://en.wikipedia.org/wiki/Gzip) compression 37 | * `level`: The level of compression to use 38 | -------------------------------------------------------------------------------- /docs/src/main/paradox/backup/design.md: -------------------------------------------------------------------------------- 1 | # Design 2 | 3 | The format for backups is in JSON consisting of a large JSON array filled with JSON objects that have the following 4 | format.
5 | 6 | ```json 7 | { 8 | "topic": "kafka topic", 9 | "partition": 0, 10 | "offset": 0, 11 | "key": "a2V5", 12 | "value": "dmFsdWU=", 13 | "timestamp": 0, 14 | "timestamp_type": 0 15 | } 16 | ``` 17 | 18 | The `key` and `value` are Base64 encoded byte arrays (in the above example `"a2V5"` decodes to the string `key` 19 | and `"dmFsdWU="` decodes to the string `value`). This is due to the fact that the backup tool can make no assumptions on 20 | the format of the key or value, so we encode the raw byte arrays. 21 | 22 | One thing to note is that it's possible for the last JSON object in the JSON array to be `null`; see the `time-configuration` explanation in @ref:[configuration](configuration.md) for more info. 23 | -------------------------------------------------------------------------------- /docs/src/main/paradox/backup/index.md: -------------------------------------------------------------------------------- 1 | # Backup 2 | 3 | The backup module is responsible for backing up a specific set of Kafka topics into persistent storage. The backup 4 | runs as a continuous stream that is split depending on time buckets which are configurable. 5 | 6 | @@project-info { projectId="coreBackup" } 7 | 8 | @@toc { depth=2 } 9 | 10 | @@@ index 11 | 12 | * [configuration](configuration.md) 13 | * [design](design.md) 14 | 15 | @@@ 16 | -------------------------------------------------------------------------------- /docs/src/main/paradox/ci.md: -------------------------------------------------------------------------------- 1 | # CI - Continuous Integration 2 | 3 | Guardian uses github actions to perform CI whenever a pull request is made and when a pull request is merged into 4 | master. CI is also responsible for publishing to github. The integration with github actions for the main build is 5 | performed using [sbt-github-actions][sbt-github-actions-link]. 6 | 7 | ## Design 8 | 9 | One thing to note about [sbt-github-actions][sbt-github-actions-link] is that it generates the github workflow files 10 | directly from the sbt @github[build definition file](/build.sbt). 11 | This means that the `build.sbt` is the source of truth and hence [sbt-github-actions][sbt-github-actions-link] also 12 | checks that the github workflow is in sync with `build.sbt` as part of the CI process. 13 | 14 | Essentially that means any changes to `build.sbt` (such as updating Scala versions) can also cause changes in github 15 | workflow actions. Likewise if you need to do any custom changes to 16 | the @github[ci.yaml](/.github/workflows/ci.yml) file you need to do this in `build.sbt` using 17 | the [sbt-github-actions][sbt-github-actions-link] SBT dsl. 18 | 19 | To regenerate the relevant github workflow files after changes to `build.sbt` are done you need to run 20 | 21 | ``` 22 | githubWorkflowGenerate 23 | ``` 24 | 25 | in the sbt shell. For more information go [here](https://github.com/djspiewak/sbt-github-actions#generative-plugin) 26 | 27 | ## Scalafmt 28 | 29 | In addition to and separately from [sbt-github-actions][sbt-github-actions-link], Guardian also has 30 | a [scalafmt][scalafmt-link] pipeline that checks the code is correctly formatted on each PR. This allows the 31 | @github[scalafmt pipeline](/.github/workflows/format.yml) to run at the same time the main build 32 | does. Furthermore, it uses [scalafmt-native](https://scalameta.org/scalafmt/docs/installation.html#native-image) for 33 | improved runtime performance (typically it takes 5-10 seconds to check the entire project is formatted).
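To run an equivalent formatting check locally before opening a PR, a minimal sketch is to use the sbt-scalafmt plugin tasks from the sbt shell (note that the CI pipeline itself invokes scalafmt-native rather than these sbt tasks):

```
scalafmtCheckAll
scalafmtSbtCheck
```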
34 | 35 | This means that if you ever update the scalafmt version in 36 | the @github[configuration file](/.scalafmt.conf#L1) you also need to update it in the 37 | @github[scalafmt-pipeline](/.github/workflows/format.yml#L26). 38 | 39 | [sbt-github-actions-link]: https://github.com/djspiewak/sbt-github-actions 40 | [scalafmt-link]: https://scalameta.org/scalafmt/ 41 | -------------------------------------------------------------------------------- /docs/src/main/paradox/doc-generation.md: -------------------------------------------------------------------------------- 1 | # Document Generation 2 | 3 | Guardian uses [sbt-paradox][sbt-paradox-link] as the main plugin for generating documentation which is hosted 4 | using [github pages][github-pages-link]. In addition, various other plugins are used which are noted below 5 | 6 | * [sbt-paradox-api-doc](https://github.com/lightbend/sbt-paradox-apidoc): Allows you to directly link to Scala 7 | documentation using the `@@apidoc` directive 8 | * [sbt-paradox-project-info](https://github.com/lightbend/sbt-paradox-project-info): Provides an `@@projectInfo` 9 | directive that derives common information about the project (such as dependencies, project info, etc.) 10 | * [sbt-site](https://github.com/sbt/sbt-site): Used in conjunction with [sbt-paradox][sbt-paradox-link] to generate the 11 | final site structure 12 | * [sbt-ghpages](https://github.com/sbt/sbt-ghpages): Used for uploading the final site 13 | to [github-pages][github-pages-link]. 14 | * [sbt-unidoc](https://github.com/sbt/sbt-unidoc): Used to aggregate/concatenate Scala API documentation 15 | from various sbt modules into a single documentation result 16 | 17 | ## Design 18 | 19 | [sbt-paradox][sbt-paradox-link] generates documentation using standard [Markdown](https://www.markdownguide.org/). The 20 | documentation can be found in the @github[docs-folder](/docs). Note that this folder also corresponds to an sbt module 21 | named `docs`, which means that commands related to documentation are run in that sbt sub-project 22 | (i.e. `docs/makeSite` generates the documentation site). 23 | 24 | Guardian also uses [scaladoc][scaladoc-link] which is already included within the Scala compiler/SBT to generate Scala API 25 | documentation. [scaladoc][scaladoc-link] is analogous to Java's own [javadoc](https://en.wikipedia.org/wiki/Javadoc) 26 | which generates API documentation that is written within the code itself. 27 | 28 | One advantage of using [sbt-paradox][sbt-paradox-link] and its various plugins as the main driver for documentation 29 | generation is that it checks at document generation (i.e. compile time) that the docs are well-formed. This checking 30 | includes 31 | 32 | * references to other links 33 | * references to specific Scala API documentation directly using Scala classes/objects/traits 34 | * the TOC (table of contents) is well-formed (e.g.
you don't have markdown files in `docs` which aren't referenced 35 | anywhere) 36 | * references to versions from Guardians various Scala submodules are always up-to-date 37 | * references to code snippets 38 | 39 | [sbt-paradox-link]: https://github.com/lightbend/paradox 40 | [github-pages-link]: https://pages.github.com/ 41 | [scaladoc-link]: https://docs.scala-lang.org/style/scaladoc.html 42 | -------------------------------------------------------------------------------- /docs/src/main/paradox/general-architecture/index.md: -------------------------------------------------------------------------------- 1 | # General Architecture 2 | 3 | General documentation about how Guardian for Apache Kafka is architected lives here. 4 | 5 | @@toc { depth=2 } 6 | 7 | @@@ index 8 | 9 | * [logging](logging.md) 10 | 11 | @@@ 12 | -------------------------------------------------------------------------------- /docs/src/main/paradox/general-architecture/logging.md: -------------------------------------------------------------------------------- 1 | # Logging 2 | 3 | Guardian for Apache Kafka uses [logback](https://logback.qos.ch/index.html) to perform logging. This means if you are 4 | using the modules as libraries you need to provide a `logback.xml` in your classpath (typically this is done by putting 5 | the `logback.xml` in your `/src/main/resources` folder). Note that the Guardian modules do not provide a default 6 | `logback.xml` for deployed artifacts since this is typically the responsibility of an application to configure and 7 | provide. 8 | 9 | If you want examples of `logback.xml` configuration you can have a look at the 10 | official [logback page](https://logback.qos.ch/manual/configuration.html) but you can also use existing `logback.xml`'s 11 | from either the @github[cli](/core-cli/src/main/resources/logback.xml) or the 12 | @github[tests](/core/src/test/resources/logback.xml) as a reference. 13 | 14 | @@@ warning 15 | 16 | As documented at @extref:[pekko logback configuration](pekko-docs:logging.html#logback-configuration) it is highly recommended 17 | to use an `AsyncAppender` in your configuration as this offsets the logging to a background thread otherwise you will 18 | end up blocking the core pekko/pekko-streams library whenever a log is made. 19 | 20 | @@@ 21 | 22 | ## Logback adapter for pekko/pekko-streams 23 | 24 | By default, pekko/pekko-streams uses its own asynchronous logger however they provide a 25 | @extref:[logging adapter](pekko-docs:logging.html#slf4j) which has already been preconfigured for use in Guardian. 26 | 27 | ## CLI/Application 28 | 29 | Note that unlike the core libraries, the CLI application does provide a default `logback.xml`. For more details read 30 | @ref:[application logging](../application/logging.md). 31 | -------------------------------------------------------------------------------- /docs/src/main/paradox/index.md: -------------------------------------------------------------------------------- 1 | # Guardian for Apache Kafka Documentation 2 | 3 | Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters. 4 | It is built using [Scala](https://www.scala-lang.org/) entirely 5 | with [Pekko-Streams](https://pekko.apache.org/docs/pekko/current/stream/index.html) 6 | to ensure that the tool runs reliably and as desired with large datasets in different scenarios. 
7 | 8 | @@toc { depth=2 } 9 | 10 | @@@ index 11 | 12 | * [overview](overview.md) 13 | * [security](security.md) 14 | * [license-report](license-report.md) 15 | * [ci](ci.md) 16 | * [doc-generation](doc-generation.md) 17 | * [general-architecture](general-architecture/index.md) 18 | * [testing](testing/index.md) 19 | * [application](application/index.md) 20 | * [backup](backup/index.md) 21 | * [persistence](persistence/index.md) 22 | * [restore](restore/index.md) 23 | 24 | @@@ 25 | 26 | ## Trademarks 27 | 28 | Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or 29 | other countries. 30 | -------------------------------------------------------------------------------- /docs/src/main/paradox/overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters. 4 | It is built using [Scala](https://www.scala-lang.org/) entirely 5 | with [Pekko-Streams](https://pekko.apache.org/docs/pekko/current/stream/index.html) 6 | to ensure that the tool runs as desired with large datasets in different scenarios. 7 | 8 | ## Versions 9 | 10 | The core modules are compiled against: 11 | 12 | * Pekko Streams $pekko.version$+ (@extref:[Reference](pekko-docs:stream/index.html), [Github](https://github.com/apache/incubator-pekko)) 13 | * Pekko Streams Circe $pekko-stream-circe.version$+ ([Github](https://github.com/mdedetrich/pekko-streams-circe)) 14 | * PureConfig $pure-config.version$+ ([Reference](https://pureconfig.github.io/docs/), [Github](https://github.com/pureconfig/pureconfig)) 15 | * ScalaLogging $scala-logging.version$+ ([Github](https://github.com/lightbend/scala-logging)) 16 | 17 | The cli modules are compiled against: 18 | 19 | * Decline $decline.version$+ ([Reference](https://ben.kirw.in/decline/), [Github](https://github.com/bkirwi/decline)) 20 | -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/design.md: -------------------------------------------------------------------------------- 1 | # Design 2 | 3 | Storage mechanisms are implemented via the @apidoc[BackupClientInterface] and @apidoc[RestoreClientInterface]. To add 4 | custom storage mechanisms you need to implement these methods. These interfaces are designed to be as simple as possible 5 | while being completely abstract to allow for any theoretical storage mechanism. 6 | 7 | ## BackupClientInterface 8 | 9 | The @apidoc[BackupClientInterface] implements the entire backup flow including the resuming from a previously terminated 10 | backup. Of note is the @apidoc[BackupClientInterface.State](BackupClientInterface) which is the data structure that is 11 | returned when any previously existing backup for that key exists. This is provided to 12 | @apidoc[BackupClientInterface.backupToStorageSink](BackupClientInterface) indicating whether the backup being performed 13 | is a new backup or resuming from a previous one with the retrieval of the current state being defined by 14 | @apidoc[BackupClientInterface.getCurrentUploadState](BackupClientInterface). 15 | 16 | Note that when implementing @apidoc[BackupClientInterface] you do not need to handle the corner cases regarding the 17 | contents of the byte string when resuming/suspending/terminating, this is automatically handled for you. 
Essentially you 18 | just need to handle how to store/push `ByteString` into the storage of your choice. 19 | 20 | ## RestoreClientInterface 21 | 22 | The @apidoc[RestoreClientInterface] implements restoration from an existing backup. Implementing this is quite simple, 23 | you need to define @apidoc[RestoreClientInterface.retrieveBackupKeys](RestoreClientInterface) which returns all valid 24 | keys to restore (i.e. don't include currently in progress backup keys) and 25 | @apidoc[RestoreClientInterface.downloadFlow](RestoreClientInterface) which is a pekko-stream `Flow` that takes 26 | a `String` which is the key and outputs the content of that key. 27 | -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/index.md: -------------------------------------------------------------------------------- 1 | # Persistence Modules 2 | 3 | Guardian for Apache Kafka has a modular architecture that provides support for different persistence backups. 4 | 5 | @@toc { depth=2 } 6 | 7 | @@@ index 8 | 9 | * [design](design.md) 10 | * [S3](s3/index.md) 11 | 12 | @@@ -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/s3/configuration.md: -------------------------------------------------------------------------------- 1 | # S3 2 | 3 | ## Reference 4 | 5 | @@snip (/core-s3/src/main/resources/reference.conf) 6 | 7 | Scala API doc @apidoc[kafka.s3.configs.S3] 8 | 9 | ## Explanation 10 | 11 | * `s3-headers`: See @extref:[documentation](pekko-connectors:org/apache/pekko/stream/connectors/s3/headers/index.html) 12 | * `pekko.connectors.s3`: See @extref:[documentation](pekko-connectors-docs:s3.html#configuration) 13 | * `s3-config`: Core S3 configuration 14 | * `data-bucket`: The main S3 bucket where data is backed up and where to restore data from 15 | * `data-bucket-prefix`: S3 prefix configuration to be used when searching for the bucket 16 | * `error-restart-settings`: Specific retry settings when recovering from known errors in S3. See @extref:[apidoc](pekko:org/apache/pekko/stream/RestartSettings.html) 17 | -------------------------------------------------------------------------------- /docs/src/main/paradox/persistence/s3/index.md: -------------------------------------------------------------------------------- 1 | # S3 2 | 3 | The S3 persistence module allows you to store kafka backups on [AWS S3 Cloud Storage](https://aws.amazon.com/s3/). 4 | 5 | @@project-info { projectId="coreS3" } 6 | @@project-info { projectId="backupS3" } 7 | @@project-info { projectId="restoreS3" } 8 | 9 | @@toc { depth=2 } 10 | 11 | @@@ index 12 | 13 | * [configuration](configuration.md) 14 | 15 | @@@ 16 | -------------------------------------------------------------------------------- /docs/src/main/paradox/restore/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Reference 4 | 5 | @@snip (/core-restore/src/main/resources/reference.conf) 6 | 7 | Scala API doc @apidoc[kafka.restore.configs.Restore] 8 | 9 | ## Explanation 10 | 11 | * `pekko.kafka.producer`: See @extref:[documentation](pekko-connectors-kafka-docs:producer.html#settings) 12 | * `pekko.kafka.producer.kafka-clients`: See @extref:[documentation](kafka-docs:documentation.html#producerconfigs) 13 | * `restore`: 14 | * `from-when`: An `ISO-8601` time that specifies from when topics need to be restored. 
Note that the time used is 15 | based on the original Kafka timestamp and **NOT** the current time. 16 | * `override-topics`: A mapping of currently backed up topics to a new topic in the destination Kafka cluster 17 | -------------------------------------------------------------------------------- /docs/src/main/paradox/restore/index.md: -------------------------------------------------------------------------------- 1 | # Restore 2 | 3 | The restore module is responsible for streaming data from a backup storage location into a fresh new cluster in the 4 | event of a disaster recovery. The restore is able to work with any format of backed up files created by Guardian's 5 | backup. 6 | 7 | @@project-info { projectId="coreRestore" } 8 | 9 | @@toc { depth=2 } 10 | 11 | @@@ index 12 | 13 | * [configuration](configuration.md) 14 | 15 | @@@ 16 | -------------------------------------------------------------------------------- /docs/src/main/paradox/security.md: -------------------------------------------------------------------------------- 1 | # Security 2 | 3 | ## OWASP Report 4 | 5 | Guardian uses [sbt-dependency-check](https://github.com/albuch/sbt-dependency-check) to generate 6 | a [dependency-check-report][dependency-check-report-link] which checks direct and transitive dependencies for 7 | vulnerabilities against [NVD](https://nvd.nist.gov/) in the form of an HTML file that can be viewed in a standard 8 | browser. 9 | 10 | ### Generating a report 11 | 12 | You can use the sbt shell to generate a report at any time using 13 | 14 | ``` 15 | dependencyCheckAggregate 16 | ``` 17 | 18 | This will overwrite the @github[current report file](/dependency-check/dependency-check-report.html) 19 | 20 | ### Suppressing false positives 21 | 22 | Sometimes it is possible that a false positive gets generated in the report. To suppress a false positive, first you need to 23 | open the @github[report file](/dependency-check/dependency-check-report.html) in a supported browser. In the list of found vulnerabilities there 24 | should be a suppress button which when clicked displays a popup containing an `XML` suppression entry. You then add 25 | that `<suppress>` tag entry to the 26 | existing [suppression-file](https://github.com/aiven/guardian-for-apache-kafka/edit/main/dependency-check/suppression.xml). 27 | Finally, regenerate the report using sbt's `dependencyCheckAggregate` 28 | -------------------------------------------------------------------------------- /docs/src/main/paradox/testing/index.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | Guardian for Apache Kafka aims to provide as little friction as possible to run tests (ideally you 4 | should be able to run tests directly and only in SBT). As an example this means avoiding handwritten shell scripts to 5 | set up environments since this typically doesn't play well with IDE integrations such 6 | as [Intellij IDEA](https://www.jetbrains.com/idea/) or [Metals](https://scalameta.org/metals/) integrated SBT test 7 | runner. 8 | 9 | ## ScalaTest 10 | 11 | Guardian for Apache Kafka uses [scalatest](https://www.scalatest.org/) as its testing framework.
The primary reasons for 12 | using this testing framework are 13 | 14 | * It's the most supported testing framework in Scala, so much so that it's considered a critical dependency whenever a 15 | new Scala release is made 16 | * It provides very handy utilities for testing asynchronous code, for example a 17 | @extref:[PatienceConfig](scalatest:concurrent/AbstractPatienceConfiguration$PatienceConfig.html) 18 | that provides efficient polling of Scala futures with configurable scalable timeouts and intervals. 19 | * Pekko provides @extref:[Testkit](pekko-docs:testing.html#asynchronous-testing-testkit) with direct integration into 20 | ScalaTest for easy testing of pekko-streams. 21 | 22 | ### Property based tests 23 | 24 | Guardian for Apache Kafka emphasises using property based testing over unit based tests. This is mainly due 25 | to the fact that property based tests often reveal more problems due to covering more cases compared to unit 26 | based tests. Here are more [details](https://www.scalatest.org/user_guide/generator_driven_property_checks) 27 | on how property based testing works with Scala. 28 | 29 | Like most random data generation, ScalaTest/ScalaCheck relies on an initial seed to deterministically generate 30 | the data. When a test fails the seed for the failing test is automatically shown (search for `Init Seed: `). 31 | If you want to specify the seed to regenerate the exact same data that caused the test to fail, you need to 32 | specify it as a test argument in `sbt` 33 | 34 | ```sbt 35 | Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-S", "7832168009826873070") 36 | ``` 37 | 38 | where `7832168009826873070` happens to be the seed 39 | 40 | This argument can be put into any of the projects within the @github[build](/build.sbt). For example if you 41 | want to only specify the seed in the `core` project you can place it like so 42 | 43 | ```sbt 44 | lazy val core = project 45 | .in(file("core")) 46 | .settings( 47 | librarySettings, 48 | name := s"$baseName-core", 49 | Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-S", "7832168009826873070"), 50 | ``` 51 | 52 | Whereas if you want it to apply globally you can just place it in the `guardian` project. 53 | 54 | ## Running test/s until failure 55 | 56 | When diagnosing flaky tests it's very useful to be able to run a test until it fails which sbt allows you to 57 | do with [commands](https://www.scala-sbt.org/1.x/docs/Commands.html). Doing this using an sbt command 58 | is far quicker than other options such as a shell script since you don't have to deal with startup time cost for 59 | every test run. 60 | 61 | This is what the base command looks like 62 | 63 | ```sbt 64 | commands += Command.command("testUntilFailed") { state => 65 | "test" :: "testUntilFailed" :: state 66 | } 67 | ``` 68 | 69 | The command will recursively call a specific task (in this case `test`) until it fails. For it to work with 70 | Guardian for Apache Kafka's @github[build](/build.sbt), you need to place it as a setting 71 | within the `guardian` project. 72 | 73 | Note that this works with any command, not just `test`.
For example if you want to only run a single test 74 | suite until failure you can do 75 | 76 | ```sbt 77 | commands += Command.command("testUntilFailed") { state => 78 | "backupS3/testOnly io.aiven.guardian.kafka.backup.s3.MockedKafkaClientBackupConsumerSpec" :: "testUntilFailed" :: state 79 | } 80 | ``` 81 | 82 | Once specified in the @github[build](/build.sbt) file you can then run `testUntilFailed` within the sbt shell. 83 | 84 | ## TestContainers 85 | 86 | [testcontainers](https://www.testcontainers.org/) along with the Scala 87 | wrapper [testcontainers-scala](https://github.com/testcontainers/testcontainers-scala) is used to automate the spinning 88 | up of [docker](https://www.docker.com/) containers whenever the relevant test is run. As long as you have docker installed on your 89 | system you shouldn't have to worry about anything. 90 | 91 | @@toc { depth=2 } 92 | 93 | @@@ index 94 | 95 | * [s3](s3.md) 96 | 97 | @@@ 98 | -------------------------------------------------------------------------------- /docs/src/main/paradox/testing/s3.md: -------------------------------------------------------------------------------- 1 | # S3 - Testing 2 | 3 | For tests that run against the [AWS S3 service](https://aws.amazon.com/s3/) you need to provide the relevant credentials 4 | to S3. The most typical way to provide these credentials is with the usage of environment variables, e.g. 5 | 6 | ```shell 7 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER=static 8 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_ACCESS_KEY_ID="my key" 9 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_SECRET_ACCESS_KEY="my secret" 10 | export PEKKO_CONNECTORS_S3_REGION_PROVIDER=static 11 | export PEKKO_CONNECTORS_S3_REGION_DEFAULT_REGION=eu-central-1 12 | ``` 13 | 14 | ## Utilities 15 | 16 | Guardian provides a utility to help deal with running S3 related tests. Due to the possibility of this tool 17 | having unintended consequences for your S3 account, it needs to be manually run in sbt. To run the tool 18 | without any parameters do this 19 | 20 | ```sh 21 | sbt "coreS3/test:runMain io.aiven.guardian.kafka.s3.Main" 22 | ``` 23 | 24 | Current commands 25 | 26 | * `cleanup-buckets`: Helps in cleaning up S3 buckets that have been inadvertently left over by tests. 27 | 28 | ## Tagging S3 Tests 29 | 30 | Due to a current limitation where there is no way to expose Github secrets to PRs made from external forks, tests which 31 | run against S3 need to be @extref:[Tagged](scalatest:Tag.html) 32 | using @github[RealS3Available](/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/S3Spec.scala#L45-L48).
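As a rough sketch of what such tagging looks like in ScalaTest (the spec and tag below are illustrative stand-ins; the actual `RealS3Available` tag lives in `S3Spec.scala`):

```scala
import org.scalatest.Tag
import org.scalatest.propspec.AnyPropSpec

// Illustrative stand-in for the tag object defined in S3Spec.scala
object RealS3Available extends Tag("RealS3Available")

class ExampleRealS3Spec extends AnyPropSpec {
  // Tagged tests can then be excluded when S3 credentials are unavailable,
  // e.g. by passing ScalaTest's `-l RealS3Available` runner argument
  property("talks to a real S3 bucket", RealS3Available) {
    // ... exercise the real S3 client here ...
  }
}
```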
33 | -------------------------------------------------------------------------------- /project/LicenseReport.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import sbtlicensereport.SbtLicenseReport 3 | import sbtlicensereport.SbtLicenseReport.autoImportImpl._ 4 | import sbtlicensereport.license.{DepModuleInfo, MarkDown} 5 | 6 | object LicenseReport extends AutoPlugin { 7 | 8 | override lazy val projectSettings = Seq( 9 | licenseReportTypes := Seq(MarkDown), 10 | licenseReportMakeHeader := (language => language.header1("License Report")), 11 | licenseConfigurations := Set("compile", "test", "provided"), 12 | licenseDepExclusions := { 13 | case dep: DepModuleInfo if dep.organization == "io.aiven" && dep.name.contains("guardian") => 14 | true // Inter guardian project dependencies are pointless 15 | case DepModuleInfo(_, "scala-library", _) => true // Scala library is part of Scala language 16 | case DepModuleInfo(_, "scala-reflect", _) => true // Scala reflect is part of Scala language 17 | }, 18 | licenseReportColumns := Seq(Column.Category, Column.License, Column.Dependency, Column.Configuration) 19 | ) 20 | 21 | override def requires = plugins.JvmPlugin && SbtLicenseReport 22 | 23 | override def trigger = allRequirements 24 | 25 | } 26 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.10.0 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2") 2 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox" % "0.10.7") 3 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox-apidoc" % "1.1.0") 4 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox-project-info" % "3.0.1") 5 | addSbtPlugin("com.github.sbt" % "sbt-unidoc" % "0.5.0") 6 | addSbtPlugin("com.github.sbt" % "sbt-ghpages" % "0.8.0") 7 | addSbtPlugin("com.thoughtworks.sbt-api-mappings" % "sbt-api-mappings" % "3.0.2") 8 | addSbtPlugin("com.github.sbt" % "sbt-site-paradox" % "1.7.0") 9 | addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.10.0") 10 | addSbtPlugin("com.github.sbt" % "sbt-github-actions" % "0.23.0") 11 | addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1") 12 | addSbtPlugin("com.github.sbt" % "sbt-release" % "1.4.0") 13 | addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.12.1") 14 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.11") 15 | addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.3.11") 16 | addSbtPlugin("net.vonbuchholtz" % "sbt-dependency-check" % "5.1.0") 17 | addSbtPlugin("com.github.sbt" % "sbt-license-report" % "1.5.0") 18 | 19 | // This is here to bump dependencies for sbt-paradox/sbt-site, see 20 | // https://github.com/sirthias/parboiled/issues/175, https://github.com/sirthias/parboiled/issues/128 and 21 | // https://github.com/sirthias/parboiled/pull/195 22 | libraryDependencies ++= Seq( 23 | "org.parboiled" %% "parboiled-scala" % "1.4.1", 24 | "org.parboiled" % "parboiled-java" % "1.4.1" 25 | ) 26 | 27 | // See https://github.com/akka/akka-http/pull/3995 and https://github.com/akka/akka-http/pull/3995#issuecomment-1026978593 28 | libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % "always" 29 | 
-------------------------------------------------------------------------------- /project/project-info.conf: -------------------------------------------------------------------------------- 1 | project-info { 2 | version: "current" 3 | labels: "https://github.com/aiven/guardian-for-apache-kafka/labels/p%3A" 4 | scaladoc: "https://aiven.github.io/guardian-for-apache-kafka/api/"${project-info.version}"/io/aiven/guardian/" 5 | shared-info { 6 | jdk-versions: ["Adopt OpenJDK 11", "Adopt OpenJDK 17"] 7 | issues: { 8 | url: "https://github.com/aiven/guardian-for-apache-kafka/issues" 9 | text: "Github issues" 10 | } 11 | release-notes: { 12 | url: "https://github.com/aiven/guardian-for-apache-kafka/releases" 13 | text: "GitHub releases" 14 | } 15 | } 16 | backupS3: ${project-info.shared-info} { 17 | title: "Backup S3" 18 | jpms-name: "io.aiven.guardian.kafka.backup.s3" 19 | api-docs: [ 20 | { 21 | url: ${project-info.scaladoc}"kafka/backup/s3/index.html" 22 | text: "API (Scaladoc)" 23 | } 24 | ] 25 | } 26 | cliBackup: ${project-info.shared-info} { 27 | title: "CLI Backup" 28 | jpms-name: "io.aiven.guardian.kafka.backup" 29 | } 30 | cliRestore: ${project-info.shared-info} { 31 | title: "CLI Restore" 32 | jpms-name: "io.aiven.guardian.kafka.restore" 33 | } 34 | core: ${project-info.shared-info} { 35 | title: "Core" 36 | jpms-name: "io.aiven.guardian.kafka" 37 | api-docs: [ 38 | { 39 | url: ${project-info.scaladoc}"kafka/index.html" 40 | text: "API (Scaladoc)" 41 | } 42 | ] 43 | } 44 | coreBackup: ${project-info.shared-info} { 45 | title: "Core Backup" 46 | jpms-name: "io.aiven.guardian.kafka.backup" 47 | api-docs: [ 48 | { 49 | url: ${project-info.scaladoc}"kafka/backup/index.html" 50 | text: "API (Scaladoc)" 51 | } 52 | ] 53 | } 54 | coreCli: ${project-info.shared-info} { 55 | title: "Core CLI" 56 | jpms-name: "io.aiven.guardian.cli" 57 | } 58 | coreRestore: ${project-info.shared-info} { 59 | title: "Core Restore" 60 | jpms-name: "io.aiven.guardian.kafka.restore" 61 | api-docs: [ 62 | { 63 | url: ${project-info.scaladoc}"kafka/restore/index.html" 64 | text: "API (Scaladoc)" 65 | } 66 | ] 67 | } 68 | coreS3: ${project-info.shared-info} { 69 | title: "Core S3" 70 | jpms-name: "io.aiven.guardian.kafka.restore" 71 | api-docs: [ 72 | { 73 | url: ${project-info.scaladoc}"kafka/s3/index.html" 74 | text: "API (Scaladoc)" 75 | } 76 | ] 77 | } 78 | restoreS3: ${project-info.shared-info} { 79 | title: "Restore S3" 80 | jpms-name: "io.aiven.guardian.kafka.restore.s3" 81 | api-docs: [ 82 | { 83 | url: ${project-info.scaladoc}"kafka/restore/s3/index.html" 84 | text: "API (Scaladoc)" 85 | } 86 | ] 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /restore-gcs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/guardian-for-apache-kafka/9fadf3388140820b161cf28744d1587b91bf0776/restore-gcs/.gitkeep -------------------------------------------------------------------------------- /restore-s3/src/main/scala/io/aiven/guardian/kafka/restore/s3/RestoreClient.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.s3 2 | 3 | import io.aiven.guardian.kafka.configs.KafkaCluster 4 | import io.aiven.guardian.kafka.restore.KafkaProducerInterface 5 | import io.aiven.guardian.kafka.restore.RestoreClientInterface 6 | import io.aiven.guardian.kafka.restore.configs.Restore 7 | import io.aiven.guardian.kafka.s3.configs.{S3 => 
S3Config} 8 | import org.apache.pekko 9 | 10 | import scala.concurrent.ExecutionContext 11 | import scala.concurrent.Future 12 | 13 | import pekko.NotUsed 14 | import pekko.actor.ActorSystem 15 | import pekko.stream.connectors.s3.S3Attributes 16 | import pekko.stream.connectors.s3.S3Headers 17 | import pekko.stream.connectors.s3.S3Settings 18 | import pekko.stream.connectors.s3.scaladsl.S3 19 | import pekko.stream.scaladsl.Flow 20 | import pekko.stream.scaladsl.Sink 21 | import pekko.util.ByteString 22 | 23 | class RestoreClient[T <: KafkaProducerInterface](maybeS3Settings: Option[S3Settings])(implicit 24 | override val kafkaProducerInterface: T, 25 | override val restoreConfig: Restore, 26 | override val kafkaClusterConfig: KafkaCluster, 27 | override val system: ActorSystem, 28 | s3Config: S3Config, 29 | s3Headers: S3Headers 30 | ) extends RestoreClientInterface[T] { 31 | 32 | override def retrieveBackupKeys: Future[List[String]] = { 33 | implicit val ec: ExecutionContext = system.dispatcher 34 | 35 | val base = S3.listBucket(s3Config.dataBucket, s3Config.dataBucketPrefix, s3Headers) 36 | for { 37 | bucketContents <- maybeS3Settings 38 | .fold(base)(s3Settings => base.withAttributes(S3Attributes.settings(s3Settings))) 39 | .runWith(Sink.collection) 40 | } yield bucketContents.map(_.key).toList 41 | } 42 | 43 | override def downloadFlow: Flow[String, ByteString, NotUsed] = 44 | Flow[String] 45 | .flatMapConcat { key => 46 | val base = S3.getObject(s3Config.dataBucket, key, None, None, s3Headers) 47 | maybeS3Settings 48 | .fold(base)(s3Settings => base.withAttributes(S3Attributes.settings(s3Settings))) 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /restore-s3/src/test/scala/io/aiven/guardian/kafka/restore/s3/RealS3GzipCompressionRestoreClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.kafka.models.Gzip 5 | import io.aiven.guardian.pekko.AnyPropTestKit 6 | import org.apache.pekko.actor.ActorSystem 7 | 8 | class RealS3GzipCompressionRestoreClientSpec 9 | extends AnyPropTestKit(ActorSystem("RealS3GzipCompressionRestoreClientSpec")) 10 | with RealS3RestoreClientTest { 11 | override val compression: Option[Compression] = Some(Compression(Gzip, None)) 12 | } 13 | -------------------------------------------------------------------------------- /restore-s3/src/test/scala/io/aiven/guardian/kafka/restore/s3/RealS3RestoreClientSpec.scala: -------------------------------------------------------------------------------- 1 | package io.aiven.guardian.kafka.restore.s3 2 | 3 | import io.aiven.guardian.kafka.backup.configs.Compression 4 | import io.aiven.guardian.pekko.AnyPropTestKit 5 | import org.apache.pekko.actor.ActorSystem 6 | 7 | class RealS3RestoreClientSpec 8 | extends AnyPropTestKit(ActorSystem("RealS3RestoreClientSpec")) 9 | with RealS3RestoreClientTest { 10 | override val compression: Option[Compression] = None 11 | } 12 | --------------------------------------------------------------------------------