├── .github
├── ISSUE_TEMPLATE
│ ├── 01_question.md
│ ├── 02_bug.md
│ └── 03_feature.md
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── ci.yml
│ ├── clean.yml
│ ├── dependency-graph.yml
│ ├── format.yml
│ └── scala-steward.yml
├── .gitignore
├── .scala-steward.conf
├── .scalafix.conf
├── .scalafmt.conf
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── backup-gcs
└── src
│ └── main
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── backup
│ └── gcs
│ └── BackupClient.scala
├── backup-s3
└── src
│ ├── main
│ └── scala
│ │ └── io
│ │ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ └── backup
│ │ └── s3
│ │ └── BackupClient.scala
│ └── test
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── backup
│ └── s3
│ ├── BackupClientChunkState.scala
│ ├── BackupClientSpec.scala
│ ├── KafkaConsumerWithKillSwitch.scala
│ ├── MinioBackupClientSpec.scala
│ ├── MockedKafkaClientBackupConsumerSpec.scala
│ ├── MockedS3BackupClientInterface.scala
│ ├── RealS3BackupClientSpec.scala
│ ├── RealS3BackupClientTest.scala
│ └── RealS3GzipCompressionBackupClientSpec.scala
├── build.sbt
├── cli-backup
└── src
│ ├── main
│ └── scala
│ │ └── io
│ │ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ └── backup
│ │ ├── App.scala
│ │ ├── BackupApp.scala
│ │ ├── Main.scala
│ │ └── S3App.scala
│ └── test
│ ├── resources
│ └── logback.xml
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── backup
│ └── CliSpec.scala
├── cli-compaction
└── .gitkeep
├── cli-restore
└── src
│ ├── main
│ └── scala
│ │ └── io
│ │ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ └── restore
│ │ ├── App.scala
│ │ ├── Main.scala
│ │ ├── RestoreApp.scala
│ │ └── S3App.scala
│ └── test
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── restore
│ └── CliSpec.scala
├── compaction-gcs
└── src
│ └── main
│ ├── resources
│ └── reference.conf
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── compaction
│ └── gcs
│ ├── Config.scala
│ ├── StorageClient.scala
│ └── models
│ └── StorageConfig.scala
├── compaction-s3
└── src
│ └── main
│ ├── resources
│ └── reference.conf
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── compaction
│ └── s3
│ ├── Config.scala
│ ├── StorageClient.scala
│ └── models
│ └── StorageConfig.scala
├── core-backup
└── src
│ ├── main
│ ├── resources
│ │ └── reference.conf
│ └── scala
│ │ └── io
│ │ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ └── backup
│ │ ├── BackupClientInterface.scala
│ │ ├── Config.scala
│ │ ├── KafkaConsumer.scala
│ │ ├── KafkaConsumerInterface.scala
│ │ └── configs
│ │ ├── Backup.scala
│ │ ├── Compression.scala
│ │ └── TimeConfiguration.scala
│ └── test
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── backup
│ ├── BackupClientControlWrapper.scala
│ ├── BackupClientInterfaceSpec.scala
│ ├── BackupClientInterfaceTest.scala
│ ├── CompressionSpec.scala
│ ├── ConfigSpec.scala
│ ├── ConfigurationChangeRestartSpec.scala
│ ├── GzipCompressionBackupClientInterfaceSpec.scala
│ ├── MockedBackupClientInterface.scala
│ └── MockedKafkaConsumerInterface.scala
├── core-cli
└── src
│ └── main
│ ├── resources
│ ├── application.conf
│ └── logback.xml
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── cli
│ ├── MainUtils.scala
│ ├── PekkoSettings.scala
│ ├── arguments
│ ├── PropertiesOpt.scala
│ └── StorageOpt.scala
│ └── options
│ └── Options.scala
├── core-compaction
└── src
│ └── main
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── compaction
│ ├── DatabaseInterface.scala
│ ├── PostgresJDBCDatabase.scala
│ └── StorageInterface.scala
├── core-gcs
└── src
│ └── main
│ ├── resources
│ └── reference.conf
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── gcs
│ ├── Config.scala
│ ├── configs
│ └── GCS.scala
│ └── errors
│ └── GCSErrors.scala
├── core-restore
└── src
│ ├── main
│ ├── resources
│ │ └── reference.conf
│ └── scala
│ │ └── io
│ │ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ └── restore
│ │ ├── Config.scala
│ │ ├── KafkaProducer.scala
│ │ ├── KafkaProducerInterface.scala
│ │ ├── RestoreClientInterface.scala
│ │ └── configs
│ │ └── Restore.scala
│ └── test
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── restore
│ ├── ConfigSpec.scala
│ ├── GzipCompressionRestoreClientInterfaceSpec.scala
│ ├── MockedKafkaProducerInterface.scala
│ ├── MockedRestoreClientInterface.scala
│ ├── RestoreClientInterfaceSpec.scala
│ └── RestoreClientInterfaceTest.scala
├── core-s3
└── src
│ ├── main
│ ├── resources
│ │ └── reference.conf
│ └── scala
│ │ └── io
│ │ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ └── s3
│ │ ├── Config.scala
│ │ ├── configs
│ │ └── S3.scala
│ │ └── errors
│ │ └── S3Errors.scala
│ └── test
│ ├── resources
│ └── logback.xml
│ └── scala
│ ├── io
│ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ └── s3
│ │ ├── Generators.scala
│ │ ├── Main.scala
│ │ ├── MinioContainer.scala
│ │ ├── MinioS3Test.scala
│ │ ├── PureConfigS3HeadersSpec.scala
│ │ ├── S3Spec.scala
│ │ └── S3TestUtils.scala
│ └── org
│ └── apache
│ └── pekko
│ └── stream
│ └── connectors
│ └── s3
│ └── GeneratorsSpec.scala
├── core
├── README.md
└── src
│ ├── main
│ ├── resources
│ │ └── reference.conf
│ └── scala
│ │ └── io
│ │ └── aiven
│ │ └── guardian
│ │ └── kafka
│ │ ├── Config.scala
│ │ ├── Errors.scala
│ │ ├── ExtensionsMethods.scala
│ │ ├── PureConfigUtils.scala
│ │ ├── Utils.scala
│ │ ├── codecs
│ │ └── Circe.scala
│ │ ├── configs
│ │ └── KafkaCluster.scala
│ │ └── models
│ │ ├── BackupObjectMetadata.scala
│ │ ├── CompressionType.scala
│ │ └── ReducedConsumerRecord.scala
│ └── test
│ ├── resources
│ ├── application.conf
│ └── logback.xml
│ └── scala
│ └── io
│ └── aiven
│ └── guardian
│ ├── kafka
│ ├── ConfigSpec.scala
│ ├── Generators.scala
│ ├── KafkaClusterTest.scala
│ └── TestUtils.scala
│ └── pekko
│ ├── AnyPropTestKit.scala
│ ├── PekkoHttpTestKit.scala
│ └── PekkoStreamTestKit.scala
├── dependency-check
├── dependency-check-report.html
└── suppression.xml
├── docs
└── src
│ └── main
│ └── paradox
│ ├── application
│ ├── design.md
│ ├── index.md
│ ├── logging.md
│ └── packaging.md
│ ├── backup
│ ├── configuration.md
│ ├── design.md
│ └── index.md
│ ├── ci.md
│ ├── doc-generation.md
│ ├── general-architecture
│ ├── index.md
│ └── logging.md
│ ├── index.md
│ ├── overview.md
│ ├── persistence
│ ├── design.md
│ ├── index.md
│ └── s3
│ │ ├── configuration.md
│ │ └── index.md
│ ├── restore
│ ├── configuration.md
│ └── index.md
│ ├── security.md
│ └── testing
│ ├── index.md
│ └── s3.md
├── project
├── LicenseReport.scala
├── build.properties
├── plugins.sbt
└── project-info.conf
├── restore-gcs
└── .gitkeep
└── restore-s3
└── src
├── main
└── scala
│ └── io
│ └── aiven
│ └── guardian
│ └── kafka
│ └── restore
│ └── s3
│ └── RestoreClient.scala
└── test
└── scala
└── io
└── aiven
└── guardian
└── kafka
└── restore
└── s3
├── RealS3GzipCompressionRestoreClientSpec.scala
├── RealS3RestoreClientSpec.scala
└── RealS3RestoreClientTest.scala
/.github/ISSUE_TEMPLATE/01_question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: ❓ Ask a question
3 | about: Got stuck or missing something from the docs? Ask away!
4 | ---
5 |
6 | # What can we help you with?
7 |
8 |
9 |
10 | # Where would you expect to find this information?
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/02_bug.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 🐜 Report a bug
3 | about: Spotted a problem? Let us know
4 | ---
5 |
6 | # What happened?
7 |
8 |
9 |
10 | # What did you expect to happen?
11 |
12 |
13 |
14 | # What else do we need to know?
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/03_feature.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 💡 Feature suggestion
3 | about: What would make this even better?
4 | ---
5 |
6 | # What is currently missing?
7 |
8 |
9 |
10 | # How could this be improved?
11 |
12 |
13 |
14 | # Is this a feature you would work on yourself?
15 |
16 | * [ ] I plan to open a pull request for this feature
17 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 | # About this change - What it does
3 |
4 |
5 |
6 |
7 | Resolves: #xxxxx
8 |
9 | # Why this way
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | # This file was automatically generated by sbt-github-actions using the
2 | # githubWorkflowGenerate task. You should add and commit this file to
3 | # your git repository. It goes without saying that you shouldn't edit
4 | # this file by hand! Instead, if you wish to make changes, you should
5 | # change your sbt build configuration to revise the workflow description
6 | # to meet your needs, then regenerate this file.
7 |
8 | name: Continuous Integration
9 |
10 | on:
11 | pull_request:
12 | branches: [main]
13 | push:
14 | branches: [main]
15 |
16 | permissions:
17 | id-token: write
18 |
19 | env:
20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
21 |
22 | jobs:
23 | build:
24 | name: Build and Test
25 | strategy:
26 | matrix:
27 | os: [ubuntu-latest]
28 | scala: [2.13.13]
29 | java: [temurin@11]
30 | runs-on: ${{ matrix.os }}
31 | steps:
32 | - name: Checkout current branch (full)
33 | uses: actions/checkout@v4
34 | with:
35 | fetch-depth: 0
36 |
37 | - name: Setup Java (temurin@11)
38 | if: matrix.java == 'temurin@11'
39 | uses: actions/setup-java@v4
40 | with:
41 | distribution: temurin
42 | java-version: 11
43 | cache: sbt
44 |
45 | - name: 'Linter: Scalafix checks'
46 | run: sbt '++ ${{ matrix.scala }}' 'scalafixAll --check'
47 |
48 | - name: Configure AWS credentials
49 | uses: aws-actions/configure-aws-credentials@v2
50 | with:
51 | role-to-assume: 'arn:aws:iam::310017459104:role/aiven-guardian-github-action'
52 | aws-region: us-west-2
53 | role-duration-seconds: 7200
54 |
55 | - name: Check that workflows are up to date
56 | run: sbt '++ ${{ matrix.scala }}' githubWorkflowCheck
57 |
58 | - name: Build project
59 | env:
60 | PEKKO_CONNECTORS_S3_REGION_PROVIDER: default
61 | PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER: default
62 | run: sbt '++ ${{ matrix.scala }}' clean coverage test
63 |
64 | - name: Compile docs
65 | run: sbt '++ ${{ matrix.scala }}' docs/makeSite
66 |
67 | - name: Upload coverage data to Coveralls
68 | env:
69 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
70 | COVERALLS_FLAG_NAME: Scala ${{ matrix.scala }}
71 | run: sbt '++ ${{ matrix.scala }}' coverageReport coverageAggregate coveralls
72 |
73 | - name: Compress target directories
74 | run: tar cf targets.tar target cli-compaction/target compaction-gcs/target backup-s3/target compaction-s3/target docs/target cli-backup/target core-restore/target restore-s3/target core-gcs/target core-compaction/target core-s3/target core-backup/target core-cli/target cli-restore/target core/target restore-gcs/target backup-gcs/target project/target
75 |
76 | - name: Upload target directories
77 | uses: actions/upload-artifact@v4
78 | with:
79 | name: target-${{ matrix.os }}-${{ matrix.scala }}-${{ matrix.java }}
80 | path: targets.tar
81 |
82 | publish:
83 | name: Publish Artifacts
84 | needs: [build]
85 | if: github.event_name != 'pull_request' && (github.ref == 'refs/heads/main')
86 | strategy:
87 | matrix:
88 | os: [ubuntu-latest]
89 | scala: [2.13.13]
90 | java: [temurin@11]
91 | runs-on: ${{ matrix.os }}
92 | steps:
93 | - name: Checkout current branch (full)
94 | uses: actions/checkout@v4
95 | with:
96 | fetch-depth: 0
97 |
98 | - name: Setup Java (temurin@11)
99 | if: matrix.java == 'temurin@11'
100 | uses: actions/setup-java@v4
101 | with:
102 | distribution: temurin
103 | java-version: 11
104 | cache: sbt
105 |
106 | - name: Download target directories (2.13.13)
107 | uses: actions/download-artifact@v4
108 | with:
109 | name: target-${{ matrix.os }}-2.13.13-${{ matrix.java }}
110 |
111 | - name: Inflate target directories (2.13.13)
112 | run: |
113 | tar xf targets.tar
114 | rm targets.tar
115 |
116 | - run: |
117 | git config --global user.name "$(git --no-pager log --format=format:'%an' -n 1)"
118 | git config --global user.email "$(git --no-pager log --format=format:'%ae' -n 1)"
119 |
120 | - uses: webfactory/ssh-agent@v0.5.4
121 | with:
122 | ssh-private-key: ${{ secrets.GH_PAGES_SSH_PRIVATE_KEY }}
123 |
124 | - run: sbt docs/ghpagesPushSite
125 |
--------------------------------------------------------------------------------
/.github/workflows/clean.yml:
--------------------------------------------------------------------------------
1 | # This file was automatically generated by sbt-github-actions using the
2 | # githubWorkflowGenerate task. You should add and commit this file to
3 | # your git repository. It goes without saying that you shouldn't edit
4 | # this file by hand! Instead, if you wish to make changes, you should
5 | # change your sbt build configuration to revise the workflow description
6 | # to meet your needs, then regenerate this file.
7 |
8 | name: Clean
9 |
10 | on: push
11 |
12 | jobs:
13 | delete-artifacts:
14 | name: Delete Artifacts
15 | runs-on: ubuntu-latest
16 | env:
17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 | steps:
19 | - name: Delete artifacts
20 | shell: bash {0}
21 | run: |
22 | # Customize those three lines with your repository and credentials:
23 | REPO=${GITHUB_API_URL}/repos/${{ github.repository }}
24 |
25 | # A shortcut to call GitHub API.
26 | ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; }
27 |
28 | # A temporary file which receives HTTP response headers.
29 | TMPFILE=$(mktemp)
30 |
31 | # An associative array, key: artifact name, value: number of artifacts of that name.
32 | declare -A ARTCOUNT
33 |
34 | # Process all artifacts on this repository, loop on returned "pages".
35 | URL=$REPO/actions/artifacts
36 | while [[ -n "$URL" ]]; do
37 |
38 | # Get current page, get response headers in a temporary file.
39 | JSON=$(ghapi --dump-header $TMPFILE "$URL")
40 |
41 | # Get URL of next page. Will be empty if we are at the last page.
 42 |               URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<//' -e 's/>.*//')
43 | rm -f $TMPFILE
44 |
45 | # Number of artifacts on this page:
46 | COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') ))
47 |
48 | # Loop on all artifacts on this page.
49 | for ((i=0; $i < $COUNT; i++)); do
50 |
51 | # Get name of artifact and count instances of this name.
52 | name=$(jq <<<$JSON -r ".artifacts[$i].name?")
53 | ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1))
54 |
55 | id=$(jq <<<$JSON -r ".artifacts[$i].id?")
56 | size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") ))
57 | printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size
58 | ghapi -X DELETE $REPO/actions/artifacts/$id
59 | done
60 | done
61 |
--------------------------------------------------------------------------------
/.github/workflows/dependency-graph.yml:
--------------------------------------------------------------------------------
1 | name: Update Dependency Graph
2 | on:
3 | push:
4 | branches:
5 | - main # default branch of the project
6 | permissions:
7 | contents: write
8 | jobs:
9 | dependency-graph:
10 | name: Update Dependency Graph
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v4
14 | - uses: scalacenter/sbt-dependency-submission@v2
15 |
--------------------------------------------------------------------------------
/.github/workflows/format.yml:
--------------------------------------------------------------------------------
1 | name: Scalafmt
2 |
3 | permissions: read-all
4 |
5 | on:
6 | pull_request:
7 | branches: ['**']
8 |
9 | jobs:
10 | build:
11 | name: Code is formatted
12 | runs-on: ubuntu-latest
13 | steps:
14 | - name: Checkout current branch (full)
15 | uses: actions/checkout@v4
16 | with:
17 | fetch-depth: 0
18 | persist-credentials: false
19 |
20 | - name: Check project is formatted
21 | uses: jrouly/scalafmt-native-action@v3
22 | with:
23 | arguments: '--list --mode diff-ref=origin/main'
24 |
--------------------------------------------------------------------------------
/.github/workflows/scala-steward.yml:
--------------------------------------------------------------------------------
1 | on:
2 | workflow_dispatch:
3 | schedule:
4 | - cron: '0 0 * * 0'
5 |
6 | name: Launch Scala Steward
7 |
8 | jobs:
9 | scala-steward:
10 | runs-on: ubuntu-22.04
11 | name: Launch Scala Steward
12 | steps:
13 | - name: Launch Scala Steward
14 | uses: scala-steward-org/scala-steward-action@v2
15 | with:
16 | github-app-id: ${{ secrets.APP_ID }}
17 | github-app-installation-id: ${{ secrets.APP_INSTALLATION_ID }}
18 | github-app-key: ${{ secrets.APP_PRIVATE_KEY }}
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### SBT template
2 | # Simple Build Tool
3 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control
4 |
5 | dist/*
6 | target/
7 | lib_managed/
8 | src_managed/
9 | project/boot/
10 | project/plugins/project/
11 | .history
12 | .cache
13 | .lib/
14 |
15 | ### Scala template
16 | *.class
17 | *.log
18 |
--------------------------------------------------------------------------------
/.scala-steward.conf:
--------------------------------------------------------------------------------
1 | updatePullRequests = "always"
2 |
--------------------------------------------------------------------------------
/.scalafix.conf:
--------------------------------------------------------------------------------
1 | rules = [
2 | DisableSyntax, # Disables some constructs that make no semantic sense like `final val`
3 | ProcedureSyntax, # Procedure syntax in Scala is always discouraged
 4 |   ExplicitResultTypes, # To avoid accidental public API breakages it is good to always annotate the return types of public methods
 5 |   NoValInForComprehension, # `val` in for comprehensions is deprecated and shouldn't be used
6 | NoAutoTupling, # Avoids the automatic tupling in parameters
7 | RemoveUnused, # Removes unused elements
8 | LeakingImplicitClassVal, # This rule adds the private access modifier on the field of implicit value classes in order to prevent direct access.
9 | OrganizeImports # Organizes imports and removes unused ones
10 | ]
11 |
12 | ExplicitResultTypes.memberKind = [Def, Val, Var]
13 | ExplicitResultTypes.memberVisibility = [Public, Protected]
14 | ExplicitResultTypes.skipSimpleDefinitions = ['Lit', 'Term.New', 'Term.Ref']
15 | ExplicitResultTypes.fatalWarnings = true
16 | DisableSyntax.noNulls = true
17 | DisableSyntax.noReturns = true
18 | DisableSyntax.noWhileLoops = true
19 | DisableSyntax.noIsInstanceOf = true
20 | DisableSyntax.noXml = true
21 | DisableSyntax.noFinalVal = true
22 | DisableSyntax.noFinalize = true
23 | DisableSyntax.noValPatterns = true
24 | RemoveUnused.imports = false # The OrganizeImports plugin already removes unused imports and clashes with this rule
25 | OrganizeImports.groups = [
26 | "*"
27 | "scala."
28 | "re:javax?\\."
29 | ] # The reasoning for this config is to keep the more business-related imports at the top, while language imports stay at the bottom
30 |
--------------------------------------------------------------------------------
/.scalafmt.conf:
--------------------------------------------------------------------------------
1 | version = 3.8.2
2 | runner.dialect = scala213
3 | preset = default
4 | align.preset = more
5 | maxColumn = 120
6 | project.git = true
7 | align.openParenDefnSite = true
8 | align.openParenCallSite = true
9 | align.arrowEnumeratorGenerator = true
10 | danglingParentheses.preset = true
11 | rewrite.rules = [RedundantBraces, RedundantParens]
12 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | opensource@aiven.io.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Welcome!
2 |
3 | Guardian for Apache Kafka follows the [fork and pull](https://help.github.com/articles/using-pull-requests/#fork--pull)
 4 | development model. You can simply fork the repository, create and check out a new branch, commit changes to that
 5 | branch and then create a pull request once you are done.
6 |
 7 | Feel free to submit a PR earlier rather than later; this is recommended as it can spur discussion to see if you are on
 8 | the right track. If you create a PR before it's ready, we recommend using GitHub's
 9 | [draft](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-stage-of-a-pull-request)
10 | feature to clearly indicate that the PR is still being worked on.
11 |
12 | ## Setting up development environment
13 |
14 | If you haven't already done so, before you get started you need to set up your machine for development. Guardian for
15 | Apache Kafka is written in [Scala](https://www.scala-lang.org/) so a few steps are needed.
16 |
17 | ## JDK
18 |
19 | Guardian for Apache Kafka is developed on the latest stable branch of OpenJDK. For Windows and macOS we recommend
20 | using [AdoptOpenJDK][adopt-openjdk-link] to download the latest installer. For Linux it's recommended to install
21 | OpenJDK through your distribution (but you can also use [AdoptOpenJDK][adopt-openjdk-link] as a last resort).
22 |
23 | ## Scala and sbt
24 | Once you have installed a JDK, having [Scala](https://www.scala-lang.org) and [sbt][sbt-link] installed is recommended.
25 | Although some IDEs (such as IntelliJ) automatically handle Scala and sbt installation for you, it's still recommended
26 | to have standalone versions so you can compile/test/run the project without an IDE/editor. The Scala installation also
27 | comes with its own REPL, which can aid in development.
28 |
29 | We recommend following the official [Scala 2 documentation](https://www.scala-lang.org/download/scala2.html) on how to
30 | install Scala (one possible route is sketched below).
31 |
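As one possible route (an assumption rather than a project requirement), the [Coursier](https://get-coursier.io/) CLI can
install a JDK, Scala and sbt in one step:

```shell
# Installs a default JDK plus the standard Scala tooling (scala, scalac, sbt, scalafmt, ...)
# and offers to add them to your PATH; re-open your shell afterwards.
cs setup
```
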
32 | ## Editors/IDE's
33 | The following editors are recommended for development with Scala. Although it's possible to use other environments,
34 | since Scala is a strongly typed language using a well-supported editor is beneficial.
35 |
36 | ### Intellij IDEA
37 |
38 | [IntelliJ IDEA](https://www.jetbrains.com/idea/) is one of the most used editors for Scala development. After installing
39 | IDEA you need to install the [Scala plugin](https://plugins.jetbrains.com/plugin/1347-scala) so it can recognize sbt
40 | projects. Once the plugin is installed you can simply open the cloned `guardian-for-apache-kafka` repository and it
41 | should set everything up for you.
42 |
43 | ### Metals
44 |
45 | [Metals][metals-link] is a Scala [LSP](https://en.wikipedia.org/wiki/Language_Server_Protocol) implementation that
46 | supports various editors. The primary supported editor for [Metals][metals-link] is
47 | [Visual Studio Code](https://code.visualstudio.com/) along with relevant
48 | [marketplace plugin](https://marketplace.visualstudio.com/items?itemName=scalameta.metals).
49 |
50 | Note that other editors can also be used with Metals; documentation can be found
51 | [here](https://scalameta.org/metals/docs/). [Spacemacs](https://www.spacemacs.org/), an
52 | [Emacs](https://www.gnu.org/software/emacs/) distribution, also supports [Metals][metals-link] via the
53 | [Scala layer](https://develop.spacemacs.org/layers/+lang/scala/README.html).
54 |
55 | ## Formatting
56 |
57 | The codebase is formatted with [scalafmt](https://scalameta.org/scalafmt/); as such, your changes need to be formatted
58 | before submitting a PR.
59 |
60 | Various runners for Scalafmt exist (see the example after this list), such as:
61 | * An [sbt scalafmt plugin](https://github.com/scalameta/sbt-scalafmt) that lets you run scalafmt directly within sbt using
62 | * `scalafmt` to format base scala sources
63 | * `test:scalafmt` to format test scala sources
64 | * `scalafmtSbt` to format the `build.sbt` file
65 | * IntelliJ IDEA and VSCode will automatically detect projects with scalafmt and prompt you whether to use Scalafmt. See
66 | the [scalafmt installation guide][scalafmt-installation-link] for more details
67 | * There are native builds of Scalafmt that let you run `scalafmt` as a CLI tool; see the CLI section in the
68 |   [scalafmt installation guide][scalafmt-installation-link]
69 |
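For example, the plugin tasks listed above can also be invoked in batch mode straight from your shell (a minimal sketch
assuming `sbt` is on your `PATH`):

```shell
# Format main sources, test sources and the build definition in one pass
sbt scalafmt test:scalafmt scalafmtSbt
```
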
70 | Note that a GitHub Action exists which will check that your code is formatted whenever you create a PR. For more details
71 | read the [documentation](https://aiven.github.io/guardian-for-apache-kafka/ci.html#scalafmt).
72 |
73 | ## sbt - Compiling, Building and Testing
74 |
75 | We use [sbt][sbt-link] as the primary build tool for the project. When you run [sbt][sbt-link] by itself
76 | it will start an interactive shell session where you can type in commands (see the example after this list), e.g.
77 |
78 | * `compile` will compile the entire project
79 | * `test:compile` will only compile the test sources
80 | * `test` will run the tests for the entire project
81 | * `core/compile` will only compile the `core` project. See [build.sbt](build.sbt) to get a reference for how the projects
82 | are named
83 | * `publishLocal` will publish the project into the local `~/.m2` repository
84 | * `clean` will clean all build targets (including documentation) from the project. Note that sbt stores build output
85 |   in sub-directories named `target`
86 | * `reload` will reload sbt, which is needed when the [sbt][sbt-link] build definition is changed
87 |
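To illustrate, the same commands can either be typed into the interactive shell or passed to `sbt` in batch mode (a
sketch assuming `sbt` is on your `PATH`; check [build.sbt](build.sbt) for the exact project names):

```shell
# Start the interactive sbt shell, then type commands such as `compile` or `test`
sbt

# ...or run commands in batch mode directly from your shell
sbt clean compile test
sbt core/compile publishLocal
```
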
88 | ## Testing
89 |
90 | As mentioned before, testing is handled entirely through sbt; there are no custom shell scripts required to set
91 | up environments unless otherwise noted in
92 | the [testing docs](https://aiven.github.io/guardian-for-apache-kafka/testing/index.html) (typically when tests run
93 | against actual services such as S3).
94 |
95 | ### Docker
96 |
97 | For integration tests Guardian for Apache Kafka uses Docker to spin up services. For macOS the best way to install
98 | Docker is from the [official website](https://www.docker.com/products/docker-desktop/), whereas if you are running Linux
99 | you should consult your distribution's package manager/repository.
100 |
101 | Since Guardian for Apache Kafka uses [testcontainers](https://www.testcontainers.org/) you don't need to worry about
102 | starting/stopping the Docker containers manually; this is handled automatically when you run the relevant test(s), as sketched below.
103 |
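As a sketch, a single integration test suite can be run on its own with sbt's `testOnly`; the project and suite names
below are taken from this repository's layout, but double-check them against [build.sbt](build.sbt):

```shell
# Runs one S3 backup suite; testcontainers starts and stops the required Docker containers automatically
sbt "backup-s3/testOnly io.aiven.guardian.kafka.backup.s3.MinioBackupClientSpec"
```
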
104 | ## sbt - documentation
105 |
106 | Documentation is also built with sbt (see the example after this list), e.g.
107 |
108 | * `docs/makeSite` will compile documentation
109 | * `docs/previewSite` will compile documentation (if needed) and open the result in your system's default browser
110 |
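For example, from your shell (assuming `sbt` is on your `PATH`):

```shell
sbt docs/makeSite      # build the documentation site
sbt docs/previewSite   # build it if needed and open it in your default browser
```
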
111 | For details about how the documentation generation works, go
112 | [here](https://aiven.github.io/guardian-for-apache-kafka/doc-generation.html).
113 |
114 | [adopt-openjdk-link]: https://adoptopenjdk.net/
115 | [metals-link]: https://scalameta.org/metals/
116 | [scalafmt-installation-link]: https://scalameta.org/scalafmt/docs/installation.html
117 | [sbt-link]: https://www.scala-sbt.org/
118 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [Build status](https://github.com/aiven/guardian-for-apache-kafka/actions/workflows/ci.yml?query=branch%3Amain)
2 | [License](https://www.apache.org/licenses/LICENSE-2.0)
3 | [Coverage](https://coveralls.io/github/aiven/guardian-for-apache-kafka?branch=main)
4 |
5 | # Guardian for Apache Kafka®
6 |
 7 | Guardian is a backup and restore tool for Apache Kafka clusters. It is designed to continuously stream Kafka topics into
 8 | persistent/object storage such as S3, and it also provides tools for restoring those backups.
9 |
10 | ## Documentation
11 |
12 | * [Guardian reference](https://aiven-open.github.io/guardian-for-apache-kafka/) documentation.
13 |
14 | ## Trademarks
15 |
16 | Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or
17 | other countries.
18 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | We release patches for security vulnerabilities. Which versions are eligible
 6 | to receive such patches depends on the CVSS v3.0 rating:
7 |
8 | | CVSS v3.0 | Supported Versions |
9 | | --------- | ----------------------------------------- |
10 | | 4.0-10.0 | Most recent release |
11 |
12 | ## Reporting a Vulnerability
13 |
14 | Please report (suspected) security vulnerabilities to our **[bug bounty
15 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from
16 | us within 2 working days. If the issue is confirmed, we will release a patch as
17 | soon as possible depending on impact and complexity.
18 |
19 | ## Qualifying Vulnerabilities
20 |
21 | Any reproducible vulnerability that has a severe effect on the security or
22 | privacy of our users is likely to be in scope for the program.
23 |
24 | We generally **aren't** interested in the following issues:
25 | * Social engineering (e.g. phishing, vishing, smishing) attacks
26 | * Brute force, DoS, text injection
27 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.),
28 | email (SPF/DKIM/DMARC records), SSL/TLS configuration.
29 | * Software version disclosure / Banner identification issues / Descriptive
30 | error messages or headers (e.g. stack traces, application or server errors).
31 | * Clickjacking on pages with no sensitive actions
32 | * Theoretical vulnerabilities where you can't demonstrate a significant
33 | security impact with a proof of concept.
34 |
--------------------------------------------------------------------------------
/backup-gcs/src/main/scala/io/aiven/guardian/kafka/backup/gcs/BackupClient.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.gcs
2 |
3 | import io.aiven.guardian.kafka.backup.BackupClientInterface
4 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface
5 | import io.aiven.guardian.kafka.backup.configs.Backup
6 | import io.aiven.guardian.kafka.gcs.configs.{GCS => GCSConfig}
7 | import org.apache.pekko
8 |
9 | import scala.concurrent.ExecutionContext
10 | import scala.concurrent.Future
11 |
12 | import pekko.actor.ActorSystem
13 | import pekko.http.scaladsl.model.ContentTypes
14 | import pekko.stream.connectors.google.GoogleAttributes
15 | import pekko.stream.connectors.google.GoogleSettings
16 | import pekko.stream.connectors.googlecloud.storage.StorageObject
17 | import pekko.stream.connectors.googlecloud.storage.scaladsl.GCStorage
18 | import pekko.stream.scaladsl.Sink
19 | import pekko.util.ByteString
20 |
21 | // TODO: GCS implementation currently does not work correctly because of inability of current GCS implementation in
22 | // Pekko Connectors to allow us to commit Kafka cursor whenever chunks are uploaded
23 | class BackupClient[T <: KafkaConsumerInterface](maybeGoogleSettings: Option[GoogleSettings])(implicit
24 | override val kafkaClientInterface: T,
25 | override val backupConfig: Backup,
26 | override val system: ActorSystem,
27 | gcsConfig: GCSConfig
28 | ) extends BackupClientInterface[T] {
29 |
30 | override def empty: () => Future[Option[StorageObject]] = () => Future.successful(None)
31 |
32 | override type BackupResult = Option[StorageObject]
33 |
34 | override type State = Nothing
35 |
36 | override def getCurrentUploadState(key: String): Future[UploadStateResult] =
37 | Future.successful(UploadStateResult.empty)
38 |
39 | override def backupToStorageTerminateSink(
40 | previousState: PreviousState
41 | ): Sink[ByteString, Future[Option[StorageObject]]] = {
42 | val base = GCStorage
43 | .resumableUpload(gcsConfig.dataBucket, previousState.previousKey, ContentTypes.`application/json`)
44 | .mapMaterializedValue(future => future.map(result => Some(result))(ExecutionContext.parasitic))
45 |
46 | maybeGoogleSettings
47 | .fold(base)(googleSettings => base.withAttributes(GoogleAttributes.settings(googleSettings)))
48 | }
49 |
50 | override def backupToStorageSink(key: String,
51 | currentState: Option[Nothing]
52 | ): Sink[(ByteString, kafkaClientInterface.CursorContext), Future[BackupResult]] = {
53 | val base = GCStorage
54 | .resumableUpload(gcsConfig.dataBucket, key, ContentTypes.`application/json`)
55 | .mapMaterializedValue(future => future.map(result => Some(result))(ExecutionContext.parasitic))
56 |
57 | maybeGoogleSettings
58 | .fold(base)(googleSettings => base.withAttributes(GoogleAttributes.settings(googleSettings)))
59 | .contramap[(ByteString, kafkaClientInterface.CursorContext)] { case (byteString, _) =>
60 | byteString
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/BackupClientChunkState.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface
4 | import io.aiven.guardian.kafka.backup.configs.Backup
5 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config}
6 | import org.apache.pekko
7 |
8 | import scala.collection.immutable
9 | import scala.concurrent.Future
10 |
11 | import java.util.concurrent.ConcurrentLinkedQueue
12 |
13 | import pekko.Done
14 | import pekko.actor.ActorSystem
15 | import pekko.stream.connectors.s3.S3Headers
16 | import pekko.stream.connectors.s3.S3Settings
17 | import pekko.stream.connectors.s3.SuccessfulUploadPart
18 | import pekko.stream.scaladsl.Flow
19 | import pekko.stream.scaladsl.Sink
20 |
21 | class BackupClientChunkState[T <: KafkaConsumerInterface](maybeS3Settings: Option[S3Settings])(implicit
22 | override val kafkaClientInterface: T,
23 | override val backupConfig: Backup,
24 | override val system: ActorSystem,
25 | s3Config: S3Config,
26 | s3Headers: S3Headers
27 | ) extends BackupClient[T](maybeS3Settings) {
28 | val processedChunks: ConcurrentLinkedQueue[SuccessfulUploadPart] = new ConcurrentLinkedQueue[SuccessfulUploadPart]()
29 |
30 | override def successSink
31 | : Sink[(SuccessfulUploadPart, immutable.Iterable[kafkaClientInterface.CursorContext]), Future[Done]] =
32 | Flow[(SuccessfulUploadPart, immutable.Iterable[kafkaClientInterface.CursorContext])]
33 | .alsoTo(Sink.foreach { case (part, _) =>
34 | processedChunks.add(part)
35 | })
36 | .to(super.successSink)
37 | .mapMaterializedValue(_ => Future.successful(Done))
38 | }
39 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/BackupClientSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import com.softwaremill.diffx.generic.auto._
4 | import com.softwaremill.diffx.scalatest.DiffMustMatcher._
5 | import io.aiven.guardian.kafka.Generators._
6 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst
7 | import io.aiven.guardian.kafka.codecs.Circe._
8 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
9 | import io.aiven.guardian.kafka.s3.Generators._
10 | import io.aiven.guardian.kafka.s3.S3Spec
11 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config}
12 | import org.apache.pekko
13 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport
14 | import org.scalatest.BeforeAndAfterAll
15 | import org.scalatest.TestData
16 | import org.scalatest.matchers.must.Matchers
17 |
18 | import scala.concurrent.ExecutionContext
19 | import scala.concurrent.Future
20 | import scala.concurrent.duration._
21 | import scala.language.postfixOps
22 |
23 | import java.time.OffsetDateTime
24 |
25 | import pekko.stream.connectors.s3.scaladsl.S3
26 | import pekko.stream.scaladsl.Keep
27 | import pekko.stream.scaladsl.Sink
28 | import pekko.stream.scaladsl.Source
29 |
30 | trait BackupClientSpec extends S3Spec with Matchers with BeforeAndAfterAll {
31 |
32 | val ThrottleElements: Int = 100
33 | val ThrottleAmount: FiniteDuration = 1 millis
34 |
35 | property("backup method completes flow correctly for all valid Kafka events") { implicit td: TestData =>
36 | forAll(kafkaDataWithTimePeriodsGen(), s3ConfigGen(useVirtualDotHost, bucketPrefix)) {
37 | (kafkaDataWithTimePeriod: KafkaDataWithTimePeriod, s3Config: S3Config) =>
38 | logger.info(s"Data bucket is ${s3Config.dataBucket}")
39 | val backupClient = new MockedS3BackupClientInterface(
40 | Source(kafkaDataWithTimePeriod.data).throttle(ThrottleElements, ThrottleAmount),
41 | PeriodFromFirst(kafkaDataWithTimePeriod.periodSlice),
42 | s3Config,
43 | Some(s3Settings)
44 | )
45 |
46 | val delay =
47 | (ThrottleAmount * (kafkaDataWithTimePeriod.data.size / ThrottleElements) * 1.2) + (10 millis) match {
48 | case fd: FiniteDuration => fd
49 | case _: Duration.Infinite => throw new Exception("Expected Finite Duration")
50 | }
51 |
52 | val calculatedFuture = for {
53 | _ <- createBucket(s3Config.dataBucket)
54 | _ <- backupClient.backup.run()
55 | _ <- pekko.pattern.after(delay)(Future.successful(()))
56 | bucketContents <-
57 | S3.listBucket(s3Config.dataBucket, None, s3Headers)
58 | .withAttributes(s3Attrs)
59 | .toMat(Sink.collection)(Keep.right)
60 | .run()
61 | keysWithRecords <- Future.sequence(bucketContents.map { bucketContents =>
62 | S3.getObject(s3Config.dataBucket, bucketContents.key)
63 | .withAttributes(s3Attrs)
64 | .via(CirceStreamSupport.decode[List[Option[ReducedConsumerRecord]]])
65 | .toMat(Sink.collection)(Keep.right)
66 | .run()
67 | .map(list => (bucketContents.key, list.flatten))(ExecutionContext.parasitic)
68 | })
69 | sorted = keysWithRecords.toList.sortBy { case (key, _) =>
70 | val date = key.replace(".json", "")
71 | OffsetDateTime.parse(date).toEpochSecond
72 | }(Ordering[Long].reverse)
73 | flattened = sorted.flatMap { case (_, records) => records }
74 | } yield flattened.collect { case Some(reducedConsumerRecord) =>
75 | reducedConsumerRecord
76 | }
77 | val observed = calculatedFuture.futureValue
78 |
79 | kafkaDataWithTimePeriod.data.containsSlice(observed) mustEqual true
80 | if (observed.nonEmpty) {
81 | observed.head mustMatchTo (kafkaDataWithTimePeriod.data.head)
82 | }
83 | }
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/KafkaConsumerWithKillSwitch.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import io.aiven.guardian.kafka.backup.KafkaConsumer
4 | import io.aiven.guardian.kafka.backup.configs.Backup
5 | import io.aiven.guardian.kafka.configs.KafkaCluster
6 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
7 | import org.apache.pekko
8 |
9 | import pekko.actor.ActorSystem
10 | import pekko.kafka.CommitterSettings
11 | import pekko.kafka.ConsumerMessage
12 | import pekko.kafka.ConsumerSettings
13 | import pekko.kafka.scaladsl.Consumer
14 | import pekko.stream.SharedKillSwitch
15 | import pekko.stream.scaladsl.SourceWithContext
16 |
17 | class KafkaConsumerWithKillSwitch(
18 | configureConsumer: Option[
19 | ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]]
20 | ] = None,
21 | configureCommitter: Option[
22 | CommitterSettings => CommitterSettings
23 | ] = None,
24 | killSwitch: SharedKillSwitch
25 | )(implicit system: ActorSystem, kafkaClusterConfig: KafkaCluster, backupConfig: Backup)
26 | extends KafkaConsumer(configureConsumer, configureCommitter) {
27 | override def getSource
28 | : SourceWithContext[ReducedConsumerRecord, ConsumerMessage.CommittableOffset, Consumer.Control] =
29 | super.getSource.via(killSwitch.flow)
30 | }
31 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MinioBackupClientSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import io.aiven.guardian.kafka.s3.MinioS3Test
4 | import io.aiven.guardian.pekko.AnyPropTestKit
5 | import org.apache.pekko.actor.ActorSystem
6 |
7 | class MinioBackupClientSpec
8 | extends AnyPropTestKit(ActorSystem("MinioS3BackupClientSpec"))
9 | with BackupClientSpec
10 | with MinioS3Test {
11 |
12 | /** Since Minio doesn't do DNS name verification we can enable this
13 | */
14 | override lazy val useVirtualDotHost: Boolean = true
15 | }
16 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MockedKafkaClientBackupConsumerSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import com.softwaremill.diffx.scalatest.DiffMustMatcher._
4 | import io.aiven.guardian.kafka.Generators._
5 | import io.aiven.guardian.kafka.Utils
6 | import io.aiven.guardian.kafka.backup.MockedBackupClientInterface
7 | import io.aiven.guardian.kafka.backup.MockedKafkaConsumerInterface
8 | import io.aiven.guardian.kafka.backup.configs.Backup
9 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst
10 | import io.aiven.guardian.kafka.codecs.Circe._
11 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
12 | import io.aiven.guardian.kafka.s3.Generators._
13 | import io.aiven.guardian.kafka.s3.S3Spec
14 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config}
15 | import io.aiven.guardian.pekko.AnyPropTestKit
16 | import org.apache.pekko
17 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport
18 | import org.scalatest.TestData
19 | import org.scalatest.matchers.must.Matchers
20 |
21 | import scala.concurrent.ExecutionContext
22 | import scala.concurrent.Future
23 | import scala.concurrent.duration.FiniteDuration
24 | import scala.concurrent.duration._
25 | import scala.language.postfixOps
26 |
27 | import pekko.actor.ActorSystem
28 | import pekko.stream.connectors.s3.S3Settings
29 | import pekko.stream.connectors.s3.scaladsl.S3
30 | import pekko.stream.scaladsl.Sink
31 | import pekko.stream.scaladsl.Source
32 |
33 | class MockedKafkaClientBackupConsumerSpec
34 | extends AnyPropTestKit(ActorSystem("MockedKafkaClientBackupClientSpec"))
35 | with S3Spec
36 | with Matchers {
37 | override lazy val s3Settings: S3Settings = S3Settings()
38 |
39 |   /** Virtual Dot Host in bucket names is disabled because you need an actual DNS certificate, otherwise AWS will fail
40 |     * on bucket creation
41 | */
42 | override lazy val useVirtualDotHost: Boolean = false
43 | override lazy val bucketPrefix: Option[String] = Some("guardian-")
44 | override lazy val enableCleanup: Option[FiniteDuration] = Some(5 seconds)
45 |
46 | property(
47 | "Creating many objects in a small period of time works despite S3's in progress multipart upload eventual consistency issues"
48 | ) { implicit td: TestData =>
49 | forAll(
50 | kafkaDataWithTimePeriodsGen(20,
51 | 20,
52 | padTimestampsMillis = Range.inclusive(1000, 1000),
53 | trailingSentinelValue = true
54 | ),
55 | s3ConfigGen(useVirtualDotHost, bucketPrefix)
56 | ) { (kafkaDataWithTimePeriod: KafkaDataWithTimePeriod, s3Config: S3Config) =>
57 | logger.info(s"Data bucket is ${s3Config.dataBucket}")
58 | val data = kafkaDataWithTimePeriod.data
59 |
60 | implicit val config: S3Config = s3Config
61 | implicit val backupConfig: Backup =
62 | Backup(MockedBackupClientInterface.KafkaGroupId, PeriodFromFirst(1 second), 10 seconds, None)
63 |
64 | val backupClient =
65 | new BackupClient(Some(s3Settings))(new MockedKafkaConsumerInterface(Source(data)),
66 | implicitly,
67 | implicitly,
68 | implicitly,
69 | implicitly
70 | )
71 |
72 | val calculatedFuture = for {
73 | _ <- createBucket(s3Config.dataBucket)
74 | _ = backupClient.backup.run()
75 | bucketContents <- pekko.pattern.after(10 seconds)(
76 | S3.listBucket(s3Config.dataBucket, None).withAttributes(s3Attrs).runWith(Sink.seq)
77 | )
78 | keysSorted = bucketContents.map(_.key).sortBy(Utils.keyToOffsetDateTime)
79 | downloaded <-
80 | Future
81 | .sequence(keysSorted.map { key =>
82 | S3.getObject(s3Config.dataBucket, key)
83 | .withAttributes(s3Attrs)
84 | .via(CirceStreamSupport.decode[List[Option[ReducedConsumerRecord]]])
85 | .runWith(Sink.seq)
86 | })
87 | .map(_.flatten)(ExecutionContext.parasitic)
88 |
89 | } yield downloaded.flatten.collect { case Some(reducedConsumerRecord) =>
90 | reducedConsumerRecord
91 | }
92 |
93 | val downloaded = calculatedFuture.futureValue
94 |
95 | // Only care about ordering when it comes to key
96 | val downloadedGroupedAsKey = downloaded
97 | .groupBy(_.key)
98 | .view
99 | .mapValues { reducedConsumerRecords =>
100 | reducedConsumerRecords.map(_.value)
101 | }
102 | .toMap
103 |
104 | val inputAsKey = data
105 | .dropRight(1) // Drop the generated sentinel value which we don't care about
106 | .groupBy(_.key)
107 | .view
108 | .mapValues { reducedConsumerRecords =>
109 | reducedConsumerRecords.map(_.value)
110 | }
111 | .toMap
112 |
113 | downloadedGroupedAsKey mustMatchTo inputAsKey
114 | }
115 | }
116 |
117 | }
118 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/MockedS3BackupClientInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import io.aiven.guardian.kafka.backup.MockedBackupClientInterface
4 | import io.aiven.guardian.kafka.backup.MockedKafkaConsumerInterface
5 | import io.aiven.guardian.kafka.backup.configs.Backup
6 | import io.aiven.guardian.kafka.backup.configs.TimeConfiguration
7 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
8 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config}
9 | import org.apache.pekko
10 |
11 | import scala.concurrent.duration._
12 | import scala.language.postfixOps
13 |
14 | import pekko.NotUsed
15 | import pekko.actor.ActorSystem
16 | import pekko.stream.connectors.s3.S3Headers
17 | import pekko.stream.connectors.s3.S3Settings
18 | import pekko.stream.scaladsl.Source
19 |
20 | class MockedS3BackupClientInterface(
21 | kafkaData: Source[ReducedConsumerRecord, NotUsed],
22 | timeConfiguration: TimeConfiguration,
23 | s3Config: S3Config,
24 | maybeS3Settings: Option[S3Settings]
25 | )(implicit val s3Headers: S3Headers, system: ActorSystem)
26 | extends BackupClient(
27 | maybeS3Settings
28 | )(
29 | new MockedKafkaConsumerInterface(kafkaData),
30 | Backup(MockedBackupClientInterface.KafkaGroupId, timeConfiguration, 10 seconds, None),
31 | implicitly,
32 | s3Config,
33 | implicitly
34 | )
35 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/RealS3BackupClientSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.pekko.AnyPropTestKit
5 | import org.apache.pekko.actor.ActorSystem
6 |
7 | class RealS3BackupClientSpec extends AnyPropTestKit(ActorSystem("RealS3BackupClientSpec")) with RealS3BackupClientTest {
8 | override val compression: Option[Compression] = None
9 | }
10 |
--------------------------------------------------------------------------------
/backup-s3/src/test/scala/io/aiven/guardian/kafka/backup/s3/RealS3GzipCompressionBackupClientSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.s3
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.kafka.models.Gzip
5 | import io.aiven.guardian.pekko.AnyPropTestKit
6 | import org.apache.pekko.actor.ActorSystem
7 |
8 | class RealS3GzipCompressionBackupClientSpec
9 | extends AnyPropTestKit(ActorSystem("RealS3GzipCompressionBackupClientSpec"))
10 | with RealS3BackupClientTest {
11 | override val compression: Option[Compression] = Some(Compression(Gzip, None))
12 | }
13 |
--------------------------------------------------------------------------------
/cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/App.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import io.aiven.guardian.kafka.backup.BackupClientInterface
5 | import io.aiven.guardian.kafka.backup.KafkaConsumer
6 | import io.aiven.guardian.kafka.backup.KafkaConsumerInterface
7 | import org.apache.pekko
8 |
9 | import scala.concurrent.ExecutionContext
10 | import scala.concurrent.Future
11 |
12 | import pekko.Done
13 | import pekko.actor.ActorSystem
14 | import pekko.kafka.scaladsl.Consumer
15 | import pekko.stream.ActorAttributes
16 | import pekko.stream.Supervision
17 |
18 | trait App[T <: KafkaConsumerInterface] extends LazyLogging {
19 | implicit val kafkaClient: T
20 | implicit val backupClient: BackupClientInterface[KafkaConsumer]
21 | implicit val actorSystem: ActorSystem
22 | implicit val executionContext: ExecutionContext
23 |
24 | def run(): Consumer.Control = {
25 | val decider: Supervision.Decider = { e =>
26 | logger.error("Unhandled exception in stream", e)
27 | Supervision.Stop
28 | }
29 |
30 | backupClient.backup.withAttributes(ActorAttributes.supervisionStrategy(decider)).run()
31 | }
32 |
33 | def shutdown(control: Consumer.Control): Future[Done] = {
34 | logger.info("Shutdown of Guardian detected")
35 | val future = control.shutdown()
36 | future.onComplete(_ => logger.info("Guardian shut down"))
37 | future
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/BackupApp.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.cli.PekkoSettings
4 | import io.aiven.guardian.kafka.backup.KafkaConsumer
5 | import io.aiven.guardian.kafka.backup.{Config => BackupConfig}
6 | import io.aiven.guardian.kafka.{Config => KafkaConfig}
7 |
8 | trait BackupApp extends BackupConfig with KafkaConfig with PekkoSettings {
9 | implicit lazy val kafkaClient: KafkaConsumer = new KafkaConsumer()
10 | }
11 |
--------------------------------------------------------------------------------
/cli-backup/src/main/scala/io/aiven/guardian/kafka/backup/S3App.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.kafka.backup.KafkaConsumer
4 | import io.aiven.guardian.kafka.backup.s3.BackupClient
5 | import io.aiven.guardian.kafka.s3.{Config => S3Config}
6 | import org.apache.pekko
7 |
8 | import pekko.stream.connectors.s3.S3Settings
9 |
10 | trait S3App extends S3Config with BackupApp with App[KafkaConsumer] {
11 | lazy val s3Settings: S3Settings = S3Settings()
12 | implicit lazy val backupClient: BackupClient[KafkaConsumer] = new BackupClient[KafkaConsumer](Some(s3Settings))
13 | }
14 |
--------------------------------------------------------------------------------
/cli-backup/src/test/resources/logback.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!-- Minimal console appender configuration (reconstructed; only the pattern line survives from the original markup). -->
 3 | <configuration>
 4 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
 5 |         <encoder>
 6 |             <pattern>[%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n</pattern>
 7 |         </encoder>
 8 |     </appender>
 9 |     <root level="INFO">
10 |         <appender-ref ref="STDOUT"/>
11 |     </root>
12 | </configuration>
--------------------------------------------------------------------------------
/cli-backup/src/test/scala/io/aiven/guardian/kafka/backup/CliSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import com.typesafe.scalalogging.StrictLogging
4 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice
5 | import io.aiven.guardian.kafka.backup.configs.Compression
6 | import io.aiven.guardian.kafka.backup.configs.{Backup => BackupConfig}
7 | import io.aiven.guardian.kafka.configs.{KafkaCluster => KafkaClusterConfig}
8 | import io.aiven.guardian.kafka.models.Gzip
9 | import markatta.futiles.CancellableFuture
10 | import org.apache.pekko
11 | import org.scalatest.concurrent.ScalaFutures
12 | import org.scalatest.matchers.must.Matchers
13 | import org.scalatest.propspec.AnyPropSpecLike
14 |
15 | import scala.annotation.nowarn
16 | import scala.concurrent.ExecutionContext
17 | import scala.concurrent.Future
18 | import scala.concurrent.Promise
19 | import scala.concurrent.duration._
20 | import scala.language.postfixOps
21 |
22 | import java.time.temporal.ChronoUnit
23 | import java.util.concurrent.TimeUnit
24 |
25 | import pekko.actor.ActorSystem
26 | import pekko.testkit.TestKit
27 |
28 | @nowarn("msg=method main in class CommandApp is deprecated")
29 | class CliSpec
30 | extends TestKit(ActorSystem("BackupCliSpec"))
31 | with AnyPropSpecLike
32 | with Matchers
33 | with ScalaFutures
34 | with StrictLogging {
35 | implicit val ec: ExecutionContext = system.dispatcher
36 | implicit override val patienceConfig: PatienceConfig = PatienceConfig(5 minutes, 100 millis)
37 |
38 | property("Command line args are properly passed into application") {
39 | val groupId = "my-consumer-group"
40 | val topic = "topic"
41 | val bootstrapServer = "localhost:9092"
42 | val dataBucket = "backup-bucket"
43 |
44 | val args = List(
45 | "--storage",
46 | "s3",
47 | "--kafka-topics",
48 | topic,
49 | "--kafka-bootstrap-servers",
50 | bootstrapServer,
51 | "--s3-data-bucket",
52 | dataBucket,
53 | "--kafka-group-id",
54 | groupId,
55 | "--chrono-unit-slice",
56 | "hours",
57 | "--commit-timeout-buffer-window",
58 | "1 second",
59 | "gzip",
60 | "--compression-level",
61 | "5"
62 | )
63 |
64 | val cancellable = CancellableFuture {
65 | Main.main(args.toArray)
66 | }
67 |
68 | def checkUntilMainInitialized(main: io.aiven.guardian.kafka.backup.Entry): Future[(App[_], Promise[Unit])] =
69 | main.initializedApp.get() match {
70 | case Some((app, promise)) => Future.successful((app, promise))
71 | case None => pekko.pattern.after(100 millis)(checkUntilMainInitialized(main))
72 | }
73 |
74 | val (app, promise) = checkUntilMainInitialized(Main).futureValue
75 |
76 | cancellable.cancel()
77 | promise.success(())
78 |
79 | app match {
80 | case s3App: S3App =>
81 | s3App.backupConfig mustEqual BackupConfig(groupId,
82 | ChronoUnitSlice(ChronoUnit.HOURS),
83 | FiniteDuration(1, TimeUnit.SECONDS),
84 | Some(Compression(Gzip, Some(5)))
85 | )
86 | s3App.kafkaClusterConfig mustEqual KafkaClusterConfig(Set(topic))
87 | s3App.kafkaClient.consumerSettings.getProperty("bootstrap.servers") mustEqual bootstrapServer
88 | s3App.s3Config.dataBucket mustEqual dataBucket
89 | case _ =>
90 | fail("Expected an App to be initialized")
91 | }
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/cli-compaction/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aiven-Open/guardian-for-apache-kafka/9fadf3388140820b161cf28744d1587b91bf0776/cli-compaction/.gitkeep
--------------------------------------------------------------------------------
/cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/App.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import io.aiven.guardian.kafka.restore.KafkaProducer
5 | import io.aiven.guardian.kafka.restore.s3.RestoreClient
6 | import org.apache.pekko
7 |
8 | import scala.concurrent.Future
9 |
10 | import pekko.Done
11 | import pekko.actor.ActorSystem
12 | import pekko.stream.ActorAttributes
13 | import pekko.stream.KillSwitch
14 | import pekko.stream.Supervision
15 | import pekko.stream.UniqueKillSwitch
16 |
17 | trait App extends LazyLogging {
18 | implicit val kafkaProducer: KafkaProducer
19 | implicit val restoreClient: RestoreClient[KafkaProducer]
20 | implicit val actorSystem: ActorSystem
21 |
22 | val decider: Supervision.Decider = { e =>
23 | logger.error("Unhandled exception in stream", e)
24 | Supervision.Stop
25 | }
26 |
27 | def run(): (UniqueKillSwitch, Future[Done]) =
28 | restoreClient.restore.withAttributes(ActorAttributes.supervisionStrategy(decider)).run()
29 |
30 | def shutdown(killSwitch: KillSwitch): Unit = {
31 | logger.info("Shutdown of Guardian detected")
32 | killSwitch.shutdown()
33 | logger.info("Guardian shut down")
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/RestoreApp.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.cli.PekkoSettings
4 | import io.aiven.guardian.kafka.restore.KafkaProducer
5 | import io.aiven.guardian.kafka.restore.{Config => RestoreConfig}
6 | import io.aiven.guardian.kafka.{Config => KafkaConfig}
7 |
8 | trait RestoreApp extends RestoreConfig with KafkaConfig with PekkoSettings {
9 | implicit lazy val kafkaProducer: KafkaProducer = new KafkaProducer()
10 | }
11 |
--------------------------------------------------------------------------------
/cli-restore/src/main/scala/io/aiven/guardian/kafka/restore/S3App.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import io.aiven.guardian.kafka.restore.s3.RestoreClient
5 | import io.aiven.guardian.kafka.s3.{Config => S3Config}
6 | import org.apache.pekko
7 |
8 | import pekko.stream.ActorAttributes
9 | import pekko.stream.Attributes
10 | import pekko.stream.Supervision
11 | import pekko.stream.connectors.s3.S3Settings
12 |
13 | trait S3App extends S3Config with RestoreApp with App with LazyLogging {
14 | lazy val s3Settings: S3Settings = S3Settings()
15 | implicit lazy val restoreClient: RestoreClient[KafkaProducer] =
16 | new RestoreClient[KafkaProducer](Some(s3Settings)) {
17 | override val maybeAttributes: Some[Attributes] = {
18 | val decider: Supervision.Decider = { e =>
19 | logger.error("Unhandled exception in stream", e)
20 | Supervision.Stop
21 | }
22 |
23 | Some(ActorAttributes.supervisionStrategy(decider))
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/cli-restore/src/test/scala/io/aiven/guardian/kafka/restore/CliSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.configs.{KafkaCluster => KafkaClusterConfig}
4 | import io.aiven.guardian.kafka.restore.configs.{Restore => RestoreConfig}
5 | import org.apache.kafka.clients.producer.ProducerConfig
6 | import org.scalatest.matchers.must.Matchers
7 | import org.scalatest.propspec.AnyPropSpec
8 |
9 | import scala.annotation.nowarn
10 | import scala.jdk.CollectionConverters._
11 |
12 | import java.time.Instant
13 | import java.time.ZoneId
14 |
15 | @nowarn("msg=method main in class CommandApp is deprecated")
16 | class CliSpec extends AnyPropSpec with Matchers {
17 |
18 | property("Command line args are properly passed into application") {
19 | val bootstrapServer = "localhost:9092"
20 | val fromWhen = Instant.ofEpochMilli(0).atZone(ZoneId.of("UTC")).toOffsetDateTime
21 | val topic1 = "topic-1"
22 | val topic2 = "topic-2"
23 | val restoreTopicOne = s"restore-$topic1"
24 | val restoreTopicTwo = s"restore-$topic2"
25 | val overrideTopicOne = s"$topic1:$restoreTopicOne"
26 | val overrideTopicTwo = s"$topic2:$restoreTopicTwo"
27 | val dataBucket = "backup-bucket"
28 |
29 | val args = List(
30 | "--storage",
31 | "s3",
32 | "--kafka-topics",
33 | topic1,
34 | "--kafka-topics",
35 | topic2,
36 | "--kafka-bootstrap-servers",
37 | bootstrapServer,
38 | "--s3-data-bucket",
39 | dataBucket,
40 | "--from-when",
41 | fromWhen.toString,
42 | "--override-topics",
43 | overrideTopicOne,
44 | "--override-topics",
45 | overrideTopicTwo,
46 | "--single-message-per-kafka-request"
47 | )
48 |
49 | try Main.main(args.toArray)
50 | catch {
51 | case _: Throwable =>
52 | }
53 | Main.initializedApp.get() match {
54 | case Some(s3App: S3App) =>
55 | s3App.restoreConfig mustEqual RestoreConfig(Some(fromWhen),
56 | Some(
57 | Map(
58 | topic1 -> restoreTopicOne,
59 | topic2 -> restoreTopicTwo
60 | )
61 | )
62 | )
63 | s3App.kafkaClusterConfig mustEqual KafkaClusterConfig(Set(topic1, topic2))
64 | s3App.kafkaProducer.producerSettings.getProperties.get("bootstrap.servers") mustEqual bootstrapServer
65 | s3App.s3Config.dataBucket mustEqual dataBucket
66 | (Map(
67 | ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG -> true.toString,
68 | ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION -> 1.toString,
69 | ProducerConfig.BATCH_SIZE_CONFIG -> 0.toString
70 | ): Map[String, AnyRef]).toSet
71 | .subsetOf(s3App.kafkaProducer.producerSettings.getProperties.asScala.toMap.toSet) mustEqual true
72 | s3App.kafkaProducer.producerSettings.parallelism mustEqual 1
73 | case _ =>
74 | fail("Expected an App to be initialized")
75 | }
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/compaction-gcs/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | storage-config-gcs {
2 | parallel-object-download-limit = 10
3 | parallel-object-download-limit = ${?STORAGE_CONFIG_GCS_PARALLEL_OBJECT_DOWNLOAD_LIMIT}
4 | }
5 |
--------------------------------------------------------------------------------
/compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/Config.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction.gcs
2 |
3 | import io.aiven.guardian.kafka.compaction.gcs.models.StorageConfig
4 | import pureconfig.ConfigSource
5 | import pureconfig.generic.auto._
6 |
7 | import scala.annotation.nowarn
8 |
9 | trait Config {
10 | @nowarn("cat=lint-byname-implicit")
11 | implicit lazy val storageConfigGCS: StorageConfig =
12 | ConfigSource.default.at("storage-config-gcs").loadOrThrow[StorageConfig]
13 | }
14 |
15 | object Config extends Config
16 |
--------------------------------------------------------------------------------
/compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/StorageClient.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction.gcs
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import io.aiven.guardian.kafka.compaction.StorageInterface
5 | import io.aiven.guardian.kafka.compaction.gcs.models.StorageConfig
6 | import io.aiven.guardian.kafka.gcs.errors.GCSErrors
7 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
8 | import org.apache.pekko
9 |
10 | import scala.annotation.nowarn
11 |
12 | import pekko.NotUsed
13 | import pekko.stream.connectors.googlecloud.storage.scaladsl.GCStorage
14 | import pekko.stream.scaladsl.Source
15 |
16 | class StorageClient(bucketName: String, maybePrefix: Option[String])(implicit storageConfig: StorageConfig)
17 | extends StorageInterface
18 | with LazyLogging {
19 |
20 | /** Retrieve Kafka data from a given storage source
21 | *
22 | * @return
23 | */
24 | @throws(classOf[GCSErrors.ExpectedObjectToExist])
25 | override def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] = {
26 |
27 | @nowarn("msg=is never used")
28 | // TODO filter the correct buckets to retrieve
29 | val byteStringSource = GCStorage
30 | .listBucket(bucketName, maybePrefix, versions = false)
31 | .flatMapMerge(
32 | storageConfig.parallelObjectDownloadLimit,
33 | storageObject =>
34 | GCStorage
35 | .download(bucketName, storageObject.name)
36 | .map(
37 | _.getOrElse(
38 | throw GCSErrors.ExpectedObjectToExist(bucketName, maybePrefix)
39 | )
40 | )
41 | )
42 |
43 | // TODO serialization from raw bytes to Kafka Topic Format
44 | ???
45 | }
46 |
47 | /** Checks whether the storage exists and is accessible
48 | */
49 | def checkStorageAccessible: Source[Boolean, NotUsed] =
50 | GCStorage.getBucketSource(bucketName).map(_.isDefined).map {
51 | case false =>
52 | logger.error(s"Failed accessing GCS $bucketName")
53 | false
54 | case true =>
55 | logger.info(s"Successfully accessed GCS $bucketName")
56 | true
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/compaction-gcs/src/main/scala/io/aiven/guardian/kafka/compaction/gcs/models/StorageConfig.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction.gcs.models
2 |
3 | final case class StorageConfig(parallelObjectDownloadLimit: Int)
4 |
--------------------------------------------------------------------------------
/compaction-s3/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | storage-config-s3 {
2 | parallel-object-download-limit = 10
3 | parallel-object-download-limit = ${?STORAGE_CONFIG_S3_PARALLEL_OBJECT_DOWNLOAD_LIMIT}
4 | }
5 |
--------------------------------------------------------------------------------
/compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/Config.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction.s3
2 |
3 | import io.aiven.guardian.kafka.compaction.s3.models.StorageConfig
4 | import pureconfig.ConfigSource
5 | import pureconfig.generic.auto._
6 |
7 | import scala.annotation.nowarn
8 |
9 | trait Config {
10 | @nowarn("cat=lint-byname-implicit")
11 | implicit lazy val storageConfigS3: StorageConfig =
12 | ConfigSource.default.at("storage-config-s3").loadOrThrow[StorageConfig]
13 | }
14 |
15 | object Config extends Config
16 |
--------------------------------------------------------------------------------
/compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/StorageClient.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction.s3
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import io.aiven.guardian.kafka.compaction.StorageInterface
5 | import io.aiven.guardian.kafka.compaction.s3.models.StorageConfig
6 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
7 | import io.aiven.guardian.kafka.s3.errors.S3Errors
8 | import org.apache.pekko
9 |
10 | import scala.annotation.nowarn
11 |
12 | import pekko.NotUsed
13 | import pekko.stream.connectors.s3.BucketAccess
14 | import pekko.stream.connectors.s3.S3Headers
15 | import pekko.stream.connectors.s3.scaladsl.S3
16 | import pekko.stream.scaladsl.Source
17 |
18 | class StorageClient(bucketName: String, prefix: Option[String], s3Headers: S3Headers)(implicit
19 | storageConfig: StorageConfig
20 | ) extends StorageInterface
21 | with LazyLogging {
22 |
23 | /** Retrieve Kafka data from a given storage source
24 | *
25 | * @return
26 | */
27 | @throws(classOf[S3Errors.ExpectedObjectToExist])
28 | override def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed] = {
29 | // TODO filter the correct buckets to retrieve
30 | @nowarn("msg=is never used")
31 | val byteStringSource = S3
32 | .listBucket(bucketName, prefix, s3Headers)
33 | .flatMapMerge(
34 | storageConfig.parallelObjectDownloadLimit,
35 | bucketDetails => S3.getObject(bucketName, bucketDetails.key, None, None, s3Headers)
36 | )
37 |
38 | // TODO serialization from raw bytes to Kafka Topic Format
39 | ???
40 | }
41 |
42 | /** Checks whether the storage exists and is accessible
43 | */
44 | def checkStorageAccessible: Source[Boolean, NotUsed] =
45 | S3.checkIfBucketExistsSource(bucketName, s3Headers).map {
46 | case e @ (BucketAccess.AccessDenied | BucketAccess.NotExists) =>
47 | logger.error(s"Accessing S3 $bucketName gave ${e.toString}")
48 | false
49 | case BucketAccess.AccessGranted =>
50 | logger.info(s"Successfully accessed S3 $bucketName")
51 | true
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/compaction-s3/src/main/scala/io/aiven/guardian/kafka/compaction/s3/models/StorageConfig.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction.s3.models
2 |
3 | final case class StorageConfig(parallelObjectDownloadLimit: Int)
4 |
--------------------------------------------------------------------------------
/core-backup/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | pekko.kafka.consumer = {
2 | poll-interval = ${?PEKKO_KAFKA_CONSUMER_POLL_INTERVAL}
3 | poll-timeout = ${?PEKKO_KAFKA_CONSUMER_POLL_TIMEOUT}
4 | stop-timeout = ${?PEKKO_KAFKA_CONSUMER_STOP_TIMEOUT}
5 | close-timeout = ${?PEKKO_KAFKA_CONSUMER_CLOSE_TIMEOUT}
6 | commit-time-warning = ${?PEKKO_KAFKA_CONSUMER_COMMIT_TIME_WARNING}
7 | commit-refresh-interval = ${?PEKKO_KAFKA_CONSUMER_COMMIT_REFRESH_INTERVAL}
8 | use-dispatcher = ${?PEKKO_KAFKA_CONSUMER_USE_DISPATCHER}
9 | wait-close-partition = ${?PEKKO_KAFKA_CONSUMER_WAIT_CLOSE_PARTITION}
10 | position-timeout = ${?PEKKO_KAFKA_CONSUMER_POSITION_TIMEOUT}
11 | offset-for-times-timeout = ${?PEKKO_KAFKA_OFFSET_FOR_TIMES_TIMEOUT}
12 | metadata-request-timeout = ${?PEKKO_KAFKA_METADATA_REQUEST_TIMEOUT}
13 | eos-draining-check-interval = ${?PEKKO_KAFKA_CONSUMER_EOS_DRAINING_CHECK_INTERVAL}
14 | connection-checker = {
15 | enable = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_ENABLE}
16 | max-retries = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_MAX_RETRIES}
17 | backoff-factor = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_BACKOFF_FACTOR}
18 | check-interval = ${?PEKKO_KAFKA_CONSUMER_CONNECTION_CHECKER_CHECK_INTERVAL}
19 | }
20 | partition-handler-warning = ${?PEKKO_KAFKA_CONSUMER_PARTITION_HANDLER_WARNING}
21 | offset-reset-protection = {
22 | enable = ${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_ENABLE}
23 | offset-threshold = ${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_OFFSET_THRESHOLD}
24 | time-threshold = ${?PEKKO_KAFKA_CONSUMER_OFFSET_RESET_PROTECTION_TIME_THRESHOLD}
25 | }
26 | }
27 |
28 | pekko.kafka.committer = {
29 | max-batch = 100000
30 | max-batch = ${?PEKKO_KAFKA_COMMITTER_MAX_BATCH}
31 | max-interval = 1 hour
32 | max-interval = ${?PEKKO_KAFKA_COMMITTER_MAX_INTERVAL}
 33 |   parallelism = 10000
 34 |   parallelism = ${?PEKKO_KAFKA_COMMITTER_PARALLELISM}
35 | }
36 |
37 | backup {
38 | kafka-group-id = ${?BACKUP_KAFKA_GROUP_ID}
39 | time-configuration = {
40 | type = chrono-unit-slice
41 | type = ${?BACKUP_TIME_CONFIGURATION_TYPE}
42 | chrono-unit = hours
43 | chrono-unit = ${?BACKUP_TIME_CONFIGURATION_CHRONO_UNIT}
44 | duration = 1 hour
45 | duration = ${?BACKUP_TIME_CONFIGURATION_DURATION}
46 | }
47 | commit-timeout-buffer-window = 10 seconds
48 | commit-timeout-buffer-window = ${?BACKUP_COMMIT_TIMEOUT_BUFFER}
49 | }
50 |
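As a side note (illustrative, not from the repository), each `key = ${?SOME_ENV_VAR}` entry above only takes effect when that environment variable is set; otherwise the literal default on the preceding line wins. A small Typesafe Config sketch of how the committer settings resolve:

import com.typesafe.config.ConfigFactory

object CommitterConfigExample {
  def main(args: Array[String]): Unit = {
    val config = ConfigFactory.load()

    // 100000 unless PEKKO_KAFKA_COMMITTER_MAX_BATCH is set in the environment
    val maxBatch = config.getLong("pekko.kafka.committer.max-batch")

    // "1 hour" unless PEKKO_KAFKA_COMMITTER_MAX_INTERVAL overrides it
    val maxInterval = config.getDuration("pekko.kafka.committer.max-interval")

    println(s"max-batch=$maxBatch, max-interval=$maxInterval")
  }
}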
--------------------------------------------------------------------------------
/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/Config.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Backup
4 | import pureconfig.ConfigSource
5 | import pureconfig.generic.auto._
6 |
7 | import scala.annotation.nowarn
8 |
9 | trait Config {
10 |
11 | @nowarn("cat=lint-byname-implicit")
12 | implicit lazy val backupConfig: Backup = ConfigSource.default.at("backup").loadOrThrow[Backup]
13 | }
14 |
15 | object Config extends Config
16 |
--------------------------------------------------------------------------------
/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/KafkaConsumer.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import io.aiven.guardian.kafka.backup.configs.Backup
5 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice
6 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst
7 | import io.aiven.guardian.kafka.configs.KafkaCluster
8 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
9 | import org.apache.kafka.clients.consumer.ConsumerConfig
10 | import org.apache.kafka.clients.consumer.ConsumerRecord
11 | import org.apache.kafka.common.serialization.ByteArrayDeserializer
12 | import org.apache.pekko
13 |
14 | import scala.collection.immutable
15 | import scala.concurrent.Future
16 | import scala.jdk.DurationConverters._
17 |
18 | import java.util.Base64
19 |
20 | import pekko.Done
21 | import pekko.actor.ActorSystem
22 | import pekko.kafka.CommitDelivery
23 | import pekko.kafka.CommitterSettings
24 | import pekko.kafka.ConsumerMessage.CommittableOffset
25 | import pekko.kafka.ConsumerMessage.CommittableOffsetBatch
26 | import pekko.kafka.ConsumerSettings
27 | import pekko.kafka.Subscriptions
28 | import pekko.kafka.scaladsl.Committer
29 | import pekko.kafka.scaladsl.Consumer
30 | import pekko.stream.scaladsl.Sink
31 | import pekko.stream.scaladsl.SourceWithContext
32 |
 33 | /** A Kafka client that uses the Pekko Connectors Kafka Consumer under the hood to create a stream of events from a
 34 |  * Kafka cluster. To configure the Pekko Connectors Kafka Consumer use the standard Typesafe configuration, i.e.
 35 |  * `pekko.kafka.consumer` (note that the key and value deserializers are hardcoded to `ByteArrayDeserializer` so you
 36 |  * cannot override them).
37 | * @param configureConsumer
38 | * A way to configure the underlying Kafka consumer settings
39 | * @param configureCommitter
 40 |   A way to configure the underlying Kafka committer settings
41 | * @param system
42 | * A classic `ActorSystem`
43 | * @param kafkaClusterConfig
44 | * Additional cluster configuration that is needed
45 | */
46 | class KafkaConsumer(
47 | configureConsumer: Option[
48 | ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]]
49 | ] = None,
50 | configureCommitter: Option[
51 | CommitterSettings => CommitterSettings
52 | ] = None
53 | )(implicit system: ActorSystem, kafkaClusterConfig: KafkaCluster, backupConfig: Backup)
54 | extends KafkaConsumerInterface
55 | with LazyLogging {
56 | override type CursorContext = CommittableOffset
57 | override type Control = Consumer.Control
58 | override type MatCombineResult = Consumer.DrainingControl[Done]
59 | override type BatchedCursorContext = CommittableOffsetBatch
60 |
61 | import KafkaConsumer._
62 |
63 | if (kafkaClusterConfig.topics.isEmpty)
64 | logger.warn("Kafka Cluster configuration has no topics set")
65 |
66 | private[kafka] val consumerSettings = {
67 | val base = ConsumerSettings(system, new ByteArrayDeserializer, new ByteArrayDeserializer)
68 | configureConsumer
69 | .fold(base)(block => block(base))
70 | .withProperties(
71 | ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest"
72 | )
73 | .withCommitTimeout {
74 | val baseDuration = backupConfig.timeConfiguration match {
75 | case PeriodFromFirst(duration) => duration
76 | case ChronoUnitSlice(chronoUnit) =>
77 | chronoUnit.getDuration.toScala
78 | }
79 |
80 | baseDuration + backupConfig.commitTimeoutBufferWindow
81 | }
82 | .withGroupId(
83 | backupConfig.kafkaGroupId
84 | )
85 | }
86 |
87 | private[kafka] val subscriptions = Subscriptions.topics(kafkaClusterConfig.topics)
88 |
89 | /** @return
90 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors
91 | */
92 | override def getSource: SourceWithContext[ReducedConsumerRecord, CommittableOffset, Consumer.Control] =
93 | Consumer
94 | .sourceWithOffsetContext(consumerSettings, subscriptions)
95 | .map(consumerRecordToReducedConsumerRecord)
96 |
97 | private[kafka] val committerSettings: CommitterSettings = {
98 | val base = CommitterSettings(system)
99 | configureCommitter
100 | .fold(base)(block => block(base))
101 | .withDelivery(CommitDelivery.waitForAck)
102 | }
103 |
104 | /** @return
105 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message
106 | */
107 | override def commitCursor: Sink[CommittableOffsetBatch, Future[Done]] = Committer.sink(committerSettings)
108 |
109 | /** @return
110 | * The result of this function gets directly passed into the `combine` parameter of
111 | * [[pekko.stream.scaladsl.Source.toMat]]
112 | */
113 | override def matCombine: (Consumer.Control, Future[Done]) => Consumer.DrainingControl[Done] =
114 | Consumer.DrainingControl[Done].apply
115 |
116 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext`
117 | * @param cursors
118 | * The cursors that need to be batched
119 | * @return
120 | * A collection data structure that represents the batched cursors
121 | */
122 | override def batchCursorContext(cursors: immutable.Iterable[CommittableOffset]): CommittableOffsetBatch =
123 | CommittableOffsetBatch(cursors.toSeq)
124 | }
125 |
126 | object KafkaConsumer {
127 | def consumerRecordToReducedConsumerRecord(
128 | consumerRecord: ConsumerRecord[Array[Byte], Array[Byte]]
129 | ): ReducedConsumerRecord =
130 | ReducedConsumerRecord(
131 | consumerRecord.topic(),
132 | consumerRecord.partition(),
133 | consumerRecord.offset(),
134 | Option(consumerRecord.key()).map(byteArray => Base64.getEncoder.encodeToString(byteArray)),
135 | Base64.getEncoder.encodeToString(consumerRecord.value()),
136 | consumerRecord.timestamp(),
137 | consumerRecord.timestampType()
138 | )
139 | }
140 |
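Below is a minimal wiring sketch (not part of the repository; the topic, group id and bootstrap servers are placeholders) showing how a `KafkaConsumer` could be constructed with a programmatic consumer-settings override:

import scala.concurrent.duration._

import java.time.temporal.ChronoUnit

import org.apache.pekko.actor.ActorSystem

import io.aiven.guardian.kafka.backup.KafkaConsumer
import io.aiven.guardian.kafka.backup.configs.{Backup, ChronoUnitSlice}
import io.aiven.guardian.kafka.configs.KafkaCluster

object KafkaConsumerWiringExample {
  implicit val system: ActorSystem   = ActorSystem("KafkaConsumerWiringExample")
  implicit val cluster: KafkaCluster = KafkaCluster(Set("example-topic")) // placeholder topic
  implicit val backup: Backup =
    Backup("example-group", ChronoUnitSlice(ChronoUnit.HOURS), 10.seconds, compression = None)

  // Override the bootstrap servers programmatically rather than via pekko.kafka.consumer
  val consumer = new KafkaConsumer(
    configureConsumer = Some(_.withBootstrapServers("localhost:9092"))
  )
}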
--------------------------------------------------------------------------------
/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/KafkaConsumerInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import org.apache.pekko
5 |
6 | import scala.collection.immutable
7 | import scala.concurrent.Future
8 |
9 | import pekko.Done
10 | import pekko.stream.scaladsl.Sink
11 | import pekko.stream.scaladsl.SourceWithContext
12 |
13 | trait KafkaConsumerInterface {
14 |
 15 |   /** The type of the context to pass around. In the context of a Kafka consumer, this typically holds offset data to be
16 | * automatically committed
17 | */
18 | type CursorContext
19 |
20 | /** The type that represents how to control the given stream, i.e. if you want to shut it down or add metrics
21 | */
22 | type Control
23 |
24 | /** The type that represents the result of the `combine` parameter that is supplied to
25 | * [[pekko.stream.scaladsl.Source.toMat]]
26 | */
27 | type MatCombineResult
28 |
29 | /** The type that represents the result of batching a `CursorContext`
30 | */
31 | type BatchedCursorContext
32 |
33 | /** @return
34 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors
35 | */
36 | def getSource: SourceWithContext[ReducedConsumerRecord, CursorContext, Control]
37 |
38 | /** @return
39 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message
40 | */
41 | def commitCursor: Sink[BatchedCursorContext, Future[Done]]
42 |
43 | /** @return
44 | * The result of this function gets directly passed into the `combine` parameter of
45 | * [[pekko.stream.scaladsl.Source.toMat]]
46 | */
47 | def matCombine: (Control, Future[Done]) => MatCombineResult
48 |
49 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext`
50 | * @param cursors
51 | * The cursors that need to be batched
52 | * @return
53 | * A collection data structure that represents the batched cursors
54 | */
55 | def batchCursorContext(cursors: immutable.Iterable[CursorContext]): BatchedCursorContext
56 | }
57 |
--------------------------------------------------------------------------------
/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/Backup.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.configs
2 |
3 | import scala.concurrent.duration.FiniteDuration
4 |
5 | /** @param kafkaGroupId
6 | * The group-id that the Kafka consumer will use
7 | * @param timeConfiguration
8 | * Determines how the backed up objects/files are segregated depending on a time configuration
9 | * @param commitTimeoutBufferWindow
 10 |  *   A buffer that is added on top of the `timeConfiguration` when setting the Kafka Consumer commit timeout.
11 | * @param compression
12 | * Which compression to use for the backed up data
13 | */
14 | final case class Backup(kafkaGroupId: String,
15 | timeConfiguration: TimeConfiguration,
16 | commitTimeoutBufferWindow: FiniteDuration,
17 | compression: Option[Compression]
18 | )
19 |
--------------------------------------------------------------------------------
/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/Compression.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.configs
2 |
3 | import io.aiven.guardian.kafka.models.CompressionType
4 |
5 | final case class Compression(`type`: CompressionType, level: Option[Int])
6 |
--------------------------------------------------------------------------------
/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/TimeConfiguration.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup.configs
2 |
3 | import scala.concurrent.duration.FiniteDuration
4 |
5 | import java.time.temporal.ChronoUnit
6 |
7 | sealed trait TimeConfiguration
8 |
  9 | /** Backs up objects/files depending on the timestamp of the first received Kafka message. Suspending/resuming the
10 | * backup client will always create a new object/file
11 | * @param duration
12 | * The maximum span of time for each object/file, when this duration is exceeded a new file is created
13 | */
14 | final case class PeriodFromFirst(duration: FiniteDuration) extends TimeConfiguration
15 |
16 | /** Backs up objects/files by collecting received Kafka messages into a single time slice based on a
17 | * [[java.time.temporal.ChronoUnit]]. When suspending/resuming the backup client, this option will reuse existing
18 | * objects/files if they fall into the currently configured `chronoUnit`.
19 | * @param chronoUnit
 20 |  *   Kafka messages whose timestamps fall within the same configured [[java.time.temporal.ChronoUnit]] slice will be
 21 |  *   placed into the same object/file.
22 | */
23 | final case class ChronoUnitSlice(chronoUnit: ChronoUnit) extends TimeConfiguration
24 |
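For illustration only (the durations are arbitrary), the two variants can be constructed as follows:

object TimeConfigurationExamples {
  import scala.concurrent.duration._
  import java.time.temporal.ChronoUnit
  import io.aiven.guardian.kafka.backup.configs.{ChronoUnitSlice, PeriodFromFirst, TimeConfiguration}

  // One object/file per hour-aligned slice; suspend/resume reuses the object for the current slice
  val sliced: TimeConfiguration = ChronoUnitSlice(ChronoUnit.HOURS)

  // A new object/file is started once 30 minutes have elapsed since the first record in the current one
  val rolling: TimeConfiguration = PeriodFromFirst(30.minutes)
}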
--------------------------------------------------------------------------------
/core-backup/src/test/scala/io/aiven/guardian/kafka/backup/BackupClientControlWrapper.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import org.apache.pekko
4 |
5 | import scala.concurrent.ExecutionContext
6 | import scala.concurrent.Future
7 |
8 | import pekko.Done
9 | import pekko.kafka.scaladsl.Consumer
10 | import pekko.stream.Materializer
11 |
 12 | /** A wrapper designed to make it easier to cleanly shut down resources in tests
13 | */
14 | class BackupClientControlWrapper[T <: KafkaConsumer](backupClient: BackupClientInterface[T])(implicit
15 | materializer: Materializer,
16 | ec: ExecutionContext
17 | ) {
18 |
19 | private var control: Consumer.DrainingControl[Done] = _
20 |
21 | def run(): Unit =
22 | control = backupClient.backup.run()
23 |
24 | @SuppressWarnings(Array("DisableSyntax.null"))
25 | def shutdown(): Future[Done] =
26 | if (control != null)
27 | control.drainAndShutdown()
28 | else
29 | Future.successful(Done)
30 | }
31 |
--------------------------------------------------------------------------------
/core-backup/src/test/scala/io/aiven/guardian/kafka/backup/BackupClientInterfaceSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.pekko.AnyPropTestKit
5 | import org.apache.pekko.actor.ActorSystem
6 |
7 | class BackupClientInterfaceSpec
8 | extends AnyPropTestKit(ActorSystem("BackupClientInterfaceSpec"))
9 | with BackupClientInterfaceTest {
10 | override val compression: Option[Compression] = None
11 | }
12 |
--------------------------------------------------------------------------------
/core-backup/src/test/scala/io/aiven/guardian/kafka/backup/CompressionSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.kafka.backup.configs.{Compression => CompressionModel}
4 | import io.aiven.guardian.kafka.models.Gzip
5 | import io.aiven.guardian.pekko.AnyPropTestKit
6 | import io.aiven.guardian.pekko.PekkoStreamTestKit
7 | import org.apache.pekko
8 | import org.scalatest.concurrent.ScalaFutures
9 | import org.scalatest.matchers.must.Matchers
10 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks
11 |
12 | import scala.concurrent.ExecutionContext
13 | import scala.concurrent.duration._
14 | import scala.language.postfixOps
15 |
16 | import pekko.actor.ActorSystem
17 | import pekko.stream.scaladsl.Compression
18 | import pekko.stream.scaladsl.Source
19 | import pekko.stream.scaladsl.SourceWithContext
20 | import pekko.util.ByteString
21 |
22 | class CompressionSpec
23 | extends AnyPropTestKit(ActorSystem("CompressionSpec"))
24 | with Matchers
25 | with ScalaFutures
26 | with ScalaCheckPropertyChecks
27 | with PekkoStreamTestKit {
28 |
29 | implicit val ec: ExecutionContext = system.dispatcher
30 |
 31 |   // Due to pekko-streams taking a while to initialize for the first time we need a longer
 32 |   // timeout than usual
33 | implicit override val patienceConfig: PatienceConfig = PatienceConfig(10 seconds, 15 millis)
34 |
35 | property("GZip compression works with a SourceWithContext/FlowWithContext") { _ =>
36 | forAll { data: List[String] =>
37 | val asByteString = data.map(ByteString.fromString)
38 | val zippedWithIndex = asByteString.zipWithIndex
39 | val sourceWithContext = SourceWithContext.fromTuples(
40 | Source(zippedWithIndex)
41 | )
42 |
43 | val calculatedFuture = for {
44 | compressed <- sourceWithContext
45 | .unsafeDataVia(BackupClientInterface.compressionFlow(CompressionModel(Gzip, None)))
46 | .asSource
47 | .map { case (byteString, _) => byteString }
48 | .runFold(ByteString.empty)(_ ++ _)
49 | decompressed <- Source.single(compressed).via(Compression.gunzip()).runFold(ByteString.empty)(_ ++ _)
50 | } yield decompressed
51 |
52 | val decompressed = calculatedFuture.futureValue
53 | data.mkString mustEqual decompressed.utf8String
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/core-backup/src/test/scala/io/aiven/guardian/kafka/backup/ConfigSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import com.typesafe.config.ConfigFactory
4 | import com.typesafe.config.ConfigValueFactory
5 | import io.aiven.guardian.kafka.backup.configs.Backup
6 | import io.aiven.guardian.kafka.backup.configs.ChronoUnitSlice
7 | import io.aiven.guardian.kafka.backup.configs.PeriodFromFirst
8 | import io.aiven.guardian.kafka.backup.configs.TimeConfiguration
9 | import org.scalacheck.Arbitrary
10 | import org.scalacheck.Gen
11 | import org.scalatest.matchers.must.Matchers
12 | import org.scalatest.propspec.AnyPropSpec
13 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks
14 | import pureconfig.ConfigSource
15 | import pureconfig.generic.auto._
16 |
17 | import scala.annotation.nowarn
18 | import scala.concurrent.duration.FiniteDuration
19 |
20 | import java.time.temporal.ChronoUnit
21 |
22 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks {
23 | implicit val chronoUnitArb: Arbitrary[ChronoUnit] = Arbitrary(
24 | Gen.oneOf(ChronoUnit.values().toList)
25 | )
26 |
27 | property("Valid TimeConfiguration chrono-unit-slice configs should parse correctly") {
28 | forAll { (chronoUnit: ChronoUnit) =>
29 | val conf =
30 | s"""
31 | |time-configuration = {
32 | | type = chrono-unit-slice
33 | | chrono-unit = ${chronoUnit.name.toLowerCase}
34 | |}
35 | |""".stripMargin
36 |
37 | @nowarn("cat=lint-byname-implicit")
38 | val backup = ConfigSource.string(conf).at("time-configuration").loadOrThrow[TimeConfiguration]
39 | backup mustEqual ChronoUnitSlice(chronoUnit)
40 | }
41 | }
42 |
43 | property("Valid TimeConfiguration period-from-first configs should parse correctly") {
44 | forAll { (finiteDuration: FiniteDuration) =>
45 | val conf =
46 | s"""
47 | |time-configuration = {
48 | | type = period-from-first
49 | | duration = ${finiteDuration.toString()}
50 | |}
51 | |""".stripMargin
52 |
53 | @nowarn("cat=lint-byname-implicit")
54 | val backup = ConfigSource.string(conf).at("time-configuration").loadOrThrow[TimeConfiguration]
55 | backup mustEqual PeriodFromFirst(finiteDuration)
56 | }
57 | }
58 |
59 | property("Default Backup configuration loads") {
60 | val config = ConfigFactory.load()
61 |
62 | // Inject mandatory values that have no default into the configuration
63 | val configWithMandatoryValues =
64 | config.withValue("backup.kafka-group-id", ConfigValueFactory.fromAnyRef(MockedBackupClientInterface.KafkaGroupId))
65 |
66 | @nowarn("cat=lint-byname-implicit")
67 | def readConfiguration = ConfigSource.fromConfig(configWithMandatoryValues).at("backup").loadOrThrow[Backup]
68 |
69 | noException should be thrownBy readConfiguration
70 | }
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/core-backup/src/test/scala/io/aiven/guardian/kafka/backup/GzipCompressionBackupClientInterfaceSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.kafka.models.Gzip
5 | import io.aiven.guardian.pekko.AnyPropTestKit
6 | import org.apache.pekko.actor.ActorSystem
7 |
8 | class GzipCompressionBackupClientInterfaceSpec
9 | extends AnyPropTestKit(ActorSystem("GzipCompressionBackupClientInterfaceSpec"))
10 | with BackupClientInterfaceTest {
11 | override val compression: Option[Compression] = Some(Compression(Gzip, None))
12 | }
13 |
--------------------------------------------------------------------------------
/core-backup/src/test/scala/io/aiven/guardian/kafka/backup/MockedKafkaConsumerInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.backup
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import org.apache.pekko
5 |
6 | import scala.collection.immutable
7 | import scala.concurrent.Future
8 | import scala.concurrent.duration.FiniteDuration
9 |
10 | import java.time.Instant
11 | import java.time.temporal.ChronoUnit
12 | import java.util.concurrent.ConcurrentLinkedDeque
13 | import java.util.concurrent.atomic.AtomicReference
14 |
15 | import pekko.Done
16 | import pekko.NotUsed
17 | import pekko.stream.scaladsl._
18 |
 19 | /** A mocked `KafkaConsumerInterface` that returns specific data as its source
20 | *
21 | * @param kafkaData
22 | * The data which the mock will output
23 | * @param commitStorage
24 | * A collection that keeps track of whenever a cursor is committed
25 | * @param stopAfterDuration
 26 |  *   Don't produce any data from `kafkaData` whose timestamp is more than this duration after the first record's
 27 |  *   [[io.aiven.guardian.kafka.models.ReducedConsumerRecord.timestamp]]. Handy for simulating the premature closing of
 28 |  *   a Kafka client before it has finished producing all source elements (i.e. suspend/resume and restart scenarios).
29 | * @param handleOffsets
 30 |  *   Tells the MockedKafkaConsumerInterface to handle offsets rather than just ignoring them. This means that the
 31 |  *   mock will only add a commit to `commitStorage` if it is later than any currently committed offset. Furthermore,
 32 |  *   it will not replay source data that has already been committed.
33 | */
34 | class MockedKafkaConsumerInterface(kafkaData: Source[ReducedConsumerRecord, NotUsed],
35 | commitStorage: ConcurrentLinkedDeque[Long] = new ConcurrentLinkedDeque[Long](),
36 | stopAfterDuration: Option[FiniteDuration] = None,
37 | handleOffsets: Boolean = false
38 | ) extends KafkaConsumerInterface {
39 |
 40 |   /** The type of the context to pass around. In the context of a Kafka consumer, this typically holds offset data to be
41 | * automatically committed
42 | */
43 | override type CursorContext = Long
44 |
45 | /** The type that represents how to control the given stream, i.e. if you want to shut it down or add metrics
46 | */
47 | override type Control = Future[NotUsed]
48 |
49 | /** The type that represents the result of the `combine` parameter that is supplied to
50 | * [[pekko.stream.scaladsl.Source.toMat]]
51 | */
52 | override type MatCombineResult = Future[NotUsed]
53 |
54 | /** The type that represents the result of batching a `CursorContext`
55 | */
56 | override type BatchedCursorContext = Long
57 |
58 | private val firstReducedConsumerRecord: AtomicReference[ReducedConsumerRecord] =
59 | new AtomicReference[ReducedConsumerRecord]()
60 |
61 | /** @return
62 | * A `SourceWithContext` that returns a Kafka Stream which automatically handles committing of cursors
63 | */
64 | override def getSource: SourceWithContext[ReducedConsumerRecord, Long, Future[NotUsed]] = {
65 | val source = kafkaData
66 | .prefixAndTail(1)
67 | .flatMapConcat {
68 | case (Seq(head), rest) =>
69 | firstReducedConsumerRecord.set(head)
70 | Source.combine(
71 | Source.single(head),
72 | rest
73 | )(Concat(_))
74 | case _ => Source.empty[ReducedConsumerRecord]
75 | }
76 |
77 | val finalSource = if (handleOffsets) {
78 | source.filter { reducedConsumerRecord =>
79 | (commitStorage.isEmpty || {
80 | reducedConsumerRecord.offset > commitStorage.getLast
81 | }) && {
82 | (stopAfterDuration, Option(firstReducedConsumerRecord.get())) match {
83 | case (Some(afterDuration), Some(firstRecord)) =>
84 | val difference =
85 | ChronoUnit.MILLIS.between(Instant.ofEpochMilli(firstRecord.timestamp),
86 | Instant.ofEpochMilli(reducedConsumerRecord.timestamp)
87 | )
88 | afterDuration.toMillis > difference
89 | case _ => true
90 | }
91 | }
92 | }
93 | } else
94 | source
95 |
96 | SourceWithContext
97 | .fromTuples(finalSource.map { reducedConsumerRecord =>
98 | (reducedConsumerRecord, reducedConsumerRecord.offset)
99 | })
100 | .mapMaterializedValue(Future.successful)
101 | }
102 |
103 | /** @return
104 | * The result of this function gets directly passed into the `combine` parameter of
105 | * [[pekko.stream.scaladsl.Source.toMat]]
106 | */
107 | override def matCombine: (Future[NotUsed], Future[Done]) => Future[NotUsed] = Keep.left
108 |
109 | /** @return
110 | * A `Sink` that allows you to commit a `CursorContext` to Kafka to signify you have processed a message
111 | */
112 | override def commitCursor: Sink[Long, Future[Done]] = Sink.foreach { cursor =>
113 | if (handleOffsets && !commitStorage.isEmpty) {
114 | if (commitStorage.getLast < cursor)
115 | commitStorage.add(cursor)
116 | } else
117 | commitStorage.add(cursor)
118 | }
119 |
120 | /** How to batch an immutable iterable of `CursorContext` into a `BatchedCursorContext`
121 | * @param cursors
122 | * The cursors that need to be batched
123 | * @return
124 | * A collection data structure that represents the batched cursors
125 | */
126 | override def batchCursorContext(cursors: immutable.Iterable[Long]): Long = cursors.max
127 |
128 | }
129 |
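A tiny test-style sketch (not part of the repository) of the batching behaviour documented above, which keeps only the highest offset in a batch:

object MockedConsumerBatchingExample {
  import org.apache.pekko.stream.scaladsl.Source
  import io.aiven.guardian.kafka.backup.MockedKafkaConsumerInterface

  def main(args: Array[String]): Unit = {
    // An empty source suffices because only batchCursorContext is exercised here
    val mock = new MockedKafkaConsumerInterface(Source.empty)

    // Batching a group of committed offsets keeps only the highest one
    assert(mock.batchCursorContext(List(1L, 7L, 3L)) == 7L)
  }
}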
--------------------------------------------------------------------------------
/core-cli/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | pekko {
2 | loggers = ["org.apache.pekko.event.slf4j.Slf4jLogger"]
3 | loglevel = "INFO"
4 | logging-filter = "org.apache.pekko.event.slf4j.Slf4jLoggingFilter"
5 | }
6 |
--------------------------------------------------------------------------------
/core-cli/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!-- Minimal console appender configuration (reconstructed; only the pattern line survives from the original markup). -->
 3 | <configuration>
 4 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
 5 |         <encoder>
 6 |             <pattern>[%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n</pattern>
 7 |         </encoder>
 8 |     </appender>
 9 |     <root level="INFO">
10 |         <appender-ref ref="STDOUT"/>
11 |     </root>
12 | </configuration>
--------------------------------------------------------------------------------
/core-cli/src/main/scala/io/aiven/guardian/cli/MainUtils.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.cli
2 |
3 | import ch.qos.logback.classic.joran.JoranConfigurator
4 | import ch.qos.logback.core.Context
5 | import org.slf4j.ILoggerFactory
6 |
7 | import scala.concurrent.ExecutionContext
8 | import scala.concurrent.Future
9 | import scala.concurrent.Promise
10 | import scala.concurrent.blocking
11 | import scala.io.StdIn
12 | import scala.util.Failure
13 | import scala.util.Success
14 | import scala.util.Using
15 |
16 | import java.nio.file.Files
17 | import java.nio.file.Path
18 |
19 | object MainUtils {
20 |
 21 |   /** Hook that lets the user specify the future that, when completed, will signal the shutdown of the application. Adapted
22 | * from
23 | * https://github.com/apache/incubator-pekko-http/blob/94d1b1c153cc39216dae4217fd0e927f04d53cd2/http/src/main/scala/org/apache/pekko/http/scaladsl/server/HttpApp.scala#L164-L176
24 | */
25 | @SuppressWarnings(
26 | Array(
27 | "scalafix:DisableSyntax.null"
28 | )
29 | )
30 | def waitForShutdownSignal(promise: Promise[Unit] = Promise[Unit]())(implicit ec: ExecutionContext): Future[Unit] = {
31 | sys.addShutdownHook {
32 | promise.trySuccess(())
33 | }
34 | Future {
35 | blocking {
36 | if (StdIn.readLine("Press RETURN to stop...\n") != null)
37 | promise.trySuccess(())
38 | }
39 | }
40 | promise.future
41 | }
42 |
43 | /** Allows you to override the default logback.xml file with a custom one
44 | * @see
45 | * https://stackoverflow.com/a/21886322/1519631
46 | */
47 | def setLogbackFile(path: Path, loggerContext: ILoggerFactory): Unit =
48 | Using(Files.newInputStream(path)) { inputStream =>
49 | val configurator = new JoranConfigurator
50 | configurator.setContext(loggerContext.asInstanceOf[Context])
51 | configurator.doConfigure(inputStream)
52 | } match {
53 | case Failure(exception) => throw exception
54 | case Success(value) => value
55 | }
56 |
57 | }
58 |
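A brief, hypothetical usage sketch of `waitForShutdownSignal`, blocking an entry point until RETURN is pressed or a JVM shutdown hook fires:

object ShutdownSignalExample {
  import scala.concurrent.Await
  import scala.concurrent.ExecutionContext.Implicits.global
  import scala.concurrent.duration.Duration

  import io.aiven.guardian.cli.MainUtils

  def main(args: Array[String]): Unit =
    // Blocks until the returned future is completed by stdin input or a shutdown hook
    Await.result(MainUtils.waitForShutdownSignal(), Duration.Inf)
}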
--------------------------------------------------------------------------------
/core-cli/src/main/scala/io/aiven/guardian/cli/PekkoSettings.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.cli
2 |
3 | import org.apache.pekko.actor.ActorSystem
4 |
5 | import scala.concurrent.ExecutionContext
6 |
7 | trait PekkoSettings {
8 | implicit val actorSystem: ActorSystem = ActorSystem()
9 | implicit val executionContext: ExecutionContext = ExecutionContext.global
10 | }
11 |
--------------------------------------------------------------------------------
/core-cli/src/main/scala/io/aiven/guardian/cli/arguments/PropertiesOpt.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.cli.arguments
2 |
3 | import cats.data.ValidatedNel
4 | import cats.implicits._
5 | import com.monovore.decline.Argument
6 |
7 | import scala.util.Failure
8 | import scala.util.Success
9 | import scala.util.Using
10 |
11 | import java.io.BufferedReader
12 | import java.io.FileNotFoundException
13 | import java.io.FileReader
14 | import java.util.Properties
15 |
16 | object PropertiesOpt {
17 | implicit val propertiesArgument: Argument[Properties] = new Argument[Properties] {
18 | override def read(string: String): ValidatedNel[String, Properties] = {
19 | val prop = new Properties()
20 | Using(new BufferedReader(new FileReader(string))) { reader =>
21 | prop.load(reader)
22 | } match {
23 | case Failure(_: FileNotFoundException) =>
24 | s"Properties file at path $string does not exist".invalidNel
25 | case Failure(_) =>
26 | s"Unable to read file at path $string".invalidNel
27 | case Success(_) => prop.validNel
28 | }
29 | }
30 |
31 | override def defaultMetavar: String = "path"
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/core-cli/src/main/scala/io/aiven/guardian/cli/arguments/StorageOpt.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.cli.arguments
2 |
3 | import cats.data.ValidatedNel
4 | import cats.implicits._
5 | import com.monovore.decline.Argument
6 | import enumeratum._
7 |
8 | sealed trait StorageOpt extends EnumEntry with EnumEntry.Lowercase
9 |
10 | object StorageOpt extends Enum[StorageOpt] {
11 | case object S3 extends StorageOpt
12 |
13 | val values: IndexedSeq[StorageOpt] = findValues
14 |
15 | implicit val storageArgument: Argument[StorageOpt] = new Argument[StorageOpt] {
16 | override def read(string: String): ValidatedNel[String, StorageOpt] =
17 | StorageOpt.withNameOption(string).toValidNel("Invalid Storage Argument")
18 |
19 | override def defaultMetavar: String = "storage"
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/core-cli/src/main/scala/io/aiven/guardian/cli/options/Options.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.cli.options
2 |
3 | import cats.data.NonEmptyList
4 | import cats.implicits._
5 | import com.monovore.decline.Opts
6 | import com.typesafe.config.ConfigException.Missing
7 | import com.typesafe.config.ConfigFactory
8 | import io.aiven.guardian.cli.arguments.StorageOpt
9 | import io.aiven.guardian.kafka.configs.KafkaCluster
10 | import pureconfig.error.ConfigReaderException
11 |
12 | import java.nio.file.Path
13 |
14 | trait Options {
15 | val storageOpt: Opts[StorageOpt] =
16 | Opts.option[StorageOpt]("storage", help = "Which type of storage to persist kafka topics")
17 |
18 | val dataBucketOpt: Opts[Option[String]] =
19 | Opts.option[String]("s3-data-bucket", help = "S3 Bucket for storage of main backup data").orNone
20 |
21 | val topicsOpt: Opts[Option[NonEmptyList[String]]] =
22 | Opts.options[String]("kafka-topics", help = "Kafka topics to operate on").orNone
23 |
24 | val bootstrapServersOpt: Opts[Option[NonEmptyList[String]]] =
25 | Opts.options[String]("kafka-bootstrap-servers", help = "Kafka bootstrap servers").orNone
26 |
27 | val logbackFileOpt: Opts[Option[Path]] =
28 | Opts.option[Path]("logback-file", help = "Specify logback.xml configuration to override default").orNone
29 |
30 | def optionalPureConfigValue[T](value: () => T): Option[T] =
31 | try Some(value())
32 | catch {
33 | case _: ConfigReaderException[_] =>
34 | None
35 | }
36 |
37 | @SuppressWarnings(
38 | Array(
39 | "scalafix:DisableSyntax.null"
40 | )
41 | )
42 | def checkConfigKeyIsDefined(path: String): Boolean =
43 | try ConfigFactory.load().getAnyRef(path) != null
44 | catch {
45 | case _: Missing => false
46 | }
47 |
48 | val kafkaClusterOpt: Opts[KafkaCluster] = topicsOpt.mapValidated { topics =>
49 | import io.aiven.guardian.kafka.{Config => KafkaConfig}
50 | topics match {
51 | case Some(value) =>
52 | KafkaCluster(value.toList.toSet).validNel
53 | case None if KafkaConfig.kafkaClusterConfig.topics.nonEmpty => KafkaConfig.kafkaClusterConfig.validNel
54 | case _ =>
55 | "kafka-topics is a mandatory value that needs to be configured".invalidNel
56 | }
57 | }
58 |
59 | }
60 |
61 | object Options extends Options
62 |
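A quick parsing sketch (not part of the repository) exercising the shared `storageOpt` with decline's `Command`:

object StorageOptParseExample {
  import com.monovore.decline.Command

  import io.aiven.guardian.cli.arguments.StorageOpt
  import io.aiven.guardian.cli.options.Options

  def main(args: Array[String]): Unit = {
    // Wrap the option in a throwaway Command purely to exercise parsing
    val command = Command("example", "Parses the --storage flag")(Options.storageOpt)

    assert(command.parse(Seq("--storage", "s3")) == Right(StorageOpt.S3))
    assert(command.parse(Seq("--storage", "gcs")).isLeft) // only "s3" is currently supported
  }
}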
--------------------------------------------------------------------------------
/core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/DatabaseInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import org.apache.pekko
5 |
6 | import scala.concurrent.Future
7 |
8 | import pekko.NotUsed
9 | import pekko.stream.javadsl.Flow
10 | import pekko.stream.scaladsl.Source
11 | import pekko.util.ByteString
12 |
13 | trait DatabaseInterface {
14 |
15 | /** Given a source of storage where Kafka messages are contained, stream it into a database.
 16 |  * @param kafkaStorageSource The source of Kafka records that have been retrieved from storage
 17 |  * @param encodeKafkaRowToByteString A flow that encodes each record into the byte representation expected by the database
18 | * @return
19 | * Number of rows updated
20 | */
21 | def streamInsert(kafkaStorageSource: Source[ReducedConsumerRecord, NotUsed],
22 | encodeKafkaRowToByteString: Flow[ReducedConsumerRecord, ByteString, NotUsed]
23 | ): Future[Long]
24 | }
25 |
--------------------------------------------------------------------------------
/core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/PostgresJDBCDatabase.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import org.apache.pekko
5 | import org.postgresql.copy.CopyManager
6 | import org.postgresql.core.BaseConnection
7 |
8 | import scala.concurrent.ExecutionContext
9 | import scala.concurrent.Future
10 | import scala.concurrent.blocking
11 |
12 | import java.sql.Connection
13 |
14 | import pekko.NotUsed
15 | import pekko.stream.ActorAttributes
16 | import pekko.stream.Materializer
17 | import pekko.stream.javadsl.Flow
18 | import pekko.stream.scaladsl.Source
19 | import pekko.stream.scaladsl.StreamConverters
20 | import pekko.util.ByteString
21 |
22 | /** A Postgres Database backed by JDBC which uses the Postgres COPY command to insert data into the database. Note that
23 | * since this uses JDBC and CopyManager, its implementation is blocking under the hood.
 24 |  * @param executionContext
25 | * @param materializer
26 | * @param conn
27 | */
28 | class PostgresJDBCDatabase()(implicit executionContext: ExecutionContext, materializer: Materializer, conn: Connection)
29 | extends DatabaseInterface {
30 |
31 | /** Inserts data into a Postgres Database using the COPY method (see
32 | * https://www.postgresql.org/docs/9.4/sql-copy.html). This means the data insertion is buffered and also extremely
33 | * fast since it bypasses internal parts of the Postgres engine which are not necessary.
34 | *
35 | * Since it uses JDBC plus `java.io.InputStream` under the hood, the operation is inherently blocking even though it
36 | * returns a `scala.concurrent.Future`. Due to this we have used blocking IO dispatchers to avoid problems that are
37 |   * typical of blocking IO.
38 | *
39 | * @return
40 | * Number of rows updated
41 | */
42 | override def streamInsert(kafkaStorageSource: Source[ReducedConsumerRecord, NotUsed],
43 | encodeKafkaRowToByteString: Flow[ReducedConsumerRecord, ByteString, NotUsed]
44 | ): Future[Long] = {
45 | // TODO implement SQL query
46 | val sql = """"""
47 |
48 |     // Since this is blocking IO we use a custom dispatcher to deal with it
49 | val sink = StreamConverters
50 | .asInputStream()
51 | .withAttributes(ActorAttributes.dispatcher(ActorAttributes.IODispatcher.dispatcher))
52 |
53 | val postgresSource = kafkaStorageSource.via(encodeKafkaRowToByteString)
54 |
55 |     Future(blocking {
56 | postgresSource.runWith(
57 | sink.mapMaterializedValue(inputStream =>
58 | new CopyManager(conn.asInstanceOf[BaseConnection]).copyIn(
59 | sql,
60 | inputStream
61 | )
62 | )
63 | )
64 | })
65 | }
66 |
67 | }
68 |
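A minimal usage sketch of `streamInsert` (not from the repository): it assumes an implicit `ExecutionContext`, `Materializer` and `java.sql.Connection` are already in scope, and that `storageSource` is a `Source[ReducedConsumerRecord, NotUsed]` provided by one of the storage backends. The tab-separated row encoding is purely illustrative and has to match whatever COPY statement ends up being used.

    import org.apache.pekko.stream.scaladsl.{Flow => ScalaFlow}
    import org.apache.pekko.util.ByteString
    import io.aiven.guardian.kafka.models.ReducedConsumerRecord

    // Encode every record as one text row for COPY; streamInsert expects a javadsl Flow,
    // so the scaladsl Flow is converted with asJava.
    val encodeRow =
      ScalaFlow[ReducedConsumerRecord]
        .map(r => ByteString(s"${r.topic}\t${r.partition}\t${r.offset}\t${r.value}\n"))
        .asJava

    // val rowsInserted: scala.concurrent.Future[Long] =
    //   new PostgresJDBCDatabase().streamInsert(storageSource, encodeRow)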
--------------------------------------------------------------------------------
/core-compaction/src/main/scala/io/aiven/guardian/kafka/compaction/StorageInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.compaction
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import org.apache.pekko
5 |
6 | import pekko.NotUsed
7 | import pekko.stream.scaladsl.Source
8 |
9 | trait StorageInterface {
10 |
11 | /** Retrieve Kafka data from a given storage source
12 |     * @return A source of the `ReducedConsumerRecord`s held in the storage backend
13 | */
14 | def retrieveKafkaData: Source[ReducedConsumerRecord, NotUsed]
15 |
16 | /** Checks whether the storage exists and is accessible
17 | */
18 | def checkStorageAccessible: Source[Boolean, NotUsed]
19 | }
20 |
--------------------------------------------------------------------------------
/core-gcs/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | gcs-config = {
2 | data-bucket = ${?GCS_CONFIG_DATA_BUCKET}
3 | compaction-bucket = ${?GCS_CONFIG_COMPACTION_BUCKET}
4 | }
5 |
--------------------------------------------------------------------------------
/core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/Config.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.gcs
2 |
3 | import io.aiven.guardian.kafka.gcs.configs.GCS
4 | import pureconfig._
5 | import pureconfig.generic.auto._
6 |
7 | import scala.annotation.nowarn
8 |
9 | trait Config {
10 | @nowarn("cat=lint-byname-implicit")
11 | implicit lazy val gcsConfig: GCS =
12 | ConfigSource.default.at("gcs-config").loadOrThrow[GCS]
13 | }
14 |
15 | object Config extends Config
16 |
--------------------------------------------------------------------------------
/core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/configs/GCS.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.gcs.configs
2 |
3 | /** GCS specific configuration used when storing Kafka ConsumerRecords to a GCS bucket
4 | * @param dataBucket
5 | * The bucket where a Kafka Consumer directly streams data into as storage
6 | * @param compactionBucket
7 | * The bucket where compaction results are stored
8 | */
9 | final case class GCS(dataBucket: String, compactionBucket: String)
10 |
--------------------------------------------------------------------------------
/core-gcs/src/main/scala/io/aiven/guardian/kafka/gcs/errors/GCSErrors.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.gcs.errors
2 |
3 | import io.aiven.guardian.kafka.Errors
4 |
5 | sealed abstract class GCSErrors extends Errors
6 |
7 | object GCSErrors {
8 | final case class ExpectedObjectToExist(bucketName: String, maybePrefix: Option[String]) extends GCSErrors {
9 | override def getMessage: String =
10 | ???
11 | }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/core-restore/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | pekko.kafka.producer {
2 | discovery-method = ${?PEKKO_KAFKA_PRODUCER_DISCOVERY_METHOD}
3 | service-name = ${?PEKKO_KAFKA_PRODUCER_SERVICE_NAME}
4 | resolve-timeout = ${?PEKKO_KAFKA_PRODUCER_RESOLVE_TIMEOUT}
5 | parallelism = ${?PEKKO_KAFKA_PRODUCER_PARALLELISM}
6 | close-timeout = ${?PEKKO_KAFKA_PRODUCER_CLOSE_TIMEOUT}
7 | close-on-producer-stop = ${?PEKKO_KAFKA_PRODUCER_CLOSE_ON_PRODUCER_STOP}
8 | use-dispatcher = ${?PEKKO_KAFKA_PRODUCER_USE_DISPATCHER}
9 | eos-commit-interval = ${?PEKKO_KAFKA_PRODUCER_EOS_COMMIT_INTERVAL}
10 | }
11 |
12 | restore {
13 | from-when = ${?RESTORE_FROM_WHEN}
14 | override-topics = ${?RESTORE_OVERRIDE_TOPICS}
15 | }
16 |
--------------------------------------------------------------------------------
/core-restore/src/main/scala/io/aiven/guardian/kafka/restore/Config.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.restore.configs.Restore
4 | import pureconfig._
5 | import pureconfig.configurable._
6 | import pureconfig.generic.auto._
7 |
8 | import scala.annotation.nowarn
9 |
10 | import java.time.OffsetDateTime
11 | import java.time.format.DateTimeFormatter
12 |
13 | trait Config {
14 | implicit val localDateConvert: ConfigConvert[OffsetDateTime] = offsetDateTimeConfigConvert(
15 | DateTimeFormatter.ISO_OFFSET_DATE_TIME
16 | )
17 |
18 | @nowarn("cat=lint-byname-implicit")
19 | implicit lazy val restoreConfig: Restore = ConfigSource.default.at("restore").loadOrThrow[Restore]
20 | }
21 |
22 | object Config extends Config
23 |
--------------------------------------------------------------------------------
/core-restore/src/main/scala/io/aiven/guardian/kafka/restore/KafkaProducer.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import io.aiven.guardian.kafka.restore.configs.Restore
5 | import org.apache.kafka.clients.producer.ProducerRecord
6 | import org.apache.kafka.common.serialization.ByteArraySerializer
7 | import org.apache.pekko
8 |
9 | import scala.concurrent.Future
10 |
11 | import java.util.Base64
12 |
13 | import pekko.Done
14 | import pekko.actor.ActorSystem
15 | import pekko.kafka.ProducerSettings
16 | import pekko.kafka.scaladsl.Producer
17 | import pekko.stream.scaladsl.Sink
18 |
19 | class KafkaProducer(
20 | configureProducer: Option[
21 | ProducerSettings[Array[Byte], Array[Byte]] => ProducerSettings[Array[Byte], Array[Byte]]
22 | ] = None
23 | )(implicit system: ActorSystem, restoreConfig: Restore)
24 | extends KafkaProducerInterface {
25 |
26 | private[kafka] val producerSettings = {
27 | val base = ProducerSettings(system, new ByteArraySerializer, new ByteArraySerializer)
28 | configureProducer
29 | .fold(base)(block => block(base))
30 | }
31 |
32 | override def getSink: Sink[ReducedConsumerRecord, Future[Done]] =
33 | Producer.plainSink(producerSettings).contramap[ReducedConsumerRecord] { reducedConsumerRecord =>
34 | val topic = restoreConfig.overrideTopics match {
35 | case Some(map) =>
36 | map.getOrElse(reducedConsumerRecord.topic, reducedConsumerRecord.topic)
37 | case None => reducedConsumerRecord.topic
38 | }
39 | val valueAsByteArray = Base64.getDecoder.decode(reducedConsumerRecord.value)
40 | reducedConsumerRecord.key match {
41 | case Some(key) =>
42 | new ProducerRecord[Array[Byte], Array[Byte]](
43 | topic,
44 | Base64.getDecoder.decode(key),
45 | valueAsByteArray
46 | )
47 | case None =>
48 | new ProducerRecord[Array[Byte], Array[Byte]](
49 | topic,
50 | valueAsByteArray
51 | )
52 | }
53 | }
54 | }
55 |
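A hedged sketch of wiring up the producer with a topic override (topic names and the bootstrap server are illustrative only):

    import org.apache.pekko.actor.ActorSystem
    import io.aiven.guardian.kafka.restore.KafkaProducer
    import io.aiven.guardian.kafka.restore.configs.Restore

    implicit val system: ActorSystem = ActorSystem("restore-producer-sketch")

    // Records backed up from "orders" are produced to "orders-restored";
    // every other topic keeps its original name.
    implicit val restoreConfig: Restore =
      Restore(fromWhen = None, overrideTopics = Some(Map("orders" -> "orders-restored")))

    val producer = new KafkaProducer(
      configureProducer = Some(_.withBootstrapServers("localhost:9092"))
    )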
--------------------------------------------------------------------------------
/core-restore/src/main/scala/io/aiven/guardian/kafka/restore/KafkaProducerInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import org.apache.pekko
5 |
6 | import scala.concurrent.Future
7 |
8 | import pekko.Done
9 | import pekko.stream.scaladsl.Sink
10 |
11 | trait KafkaProducerInterface {
12 | def getSink: Sink[ReducedConsumerRecord, Future[Done]]
13 | }
14 |
--------------------------------------------------------------------------------
/core-restore/src/main/scala/io/aiven/guardian/kafka/restore/RestoreClientInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import io.aiven.guardian.kafka.ExtensionsMethods._
5 | import io.aiven.guardian.kafka.Utils
6 | import io.aiven.guardian.kafka.codecs.Circe._
7 | import io.aiven.guardian.kafka.configs.KafkaCluster
8 | import io.aiven.guardian.kafka.models.BackupObjectMetadata
9 | import io.aiven.guardian.kafka.models.Gzip
10 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
11 | import io.aiven.guardian.kafka.restore.configs.Restore
12 | import org.apache.pekko
13 | import org.mdedetrich.pekko.stream.support.CirceStreamSupport
14 | import org.typelevel.jawn.AsyncParser
15 |
16 | import scala.concurrent.ExecutionContext
17 | import scala.concurrent.Future
18 |
19 | import java.time.OffsetDateTime
20 |
21 | import pekko.Done
22 | import pekko.NotUsed
23 | import pekko.actor.ActorSystem
24 | import pekko.stream.Attributes
25 | import pekko.stream.KillSwitches
26 | import pekko.stream.UniqueKillSwitch
27 | import pekko.stream.scaladsl.Compression
28 | import pekko.stream.scaladsl.Concat
29 | import pekko.stream.scaladsl.Flow
30 | import pekko.stream.scaladsl.Keep
31 | import pekko.stream.scaladsl.RunnableGraph
32 | import pekko.stream.scaladsl.Source
33 | import pekko.util.ByteString
34 |
35 | trait RestoreClientInterface[T <: KafkaProducerInterface] extends LazyLogging {
36 | implicit val kafkaProducerInterface: T
37 | implicit val restoreConfig: Restore
38 | implicit val kafkaClusterConfig: KafkaCluster
39 | implicit val system: ActorSystem
40 | val maybeAttributes: Option[Attributes] = None
41 |
42 | def retrieveBackupKeys: Future[List[String]]
43 |
44 | def downloadFlow: Flow[String, ByteString, NotUsed]
45 |
46 | private[kafka] def keysWithOffsetDateTime(keys: List[String]): List[(String, OffsetDateTime)] = keys.map { key =>
47 | (key, Utils.keyToOffsetDateTime(key))
48 | }
49 |
50 | private[kafka] def finalKeys: Future[List[String]] = {
51 | implicit val ec: ExecutionContext = system.dispatcher
52 |
53 | for {
54 | backupKeys <- retrieveBackupKeys
55 | withTime = keysWithOffsetDateTime(backupKeys)
56 | sorted = withTime.sortBy { case (_, time) =>
57 | time
58 | }
59 |
60 | latest = restoreConfig.fromWhen match {
61 | case Some(pickedDate) =>
62 | val index = sorted.indexWhere { case (_, time) =>
63 | time >= pickedDate
64 | }
65 | index match {
66 | case 0 => sorted
67 | case -1 =>
68 | sorted.lastOption match {
69 | case Some((key, value)) =>
70 |               // It's still technically possible that the last key contains records at or after the picked date.
71 | List((key, value))
72 | case _ => List.empty
73 | }
74 | case index =>
75 | val (_, rest) = sorted.splitAt(index - 1)
76 | rest
77 | }
78 | case None => sorted
79 | }
80 | } yield latest.map { case (key, _) => key }
81 | }
82 |
83 | private[kafka] def checkTopicInConfig(reducedConsumerRecord: ReducedConsumerRecord): Boolean =
84 | kafkaClusterConfig.topics.contains(reducedConsumerRecord.topic)
85 |
86 | private[kafka] def checkTopicGreaterThanTime(reducedConsumerRecord: ReducedConsumerRecord): Boolean =
87 | restoreConfig.fromWhen match {
88 | case Some(pickedDate) =>
89 | reducedConsumerRecord.toOffsetDateTime >= pickedDate
90 | case None => true
91 | }
92 |
93 | private[kafka] def restoreKey(key: String): Source[ByteString, NotUsed] = {
94 | val source = Source
95 | .single(key)
96 | .via(downloadFlow)
97 |
98 | BackupObjectMetadata.fromKey(key).compression match {
99 | case Some(Gzip) => source.via(Compression.gunzip())
100 | case None => source
101 | }
102 | }
103 |
104 | def restore: RunnableGraph[(UniqueKillSwitch, Future[Done])] = {
105 | val sourceWithCompression = Source.future(finalKeys).flatMapConcat { keys =>
106 | keys.map(key => restoreKey(key)) match {
107 | case first :: Nil => first
108 | case first :: second :: Nil => Source.combine(first, second)(Concat(_))
109 | case first :: second :: rest => Source.combine(first, second, rest: _*)(Concat(_))
110 | case Nil => Source.empty[ByteString]
111 | }
112 | }
113 |
114 | val asReducedConsumerRecord = sourceWithCompression
115 | .via(CirceStreamSupport.decode[Option[ReducedConsumerRecord]](AsyncParser.UnwrapArray, multiValue = true))
116 | .collect {
117 | case Some(reducedConsumerRecord)
118 | if checkTopicInConfig(reducedConsumerRecord) && checkTopicGreaterThanTime(reducedConsumerRecord) =>
119 | reducedConsumerRecord
120 | }
121 |
122 | asReducedConsumerRecord.viaMat(KillSwitches.single)(Keep.right).toMat(kafkaProducerInterface.getSink)(Keep.both)
123 | }
124 |
125 | }
126 |
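A hedged end-to-end sketch of running the `restore` graph, using the mocked implementations from the test sources further below; the key and payload are illustrative (an empty JSON array simply restores nothing):

    import org.apache.pekko.actor.ActorSystem
    import org.apache.pekko.util.ByteString
    import io.aiven.guardian.kafka.configs.KafkaCluster
    import io.aiven.guardian.kafka.restore.configs.Restore

    implicit val system: ActorSystem                         = ActorSystem("restore-sketch")
    implicit val kafkaProducer: MockedKafkaProducerInterface = new MockedKafkaProducerInterface()
    implicit val restoreConfig: Restore                      = Restore.empty
    implicit val kafkaClusterConfig: KafkaCluster            = KafkaCluster(Set("orders"))

    val client = new MockedRestoreClientInterface(
      Map("2021-01-01T12:00:00Z.json" -> ByteString("[]"))
    )

    // `done` completes once every selected backup object has been decoded, filtered and
    // pushed to the producer sink; the kill switch allows aborting the restore early.
    val (killSwitch, done) = client.restore.run()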
--------------------------------------------------------------------------------
/core-restore/src/main/scala/io/aiven/guardian/kafka/restore/configs/Restore.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore.configs
2 |
3 | import java.time.OffsetDateTime
4 |
5 | /** @param fromWhen
6 |   * An optional datetime; when defined, only records that are at or after this date are restored
7 | * @param overrideTopics
8 | * An optional map that allows you to translate topics that are backed up to a new topic name in the destination
9 | * Kafka cluster. The key is the backed up topic name and the value is the new topic name. If this map doesn't
10 |   * contain a key for a topic then it's restored under its original topic name.
11 | */
12 | final case class Restore(fromWhen: Option[OffsetDateTime], overrideTopics: Option[Map[String, String]])
13 |
14 | object Restore {
15 | def empty: Restore = Restore(None, None)
16 | }
17 |
--------------------------------------------------------------------------------
/core-restore/src/test/scala/io/aiven/guardian/kafka/restore/ConfigSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.Generators.kafkaTopic
4 | import io.aiven.guardian.kafka.restore.configs.Restore
5 | import org.scalacheck.Gen
6 | import org.scalacheck.ops.time.ImplicitJavaTimeGenerators._
7 | import org.scalatest.matchers.must.Matchers
8 | import org.scalatest.propspec.AnyPropSpec
9 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks
10 | import pureconfig._
11 | import pureconfig.configurable._
12 | import pureconfig.generic.auto._
13 |
14 | import scala.annotation.nowarn
15 |
16 | import java.time.OffsetDateTime
17 | import java.time.format.DateTimeFormatter
18 |
19 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks {
20 | implicit val localDateConvert: ConfigConvert[OffsetDateTime] = offsetDateTimeConfigConvert(
21 | DateTimeFormatter.ISO_OFFSET_DATE_TIME
22 | )
23 |
24 | property("Valid Restore configs should parse correctly") {
25 | val overrideMapGen = for {
26 | size <- Gen.choose(1, 10)
27 | keys <- Gen.containerOfN[Set, String](size, kafkaTopic)
28 | values <- Gen.containerOfN[Set, String](size, kafkaTopic)
29 | } yield keys.zip(values).toMap
30 |
31 | val offsetDateTimeGen = arbZonedDateTime.arbitrary.map(_.toOffsetDateTime)
32 |
33 | forAll(offsetDateTimeGen, overrideMapGen) { (fromWhen: OffsetDateTime, overrideTopics: Map[String, String]) =>
34 | val topics = overrideTopics
35 | .map { case (key, value) =>
36 | val k = "\"" + key + "\""
37 | val v = "\"" + value + "\""
38 | s"$k=$v"
39 | }
40 | .mkString("", "\n ", "")
41 |
42 | val conf = s"""
43 | |restore {
44 | | from-when = "${fromWhen.toString}"
45 | | override-topics = {
46 | | $topics
47 | | }
48 | |}
49 | |""".stripMargin
50 |
51 | @nowarn("cat=lint-byname-implicit")
52 | val restore = ConfigSource.string(conf).at("restore").loadOrThrow[Restore]
53 | restore mustEqual Restore(Some(fromWhen), Some(overrideTopics))
54 | }
55 | }
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/core-restore/src/test/scala/io/aiven/guardian/kafka/restore/GzipCompressionRestoreClientInterfaceSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.kafka.models.Gzip
5 | import io.aiven.guardian.pekko.AnyPropTestKit
6 | import org.apache.pekko.actor.ActorSystem
7 |
8 | class GzipCompressionRestoreClientInterfaceSpec
9 | extends AnyPropTestKit(ActorSystem("GzipCompressionRestoreClientInterfaceSpec"))
10 | with RestoreClientInterfaceTest {
11 | override val compression: Option[Compression] = Some(Compression(Gzip, None))
12 | }
13 |
--------------------------------------------------------------------------------
/core-restore/src/test/scala/io/aiven/guardian/kafka/restore/MockedKafkaProducerInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import org.apache.pekko
5 |
6 | import scala.concurrent.Future
7 |
8 | import java.util.concurrent.ConcurrentLinkedQueue
9 |
10 | import pekko.Done
11 | import pekko.stream.scaladsl.Sink
12 |
13 | class MockedKafkaProducerInterface() extends KafkaProducerInterface {
14 | val producedData: ConcurrentLinkedQueue[ReducedConsumerRecord] = new ConcurrentLinkedQueue[ReducedConsumerRecord]()
15 |
16 | override def getSink: Sink[ReducedConsumerRecord, Future[Done]] =
17 | Sink.foreach[ReducedConsumerRecord] { reducedConsumerRecord =>
18 | producedData.add(reducedConsumerRecord)
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/core-restore/src/test/scala/io/aiven/guardian/kafka/restore/MockedRestoreClientInterface.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.configs.KafkaCluster
4 | import io.aiven.guardian.kafka.restore.configs.Restore
5 | import org.apache.pekko
6 |
7 | import scala.concurrent.Future
8 |
9 | import pekko.NotUsed
10 | import pekko.actor.ActorSystem
11 | import pekko.stream.scaladsl.Flow
12 | import pekko.util.ByteString
13 |
14 | class MockedRestoreClientInterface(backupData: Map[String, ByteString])(implicit
15 | override val kafkaProducerInterface: MockedKafkaProducerInterface,
16 | override val restoreConfig: Restore,
17 | override val kafkaClusterConfig: KafkaCluster,
18 | override val system: ActorSystem
19 | ) extends RestoreClientInterface[MockedKafkaProducerInterface] {
20 |
21 | override def retrieveBackupKeys: Future[List[String]] = Future.successful(
22 | backupData.keys.toList
23 | )
24 |
25 | override def downloadFlow: Flow[String, ByteString, NotUsed] = Flow.fromFunction { key: String =>
26 | backupData(key)
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/core-restore/src/test/scala/io/aiven/guardian/kafka/restore/RestoreClientInterfaceSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.pekko.AnyPropTestKit
5 | import org.apache.pekko.actor.ActorSystem
6 |
7 | class RestoreClientInterfaceSpec
8 | extends AnyPropTestKit(ActorSystem("RestoreClientInterfaceSpec"))
9 | with RestoreClientInterfaceTest {
10 | override val compression: Option[Compression] = None
11 | }
12 |
--------------------------------------------------------------------------------
/core-s3/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | pekko.connectors.s3 {
2 | buffer = ${?PEKKO_CONNECTORS_S3_BUFFER}
3 | disk-buffer-path = ${?PEKKO_CONNECTORS_S3_DISK_BUFFER_PATH}
4 |
5 | forward-proxy {
6 | scheme = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_SCHEME}
7 | host = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_HOST}
8 | port = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_PORT}
9 |
10 | credentials {
11 | username = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_CREDENTIALS_USERNAME}
12 | password = ${?PEKKO_CONNECTORS_S3_FORWARD_PROXY_CREDENTIALS_PASSWORD}
13 | }
14 | }
15 |
16 | aws {
17 | credentials {
18 | access-key-id = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_ACCESS_KEY_ID}
19 | secret-access-key = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_SECRET_ACCESS_KEY}
20 | token = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_TOKEN}
21 | provider = ${?PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER}
22 | }
23 |
24 | region {
25 | default-region = ${?PEKKO_CONNECTORS_S3_REGION_DEFAULT_REGION}
26 | provider = ${?PEKKO_CONNECTORS_S3_REGION_PROVIDER}
27 | }
28 | }
29 |
30 | path-style-access = ${?PEKKO_CONNECTORS_S3_PATH_STYLE_ACCESS}
31 | access-style = ${?PEKKO_CONNECTORS_S3_ACCESS_STYLE}
32 | endpoint-url = ${?PEKKO_CONNECTORS_S3_ENDPOINT_URL}
33 | list-bucket-api-version = ${?PEKKO_CONNECTORS_S3_LIST_BUCKET_API_VERSION}
34 | validate-object-key = ${?PEKKO_CONNECTORS_S3_VALIDATE_OBJECT_KEY}
35 |
36 | retry-settings {
37 | max-retries = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MAX_RETRIES}
38 | min-backoff = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MIN_BACKOFF}
39 | max-backoff = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_MAX_BACKOFF}
40 | random-factor = ${?PEKKO_CONNECTORS_S3_RETRY_SETTINGS_RANDOM_FACTOR}
41 | }
42 | }
43 |
44 | s3-headers = {
45 | canned-acl = ${?S3_HEADERS_CANNED_ACL}
46 | storage-class = ${?S3_HEADERS_STORAGE_CLASS}
47 | server-side-encryption = ${?S3_HEADERS_SERVER_SIDE_ENCRYPTION}
48 | }
49 |
50 | s3-config = {
51 | data-bucket = ${?S3_CONFIG_DATA_BUCKET}
52 | data-bucket-prefix = ${?S3_CONFIG_DATA_BUCKET_PREFIX}
53 | error-restart-settings = {
54 | min-backoff = 5 millis
55 | min-backoff = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MIN_BACKOFF}
56 | max-backoff = 10 seconds
57 | max-backoff = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_BACKOFF}
58 | random-factor = 0.2
59 | random-factor = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_RANDOM_FACTOR}
60 | max-restarts = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_RESTARTS}
61 | max-restarts-within = ${?S3_CONFIG_ERROR_RESTART_SETTINGS_MAX_RESTARTS_WITHIN}
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/core-s3/src/main/scala/io/aiven/guardian/kafka/s3/Config.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 | package s3
3 |
4 | import io.aiven.guardian.kafka.PureConfigUtils._
5 | import io.aiven.guardian.kafka.s3.configs.S3
6 | import org.apache.pekko
7 | import pureconfig.ConfigCursor
8 | import pureconfig.ConfigReader
9 | import pureconfig.ConfigReader._
10 | import pureconfig.ConfigSource
11 | import pureconfig.error.UserValidationFailed
12 |
13 | import scala.annotation.nowarn
14 | import scala.concurrent.duration.FiniteDuration
15 |
16 | import pekko.stream.RestartSettings
17 | import pekko.stream.connectors.s3.MetaHeaders
18 | import pekko.stream.connectors.s3.S3Headers
19 | import pekko.stream.connectors.s3.headers.CannedAcl
20 | import pekko.stream.connectors.s3.headers.ServerSideEncryption
21 | import pekko.stream.connectors.s3.headers.StorageClass
22 |
23 | trait Config {
24 |
25 |   // TODO Unfortunately the following boilerplate is here because the Pekko Connectors S3 library provides no public constructors
26 | // for S3Headers apart from the limited S3Headers(). This means we can't use pureconfig.generic.auto._ and hence
27 | // we have to write this out manually
28 |
29 | implicit val cannedACLConfigReader: ConfigReader[CannedAcl] = (cur: ConfigCursor) =>
30 | cur.asString.flatMap {
31 | case CannedAcl.AuthenticatedRead.value => Right(CannedAcl.AuthenticatedRead)
32 | case CannedAcl.AwsExecRead.value => Right(CannedAcl.AwsExecRead)
33 | case CannedAcl.BucketOwnerFullControl.value => Right(CannedAcl.BucketOwnerFullControl)
34 | case CannedAcl.BucketOwnerRead.value => Right(CannedAcl.BucketOwnerRead)
35 | case CannedAcl.Private.value => Right(CannedAcl.Private)
36 | case CannedAcl.PublicRead.value => Right(CannedAcl.PublicRead)
37 | case CannedAcl.PublicReadWrite.value => Right(CannedAcl.PublicReadWrite)
38 | case rest => Left(failure(cur, rest, "CannedAcl"))
39 | }
40 |
41 | implicit val metaHeadersConfigReader: ConfigReader[MetaHeaders] = mapReader[String].map(MetaHeaders.apply)
42 |
43 | implicit val storageClassConfigReader: ConfigReader[StorageClass] = (cur: ConfigCursor) =>
44 | cur.asString.flatMap {
45 | case StorageClass.Standard.storageClass => Right(StorageClass.Standard)
46 | case StorageClass.InfrequentAccess.storageClass => Right(StorageClass.InfrequentAccess)
47 | case StorageClass.Glacier.storageClass => Right(StorageClass.Glacier)
48 | case StorageClass.ReducedRedundancy.storageClass => Right(StorageClass.ReducedRedundancy)
49 | case rest => Left(failure(cur, rest, "StorageClass"))
50 | }
51 |
52 | implicit val serverSideEncryptionReader: ConfigReader[ServerSideEncryption] = (cur: ConfigCursor) =>
53 | cur.fluent.at("type").asString.flatMap {
54 | case "aes256" =>
55 | Right(ServerSideEncryption.aes256())
56 | case "kms" =>
57 | ConfigReader
58 | .forProduct2("key-id", "context") { (keyId: String, context: Option[String]) =>
59 | val base = ServerSideEncryption.kms(keyId)
60 | context.fold(base)(base.withContext)
61 | }
62 | .from(cur)
63 | case "customer-keys" =>
64 | ConfigReader
65 | .forProduct2("key", "md5") { (key: String, md5: Option[String]) =>
66 | val base = ServerSideEncryption.customerKeys(key)
67 | md5.fold(base)(base.withMd5)
68 | }
69 | .from(cur)
70 | }
71 |
72 | implicit val s3HeadersConfigReader: ConfigReader[S3Headers] =
73 | ConfigReader.forProduct5("canned-acl",
74 | "meta-headers",
75 | "storage-class",
76 | "custom-headers",
77 | "server-side-encryption"
78 | ) {
79 | (cannedAcl: Option[CannedAcl],
80 | metaHeaders: Option[MetaHeaders],
81 | storageClass: Option[StorageClass],
82 | customHeaders: Option[Map[String, String]],
83 | serverSideEncryption: Option[ServerSideEncryption]
84 | ) =>
85 | val base = S3Headers()
86 | val base2 = cannedAcl.fold(base)(base.withCannedAcl)
87 | val base3 = metaHeaders.fold(base2)(base2.withMetaHeaders)
88 | val base4 = storageClass.fold(base3)(base3.withStorageClass)
89 | val base5 = customHeaders.fold(base4)(base4.withCustomHeaders)
90 | serverSideEncryption.fold(base5)(base5.withServerSideEncryption)
91 | }
92 |
93 | implicit lazy val s3Headers: S3Headers = ConfigSource.default.at("s3-headers").loadOrThrow[S3Headers]
94 |
95 | // See https://pureconfig.github.io/docs/error-handling.html#validations-in-custom-readers for details
96 | // on custom validation
97 | private val restartSettingsBase = ConfigReader.forProduct5(
98 | "min-backoff",
99 | "max-backoff",
100 | "random-factor",
101 | "max-restarts",
102 | "max-restarts-within"
103 | ) {
104 | (minBackoff: FiniteDuration,
105 | maxBackoff: FiniteDuration,
106 | randomFactor: Double,
107 | maxRestarts: Option[Int],
108 | maxRestartsWithin: Option[FiniteDuration]
109 | ) =>
110 | (minBackoff, maxBackoff, randomFactor, maxRestarts, maxRestartsWithin)
111 | }
112 |
113 | implicit val restartSettingsConfigReader: ConfigReader[RestartSettings] =
114 | ConfigReader.fromCursor[RestartSettings] { cur =>
115 | restartSettingsBase.from(cur).flatMap {
116 | case (_, _, _, Some(_), None) =>
117 | cur.failed(
118 | UserValidationFailed(
119 | "Both max-restarts and max-restarts-within need to exist if defining a maximum restarts configuration, max-restarts-within is missing"
120 | )
121 | )
122 | case (_, _, _, None, Some(_)) =>
123 | cur.failed(
124 | UserValidationFailed(
125 | "Both max-restarts and max-restarts-within need to exist if defining a maximum restarts configuration, max-restarts is missing"
126 | )
127 | )
128 | case (minBackoff, maxBackoff, randomFactor, Some(maxRestarts), Some(maxRestartsWithin)) =>
129 | Right(RestartSettings(minBackoff, maxBackoff, randomFactor).withMaxRestarts(maxRestarts, maxRestartsWithin))
130 | case (minBackoff, maxBackoff, randomFactor, None, None) =>
131 | Right(RestartSettings(minBackoff, maxBackoff, randomFactor))
132 | }
133 | }
134 |
135 | @nowarn("cat=lint-byname-implicit")
136 | implicit lazy val s3Config: S3 = {
137 | import pureconfig.generic.auto._
138 | ConfigSource.default.at("s3-config").loadOrThrow[S3]
139 | }
140 | }
141 |
142 | object Config extends Config
143 |
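A small sketch of the validation above (not from the repository): loading restart settings that define `max-restarts` without `max-restarts-within` yields the `UserValidationFailed` error rather than a `RestartSettings` instance.

    import io.aiven.guardian.kafka.s3.Config.restartSettingsConfigReader
    import org.apache.pekko.stream.RestartSettings
    import pureconfig.ConfigSource

    val result = ConfigSource
      .string("""
        |min-backoff = 5 millis
        |max-backoff = 10 seconds
        |random-factor = 0.2
        |max-restarts = 5
        |""".stripMargin)
      .load[RestartSettings]
    // result is a Left(...) whose failure reports the missing max-restarts-within key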
--------------------------------------------------------------------------------
/core-s3/src/main/scala/io/aiven/guardian/kafka/s3/configs/S3.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.s3.configs
2 |
3 | import org.apache.pekko.stream.RestartSettings
4 |
5 | /** S3 specific configuration used when storing Kafka ConsumerRecords to a S3 bucket
6 | *
7 | * @param dataBucket
8 | * The bucket where a Kafka Consumer directly streams data into as storage
9 | * @param dataBucketPrefix
10 | * Prefix for the data bucket (if any)
11 | * @param errorRestartSettings
12 |   * Restart settings that are used whenever a pekko-stream encounters an error
13 | */
14 | final case class S3(dataBucket: String, dataBucketPrefix: Option[String], errorRestartSettings: RestartSettings)
15 |
16 | object S3 {
17 | def apply(dataBucket: String, errorRestartSettings: RestartSettings): S3 = S3(dataBucket, None, errorRestartSettings)
18 | }
19 |
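A hedged sketch of building this configuration programmatically (bucket name and backoff values are illustrative), using the two-argument `apply` that leaves the prefix empty:

    import scala.concurrent.duration._
    import org.apache.pekko.stream.RestartSettings
    import io.aiven.guardian.kafka.s3.configs.S3

    val s3Config: S3 = S3("guardian-example-backup", RestartSettings(10.millis, 30.seconds, 0.2))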
--------------------------------------------------------------------------------
/core-s3/src/main/scala/io/aiven/guardian/kafka/s3/errors/S3Errors.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.s3.errors
2 |
3 | import io.aiven.guardian.kafka.Errors
4 | import org.apache.pekko
5 |
6 | import pekko.http.scaladsl.model.headers.ByteRange
7 | import pekko.stream.connectors.s3.S3Headers
8 |
9 | sealed abstract class S3Errors extends Errors
10 |
11 | object S3Errors {
12 | final case class ExpectedObjectToExist(bucket: String,
13 | key: String,
14 | range: Option[ByteRange],
15 | versionId: Option[String],
16 | s3Headers: S3Headers
17 | ) extends S3Errors {
18 | override def getMessage: String = {
19 | val finalVersionId = versionId.getOrElse("latest")
20 | s"S3 object key:$key and version:$finalVersionId inside bucket:$bucket doesn't exist. S3 headers are ${s3Headers.toString()}"
21 | }
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/core-s3/src/test/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <!-- Console appender configuration; XML markup lost in extraction. Log pattern: -->
2 | <!-- %d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n -->
--------------------------------------------------------------------------------
/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/Generators.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.s3
2 |
3 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config}
4 | import org.apache.pekko.stream.RestartSettings
5 | import org.scalacheck.Gen
6 |
7 | import scala.annotation.nowarn
8 | import scala.concurrent.duration._
9 | import scala.language.postfixOps
10 |
11 | object Generators {
12 | val MaxBucketLength: Int = 63
13 |
14 | // See https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html for valid
15 |   // bucket names
16 |
17 | lazy val bucketLetterOrNumberCharGen: Gen[Char] = Gen.frequency(
18 | (1, Gen.numChar),
19 | (1, Gen.alphaLowerChar)
20 | )
21 |
22 | def bucketAllCharGen(useVirtualDotHost: Boolean): Gen[Char] = {
23 | val base = List(
24 | (10, Gen.alphaLowerChar),
25 | (1, Gen.const('-')),
26 | (1, Gen.numChar)
27 | )
28 |
29 | val frequency = if (useVirtualDotHost) (1, Gen.const('.')) +: base else base
30 |
31 | Gen.frequency(frequency: _*)
32 | }
33 |
34 | @nowarn("msg=not.*?exhaustive")
35 | private def checkInvalidDuplicateChars(chars: List[Char]): Boolean =
36 | chars.sliding(2).forall { case Seq(before, after) =>
37 | !(before == '.' && after == '.' || before == '-' && after == '.' || before == '.' && after == '-')
38 | }
39 |
40 | private def checkAlphaChar(c: Char): Boolean =
41 | c >= 'a' && c <= 'z'
42 |
43 | private def allCharCheck(useVirtualDotHost: Boolean, string: String): Boolean =
44 | if (useVirtualDotHost) {
45 | string.forall(char => Character.isDigit(char) || checkAlphaChar(char) || char == '-' || char == '.') &&
46 | checkInvalidDuplicateChars(string.toList)
47 | } else
48 | string.forall(char => Character.isDigit(char) || checkAlphaChar(char) || char == '-')
49 |
50 | def validatePrefix(useVirtualDotHost: Boolean, prefix: Option[String]): Option[String] = {
51 | val withoutWhitespace = prefix match {
52 | case Some(value) if value.trim == "" => None
53 | case Some(value) => Some(value)
54 | case None => None
55 | }
56 |
57 | withoutWhitespace match {
58 | case Some(value) if !(Character.isDigit(value.head) || checkAlphaChar(value.head)) =>
59 | throw new IllegalArgumentException(
60 | s"Invalid starting digit for prefix $value, ${value.head} needs to be an alpha char or digit"
61 | )
62 | case Some(value) if value.length > 1 =>
63 | if (!allCharCheck(useVirtualDotHost, value.drop(1)))
64 | throw new IllegalArgumentException(
65 | s"Prefix $value contains invalid characters"
66 | )
67 | case Some(value) if value.length > MaxBucketLength - 1 =>
68 | throw new IllegalArgumentException(
69 |         s"Prefix is too long, it has size ${value.length} whereas the max bucket length is $MaxBucketLength"
70 | )
71 | case _ => ()
72 | }
73 |
74 | withoutWhitespace
75 | }
76 |
77 | def bucketNameGen(useVirtualDotHost: Boolean, prefix: Option[String] = None): Gen[String] = {
78 | val finalPrefix = validatePrefix(useVirtualDotHost, prefix)
79 |
80 | for {
81 | range <- {
82 | val maxLength = finalPrefix match {
83 | case Some(p) => MaxBucketLength - p.length
84 | case None => MaxBucketLength
85 | }
86 |
87 | if (maxLength > 3)
88 | Gen.choose(3, maxLength)
89 | else
90 | Gen.const(maxLength)
91 | }
92 | startString = finalPrefix.getOrElse("")
93 |
94 | bucketName <- range match {
95 | case 3 =>
96 | for {
97 | first <- bucketLetterOrNumberCharGen
98 | second <- bucketAllCharGen(useVirtualDotHost)
99 | third <- bucketLetterOrNumberCharGen
100 | } yield startString ++ List(first, second, third).mkString
101 | case _ =>
102 | for {
103 | first <- bucketLetterOrNumberCharGen
104 | last <- bucketLetterOrNumberCharGen
105 | middle <- {
106 | val gen = Gen.listOfN(range - 2, bucketAllCharGen(useVirtualDotHost))
107 | if (useVirtualDotHost) gen.filter(checkInvalidDuplicateChars) else gen
108 | }
109 | } yield startString ++ first.toString ++ middle.mkString ++ last.toString
110 | }
111 | } yield bucketName
112 | }
113 |
114 | val restartSetting: RestartSettings = RestartSettings(
115 | 5 millis,
116 | 10 seconds,
117 | 0.2
118 | )
119 |
120 | def s3ConfigGen(useVirtualDotHost: Boolean, prefix: Option[String] = None): Gen[S3Config] = for {
121 | dataBucket <- bucketNameGen(useVirtualDotHost, prefix)
122 | } yield S3Config(dataBucket, restartSetting)
123 |
124 | }
125 |
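A small sketch of drawing a bucket name from these generators (the prefix is illustrative and just has to satisfy `validatePrefix`):

    import io.aiven.guardian.kafka.s3.Generators
    import org.scalacheck.Gen

    val bucketGen: Gen[String] = Generators.bucketNameGen(useVirtualDotHost = false, Some("guardian-test-"))
    val sample: Option[String] = bucketGen.sample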
--------------------------------------------------------------------------------
/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/Main.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.s3
2 |
3 | import cats.data.NonEmptyList
4 | import cats.implicits._
5 | import com.monovore.decline.Command
6 | import com.monovore.decline.CommandApp
7 | import com.monovore.decline.Opts
8 | import com.typesafe.scalalogging.LazyLogging
9 | import io.aiven.guardian.kafka.s3.Entry.computeAndDeleteBuckets
10 | import org.apache.pekko
11 |
12 | import scala.concurrent._
13 | import scala.concurrent.duration._
14 | import scala.util.control.NonFatal
15 |
16 | import pekko.actor.ActorSystem
17 | import pekko.stream.Attributes
18 | import pekko.stream.connectors.s3.S3Attributes
19 | import pekko.stream.connectors.s3.S3Settings
20 | import pekko.stream.connectors.s3.scaladsl.S3
21 | import pekko.stream.scaladsl.Sink
22 |
23 | class Entry
24 | extends CommandApp(
25 | name = "guardian-s3-test-utils",
26 | header = "Guardian S3 Test Utilities",
27 | main = {
28 | val cleanBucketsCommand = Command(
29 | name = "clean-buckets",
30 | header = "Clean buckets left over by Guardian S3 tests"
31 | ) {
32 | val prefixOpt: Opts[String] =
33 | Opts
34 | .option[String]("prefix", help = "Only delete buckets with specified prefix")
35 |
36 | val excludeBucketsOpt: Opts[Option[NonEmptyList[String]]] =
37 | Opts
38 | .options[String]("exclude-buckets",
39 | help = "Buckets that will always be excluded from cleanup, irrespective of prefix"
40 | )
41 | .orNone
42 |
43 | (prefixOpt, excludeBucketsOpt).tupled
44 | }
45 |
46 | Opts.subcommand(cleanBucketsCommand).map { case (bucketPrefix, excludeBuckets) =>
47 | implicit val system: ActorSystem = ActorSystem()
48 | implicit val ec: ExecutionContext = system.dispatcher
49 | implicit val s3Settings: S3Settings = S3Settings()
50 |
51 | val excludeBucketsSet = excludeBuckets.map(_.toList.toSet).getOrElse(Set.empty)
52 |
53 | try {
54 | Await.result(computeAndDeleteBuckets(bucketPrefix, excludeBucketsSet), Duration.Inf)
55 | System.exit(0)
56 | } catch {
57 | case NonFatal(_) =>
58 | System.exit(1)
59 | }
60 | }
61 | }
62 | )
63 |
64 | object Entry extends LazyLogging {
65 | def computeAndDeleteBuckets(bucketPrefix: String, excludeBuckets: Set[String])(implicit
66 | executionContext: ExecutionContext,
67 | system: ActorSystem,
68 | s3Settings: S3Settings
69 | ): Future[Set[String]] = for {
70 | bucketsToDelete <- computeBucketsToDelete(bucketPrefix, excludeBuckets)
71 | _ <- if (bucketsToDelete.nonEmpty) {
72 | deleteBuckets(bucketsToDelete)
73 | } else
74 | Future {
75 | logger.info("No buckets to delete")
76 | }
77 | } yield bucketsToDelete
78 |
79 | def computeBucketsToDelete(bucketPrefix: String, excludeBuckets: Set[String])(implicit
80 | system: ActorSystem,
81 | s3Settings: S3Settings
82 | ): Future[Set[String]] =
83 | S3.listBuckets()
84 | .withAttributes(S3Attributes.settings(s3Settings))
85 | .runWith(Sink.seq)
86 | .map { allBuckets =>
87 | allBuckets.map(_.name).toSet.filter(fromS3Bucket => fromS3Bucket.startsWith(bucketPrefix)).diff(excludeBuckets)
88 | }(ExecutionContext.parasitic)
89 |
90 | def deleteBuckets(
91 | buckets: Set[String]
92 | )(implicit executionContext: ExecutionContext, system: ActorSystem, s3Settings: S3Settings): Future[Unit] = {
93 | implicit val s3Attrs: Attributes = S3Attributes.settings(s3Settings)
94 | val futures = buckets.map { bucket =>
95 | logger.info(s"Deleting bucket $bucket")
96 | S3TestUtils.cleanAndDeleteBucket(bucket)
97 | }
98 | Future.sequence(futures).map(_ => ())(ExecutionContext.parasitic)
99 | }
100 | }
101 |
102 | object Main extends Entry
103 |
--------------------------------------------------------------------------------
/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/MinioContainer.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.s3
2 |
3 | import com.dimafeng.testcontainers.GenericContainer
4 | import org.testcontainers.containers.wait.strategy.Wait
5 |
6 | import java.time.Duration
7 |
8 | class MinioContainer(accessKey: String, secretKey: String)
9 | extends GenericContainer(
10 | "minio/minio",
11 | exposedPorts = List(9000),
12 | waitStrategy = Some(Wait.forHttp("/minio/health/ready").forPort(9000).withStartupTimeout(Duration.ofSeconds(10))),
13 | command = List("server", "/data"),
14 | env = Map(
15 | "MINIO_ACCESS_KEY" -> accessKey,
16 | "MINIO_SECRET_KEY" -> secretKey
17 | )
18 | ) {
19 |
20 | def getHostAddress: String =
21 | s"http://${container.getHost}:${container.getMappedPort(9000)}"
22 | }
23 |
--------------------------------------------------------------------------------
/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/MinioS3Test.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.s3
2 |
3 | import com.dimafeng.testcontainers.ForAllTestContainer
4 | import org.apache.pekko
5 | import org.scalatest.Suite
6 | import software.amazon.awssdk.auth.credentials.AwsBasicCredentials
7 | import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider
8 | import software.amazon.awssdk.regions.Region
9 | import software.amazon.awssdk.regions.providers.AwsRegionProvider
10 |
11 | import pekko.stream.connectors.s3.AccessStyle
12 | import pekko.stream.connectors.s3.S3Settings
13 | import pekko.testkit.TestKitBase
14 |
15 | trait MinioS3Test extends ForAllTestContainer with TestKitBase { this: Suite =>
16 | private val S3DummyAccessKey = "DUMMY_ACCESS_KEY"
17 | private val S3DummySecretKey = "DUMMY_SECRET_KEY"
18 |
19 | lazy val s3Settings: S3Settings = S3Settings()
20 | .withEndpointUrl(container.getHostAddress)
21 | .withCredentialsProvider(
22 | StaticCredentialsProvider.create(AwsBasicCredentials.create(S3DummyAccessKey, S3DummySecretKey))
23 | )
24 | .withS3RegionProvider(new AwsRegionProvider {
25 | lazy val getRegion: Region = Region.US_EAST_1
26 | })
27 | .withAccessStyle(AccessStyle.PathAccessStyle)
28 |
29 | override lazy val container: MinioContainer = new MinioContainer(S3DummyAccessKey, S3DummySecretKey)
30 | }
31 |
--------------------------------------------------------------------------------
/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/S3TestUtils.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.s3
2 |
3 | import com.typesafe.scalalogging.StrictLogging
4 | import markatta.futiles.Retry
5 | import org.apache.pekko
6 |
7 | import scala.concurrent.ExecutionContext
8 | import scala.concurrent.Future
9 | import scala.concurrent.duration._
10 | import scala.language.postfixOps
11 |
12 | import pekko.actor.ActorSystem
13 | import pekko.stream.Attributes
14 | import pekko.stream.connectors.s3.scaladsl.S3
15 | import pekko.stream.scaladsl.Sink
16 |
17 | object S3TestUtils extends StrictLogging {
18 |
19 | /** Completely cleans a bucket contents as well as deleting it afterwards.
20 | */
21 | def cleanAndDeleteBucket(bucket: String)(implicit system: ActorSystem, s3Attrs: Attributes): Future[Unit] = {
22 | implicit val ec: ExecutionContext = system.dispatcher
23 | for {
24 | _ <- S3.deleteBucketContents(bucket, deleteAllVersions = true).withAttributes(s3Attrs).runWith(Sink.ignore)
25 | multiParts <-
26 | S3.listMultipartUpload(bucket, None).withAttributes(s3Attrs).runWith(Sink.seq)
27 | _ <- Future.sequence(multiParts.map { part =>
28 | S3.deleteUpload(bucket, part.key, part.uploadId)
29 | })
30 | _ <- Retry.retryWithBackOff(
31 | 5,
32 | 100 millis,
33 | throwable => throwable.getMessage.contains("The bucket you tried to delete is not empty")
34 | )(S3.deleteBucket(bucket))
35 | _ = logger.info(s"Completed deleting bucket $bucket")
36 | } yield ()
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/core-s3/src/test/scala/org/apache/pekko/stream/connectors/s3/GeneratorsSpec.scala:
--------------------------------------------------------------------------------
1 | package org.apache.pekko.stream.connectors.s3
2 |
3 | import com.typesafe.config.Config
4 | import com.typesafe.config.ConfigFactory
5 | import com.typesafe.config.ConfigValueFactory
6 | import io.aiven.guardian.kafka.s3.Generators
7 | import org.scalacheck.Gen
8 | import org.scalatest.matchers.must.Matchers
9 | import org.scalatest.propspec.AnyPropSpec
10 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks
11 |
12 | import scala.annotation.nowarn
13 |
14 | class GeneratorsSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks {
15 |
16 | def createBasicConfigFactory(virtualDotHost: Boolean): Config = {
17 | @nowarn("msg=possible missing interpolator: detected an interpolated expression")
18 | val baseS3SettingsConf =
19 | """
20 | |buffer = "memory"
21 | |disk-buffer-path = ""
22 | |
23 | |aws {
24 | | credentials {
25 | | provider = default
26 | | }
27 | | region {
28 | | provider = default
29 | | }
30 | |}
31 | |access-style = virtual
32 | |list-bucket-api-version = 2
33 | |validate-object-key = true
34 | |retry-settings {
35 | | max-retries = 3
36 | | min-backoff = 200ms
37 | | max-backoff = 10s
38 | | random-factor = 0.0
39 | |}
40 | |multipart-upload {
41 | | retry-settings = ${retry-settings}
42 | |}
43 | |sign-anonymous-requests = true
44 | |""".stripMargin
45 |
46 | val config = ConfigFactory.parseString(baseS3SettingsConf).resolve()
47 | if (virtualDotHost)
48 | config.withValue("access-style", ConfigValueFactory.fromAnyRef("virtual"))
49 | else
50 | config.withValue("access-style", ConfigValueFactory.fromAnyRef("path"))
51 | }
52 |
53 | property("Bucket name generators generates valid bucket names according to S3Settings with virtualDotHost") {
54 | forAll(Generators.bucketNameGen(useVirtualDotHost = true)) { bucket =>
55 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(true)))
56 | }
57 | }
58 |
59 | property("Bucket name generators generates valid bucket names according to S3Settings without virtualDotHost") {
60 | forAll(Generators.bucketNameGen(useVirtualDotHost = false)) { bucket =>
61 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(false)))
62 | }
63 | }
64 |
65 | def withPrefixGen(useVirtualDotHost: Boolean): Gen[String] = for {
66 | range <- Gen.choose(2, Generators.MaxBucketLength - 3)
67 | firstChar <- Generators.bucketLetterOrNumberCharGen
68 | chars <- Gen.listOfN(range, Generators.bucketAllCharGen(useVirtualDotHost = false))
69 | bucketName <- Generators.bucketNameGen(useVirtualDotHost, Some((firstChar +: chars).mkString))
70 | } yield bucketName
71 |
72 | property(
73 | "Bucket name generators generates valid bucket names according to S3Settings with virtualDotHost and prefix"
74 | ) {
75 | forAll(withPrefixGen(useVirtualDotHost = true)) { bucket =>
76 | noException must be thrownBy BucketAndKey.validateBucketName(bucket, S3Settings(createBasicConfigFactory(true)))
77 | }
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/core/README.md:
--------------------------------------------------------------------------------
1 | # Guardian for Apache Kafka - Core
2 |
3 | This module contains core configuration for setting up the Kafka Consumer
4 |
5 | By default core uses [Alpakka Kafka][alpakka-kafka] to interact with a Kafka cluster; however, you can also provide your
6 | own implementation by extending the `io.aiven.guardian.kafka.KafkaClientInterface`. Since Kafka consumers handle auto
7 | commit of cursors, the `KafkaClientInterface` uses a `SourceWithContext` so that it's possible for the `Source`
8 | to automatically commit cursors when topics are read successfully.
9 |
10 | ## Configuration
11 |
12 | Specification (including environment variable overrides) can be found [here](/src/main/resources/reference.conf).
13 |
14 | The primary `io.aiven.guardian.kafka.KafkaClient` is configured using [Alpakka Kafka][alpakka-kafka] [Consumer
15 | configuration](https://doc.akka.io/docs/alpakka-kafka/current/consumer.html) which also contains the default values.
16 | The committing of Kafka cursors also requires
17 | [CommitterSettings configuration](https://doc.akka.io/docs/alpakka-kafka/current/consumer.html#committer-sink).
18 |
19 | There is also a generic `io.aiven.guardian.kafka.configs.KafkaCluster` configuration at `"kafka-cluster"` for anything not specific
20 | to the Kafka consumer, i.e. which topics to back up/compact/restore.
21 |
22 | [alpakka-kafka]: https://doc.akka.io/docs/alpakka-kafka/current/home.html
23 |
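A minimal sketch of loading the `kafka-cluster` configuration (topic names are illustrative; `io.aiven.guardian.kafka.Config` performs the same `ConfigSource.default.at("kafka-cluster")` call against the application configuration):

    import io.aiven.guardian.kafka.configs.KafkaCluster
    import pureconfig.ConfigSource
    import pureconfig.generic.auto._

    val cluster: KafkaCluster = ConfigSource
      .string("""kafka-cluster = { topics = ["orders", "payments"] }""")
      .at("kafka-cluster")
      .loadOrThrow[KafkaCluster]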
--------------------------------------------------------------------------------
/core/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | # See https://github.com/akka/akka-http/issues/3201 and https://discuss.lightbend.com/t/about-nomoreelementsneeded-exception/8599
2 |
3 | pekko.http.client.stream-cancellation-delay = 1000 millis
4 | pekko.http.client.stream-cancellation-delay = ${?PEKKO_HTTP_CLIENT_STREAM_CANCELLATION_DELAY}
5 |
6 | kafka-cluster = {
7 | topics = []
8 | topics = ${?KAFKA_CLUSTER_TOPICS}
9 | }
10 |
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/Config.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | import io.aiven.guardian.kafka.configs.KafkaCluster
4 | import pureconfig.ConfigSource
5 | import pureconfig.generic.auto._
6 |
7 | import scala.annotation.nowarn
8 |
9 | trait Config {
10 |
11 | @nowarn("cat=lint-byname-implicit")
12 | implicit lazy val kafkaClusterConfig: KafkaCluster =
13 | ConfigSource.default.at("kafka-cluster").loadOrThrow[KafkaCluster]
14 | }
15 |
16 | object Config extends Config
17 |
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/Errors.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | trait Errors extends Exception
4 |
5 | object Errors {
6 | case object ExpectedStartOfSource extends Errors {
7 | override def getMessage: String = "Always expect a single element at the start of a stream"
8 | }
9 |
10 | final case class UnhandledStreamCase[T](elems: Seq[T]) extends Errors {
11 | override def getMessage: String = s"Unhandled case for stream ${elems.map(_.toString).mkString(",")}"
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/ExtensionsMethods.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | import java.time.OffsetDateTime
4 |
5 | object ExtensionsMethods {
6 |
7 | implicit final class OffsetDateTimeMethods(value: OffsetDateTime) {
8 | def >(other: OffsetDateTime): Boolean = value.compareTo(other) > 0
9 | def >=(other: OffsetDateTime): Boolean = value.compareTo(other) > 0 || value == other
10 | def <(other: OffsetDateTime): Boolean = value.compareTo(other) < 0
11 | def <=(other: OffsetDateTime): Boolean = value.compareTo(other) < 0 || value == other
12 | }
13 |
14 | }
15 |
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/PureConfigUtils.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | import pureconfig.ConfigCursor
4 | import pureconfig.error.CannotConvert
5 | import pureconfig.error.ConfigReaderFailures
6 | import pureconfig.error.ConvertFailure
7 |
8 | object PureConfigUtils {
9 | private[kafka] def failure(cur: ConfigCursor, value: String, `type`: String) = ConfigReaderFailures(
10 | ConvertFailure(
11 | CannotConvert(value, `type`, s"Invalid ${`type`}"),
12 | cur
13 | )
14 | )
15 | }
16 |
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/Utils.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | import scala.annotation.tailrec
4 |
5 | import java.time.OffsetDateTime
6 | import java.time.format.DateTimeParseException
7 |
8 | object Utils {
9 |
10 | private def parseToOffsetDateTime(string: String): Option[OffsetDateTime] =
11 | try
12 | Some(OffsetDateTime.parse(string))
13 | catch {
14 | case _: DateTimeParseException =>
15 | None
16 | }
17 |
18 | @tailrec
19 | def keyToOffsetDateTime(key: String): OffsetDateTime = {
20 | val withoutExtension = key.substring(0, key.lastIndexOf('.'))
21 | parseToOffsetDateTime(withoutExtension) match {
22 | case Some(offsetDateTime) => offsetDateTime
23 | case None => keyToOffsetDateTime(withoutExtension)
24 | }
25 | }
26 | }
27 |
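A worked example of the extension-stripping recursion above (the key shape is illustrative):

    // "2021-01-01T12:00:00Z.json.gz" -> "2021-01-01T12:00:00Z.json" -> "2021-01-01T12:00:00Z"
    val time = Utils.keyToOffsetDateTime("2021-01-01T12:00:00Z.json.gz")
    // time == java.time.OffsetDateTime.parse("2021-01-01T12:00:00Z")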
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/codecs/Circe.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.codecs
2 |
3 | import io.aiven.guardian.kafka.models.ReducedConsumerRecord
4 | import io.circe._
5 | import io.circe.syntax._
6 | import org.apache.kafka.common.record.TimestampType
7 |
8 | trait Circe {
9 | implicit val kafkaTimestampTypeDecoder: Decoder[TimestampType] = (c: HCursor) =>
10 | c.as[Int].flatMap { id =>
11 | TimestampType
12 | .values()
13 | .find(_.id == id)
14 | .toRight(DecodingFailure(s"No TimestampType with $id", c.history))
15 | }
16 |
17 | implicit val kafkaTimestampTypeEncoder: Encoder[TimestampType] = Encoder.instance[TimestampType](_.id.asJson)
18 |
19 | implicit val reducedConsumerRecordDecoder: Decoder[ReducedConsumerRecord] = Decoder.forProduct7(
20 | "topic",
21 | "partition",
22 | "offset",
23 | "key",
24 | "value",
25 | "timestamp",
26 | "timestamp_type"
27 | )(ReducedConsumerRecord.apply)
28 |
29 | implicit val reducedConsumerRecordEncoder: Encoder[ReducedConsumerRecord] = Encoder.forProduct7(
30 | "topic",
31 | "partition",
32 | "offset",
33 | "key",
34 | "value",
35 | "timestamp",
36 | "timestamp_type"
37 | )(x => ReducedConsumerRecord.unapply(x).get)
38 | }
39 |
40 | object Circe extends Circe
41 |
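A short round-trip sketch with these codecs (record contents are illustrative; key and value are Base64 strings, as elsewhere in Guardian):

    import io.aiven.guardian.kafka.codecs.Circe._
    import io.aiven.guardian.kafka.models.ReducedConsumerRecord
    import io.circe.syntax._
    import org.apache.kafka.common.record.TimestampType

    val record =
      ReducedConsumerRecord("orders", 0, 42L, Some("a2V5"), "dmFsdWU=", 1609502400000L, TimestampType.CREATE_TIME)

    val json = record.asJson                  // timestamp_type is written as the numeric TimestampType id
    val back = json.as[ReducedConsumerRecord] // Right(record)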
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/configs/KafkaCluster.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.configs
2 |
3 | /** @param topics
4 | * The set of topics to subscribe to (and hence backup and restore)
5 | */
6 | final case class KafkaCluster(topics: Set[String])
7 |
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/models/BackupObjectMetadata.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.models
2 |
3 | final case class BackupObjectMetadata(compression: Option[CompressionType])
4 |
5 | object BackupObjectMetadata {
6 | def fromKey(key: String): BackupObjectMetadata =
7 | if (key.endsWith(".gz"))
8 | BackupObjectMetadata(Some(Gzip))
9 | else
10 | BackupObjectMetadata(None)
11 | }
12 |
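For example (key names are illustrative), compression is inferred from the extension alone:

    BackupObjectMetadata.fromKey("2021-01-01T12:00:00Z.json.gz") // BackupObjectMetadata(Some(Gzip))
    BackupObjectMetadata.fromKey("2021-01-01T12:00:00Z.json")    // BackupObjectMetadata(None)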
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/models/CompressionType.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.models
2 |
3 | sealed trait CompressionType {
4 | val pretty: String
5 | }
6 |
7 | case object Gzip extends CompressionType {
8 | override val pretty: String = "Gzip"
9 | }
10 |
--------------------------------------------------------------------------------
/core/src/main/scala/io/aiven/guardian/kafka/models/ReducedConsumerRecord.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.models
2 |
3 | import org.apache.kafka.common.record.TimestampType
4 |
5 | import java.time.Instant
6 | import java.time.OffsetDateTime
7 | import java.time.ZoneId
8 |
9 | /** A `ConsumerRecord` that only contains the necessary data for guardian
10 | *
11 | * @param topic
12 | * The kafka topic (same as `ConsumerRecord` `topic`)
13 | * @param offset
14 | * The kafka offset (same as `ConsumerRecord` `offset`)
15 | * @param key
16 | * Base64 encoded version of the original ConsumerRecord key as a byte array
17 | * @param value
18 | * Base64 encoded version of the original ConsumerRecord value as a byte array
19 | * @param timestamp
20 | * The timestamp value (same as `ConsumerRecord` `timestamp`)
21 | * @param timestampType
22 | * The timestamp type (same as `ConsumerRecord` `timestampType`)
23 | */
24 | final case class ReducedConsumerRecord(topic: String,
25 | partition: Int,
26 | offset: Long,
27 | key: Option[String],
28 | value: String,
29 | timestamp: Long,
30 | timestampType: TimestampType
31 | ) {
32 | def toOffsetDateTime: OffsetDateTime =
33 | Instant.ofEpochMilli(this.timestamp).atZone(ZoneId.of("UTC")).toOffsetDateTime
34 | }
35 |
--------------------------------------------------------------------------------
/core/src/test/resources/application.conf:
--------------------------------------------------------------------------------
1 | pekko {
2 | log-dead-letters-during-shutdown = false
3 | log-dead-letters = 0
4 | }
5 |
--------------------------------------------------------------------------------
/core/src/test/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |
3 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
4 |         <encoder>
5 |             <pattern>[%highlight(%-5level)] %d{HH:mm:ss.SSS} %logger{0} - %msg%n</pattern>
6 |         </encoder>
7 |     </appender>
8 |
9 |     <appender name="ASYNC" class="ch.qos.logback.classic.AsyncAppender">
10 |         <appender-ref ref="STDOUT"/>
11 |     </appender>
12 |
13 |     <root level="INFO">
14 |         <appender-ref ref="ASYNC"/>
15 |     </root>
16 |
17 | </configuration>
18 |
--------------------------------------------------------------------------------
/core/src/test/scala/io/aiven/guardian/kafka/ConfigSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | import io.aiven.guardian.kafka.configs.KafkaCluster
4 | import org.scalacheck.Arbitrary
5 | import org.scalacheck.Gen
6 | import org.scalatest.matchers.must.Matchers
7 | import org.scalatest.propspec.AnyPropSpec
8 | import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks
9 | import pureconfig.ConfigSource
10 |
11 | class ConfigSpec extends AnyPropSpec with Matchers with ScalaCheckPropertyChecks {
12 | implicit val kafkaClusterArb: Arbitrary[KafkaCluster] = Arbitrary(
13 | Gen.containerOf[Set, String](Gen.alphaStr).map(topics => KafkaCluster(topics))
14 | )
15 |
16 | property("Valid KafkaClusterConfig configs should parse correctly") {
17 | forAll { (kafkaClusterConfig: KafkaCluster) =>
18 | val conf =
19 | s"""
20 | |kafka-cluster = {
21 | | topics = [${kafkaClusterConfig.topics.map(topic => s""""$topic"""").mkString(",")}]
22 | |}
23 | |""".stripMargin
24 |
25 | noException should be thrownBy ConfigSource.string(conf).at("kafka-cluster")
26 | }
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/core/src/test/scala/io/aiven/guardian/kafka/KafkaClusterTest.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | import com.dimafeng.testcontainers.ForAllTestContainer
4 | import com.dimafeng.testcontainers.KafkaContainer
5 | import io.aiven.guardian.kafka.TestUtils.KafkaFutureToCompletableFuture
6 | import io.aiven.guardian.pekko.PekkoStreamTestKit
7 | import org.apache.kafka.clients.CommonClientConfigs
8 | import org.apache.kafka.clients.admin.AdminClient
9 | import org.apache.kafka.clients.admin.NewTopic
10 | import org.apache.kafka.clients.producer.ProducerConfig
11 | import org.apache.kafka.clients.producer.ProducerRecord
12 | import org.apache.kafka.common.serialization.ByteArraySerializer
13 | import org.apache.pekko
14 | import org.scalatest.Suite
15 |
16 | import scala.concurrent.ExecutionContext
17 | import scala.concurrent.Future
18 | import scala.concurrent.duration.FiniteDuration
19 | import scala.concurrent.duration._
20 | import scala.jdk.CollectionConverters._
21 | import scala.jdk.FutureConverters._
22 | import scala.language.postfixOps
23 |
24 | import pekko.Done
25 | import pekko.kafka.ConsumerSettings
26 | import pekko.kafka.ProducerSettings
27 | import pekko.kafka.scaladsl.Producer
28 | import pekko.stream.scaladsl.Source
29 |
30 | trait KafkaClusterTest extends ForAllTestContainer with PekkoStreamTestKit { this: Suite =>
31 |
32 | /** Timeout constant to wait for both Pekko Streams plus initialization of consumer/kafka cluster
33 | */
34 | val KafkaInitializationTimeoutConstant: FiniteDuration = PekkoStreamInitializationConstant + (2.5 seconds)
35 |
36 | override lazy val container: KafkaContainer = new KafkaContainer()
37 |
38 | def baseKafkaConfig: Some[ConsumerSettings[Array[Byte], Array[Byte]] => ConsumerSettings[Array[Byte], Array[Byte]]] =
39 | Some(
40 | _.withBootstrapServers(
41 | container.bootstrapServers
42 | )
43 | )
44 |
45 | /** This config ensures that our producer is atomic since we only ever send a single kafka topic per request and there
46 | * can only be a single request at a given time
47 | * @return
48 | */
49 | def baseProducerConfig
50 | : Some[ProducerSettings[Array[Byte], Array[Byte]] => ProducerSettings[Array[Byte], Array[Byte]]] =
51 | Some(
52 | _.withBootstrapServers(
53 | container.bootstrapServers
54 | ).withProperties(
55 | Map(
56 | ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG -> true.toString,
57 | ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION -> 1.toString,
58 | ProducerConfig.BATCH_SIZE_CONFIG -> 0.toString
59 | )
60 | ).withParallelism(1)
61 | )
62 |
63 | def createProducer(): ProducerSettings[Array[Byte], Array[Byte]] =
64 | ProducerSettings(system, new ByteArraySerializer, new ByteArraySerializer)
65 | .withBootstrapServers(container.bootstrapServers)
66 |
67 | /** Call this function to send a message after the given time period has elapsed, to trigger a rollover so the
68 | * current object will finish processing
69 | * @param duration
70 | * @param producerSettings
71 | * @param topic
72 | * @return
73 | */
74 | def sendTopicAfterTimePeriod(duration: FiniteDuration,
75 | producerSettings: ProducerSettings[Array[Byte], Array[Byte]],
76 | topic: String
77 | ): Future[Done] = pekko.pattern.after(duration) {
78 | Source(
79 | List(
80 | new ProducerRecord[Array[Byte], Array[Byte]](topic, "1".getBytes, "1".getBytes)
81 | )
82 | ).runWith(Producer.plainSink(producerSettings))
83 | }
84 |
85 | protected var adminClient: AdminClient = _
86 |
87 | override def afterStart(): Unit = {
88 | super.afterStart()
89 | adminClient = AdminClient.create(
90 | Map[String, AnyRef](
91 | CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> container.bootstrapServers
92 | ).asJava
93 | )
94 | }
95 |
96 | override def beforeStop(): Unit = {
97 | adminClient.close()
98 | super.beforeStop()
99 | }
100 |
101 | def createTopics(topics: Set[String])(implicit executionContext: ExecutionContext): Future[Unit] =
102 | for {
103 | currentTopics <- adminClient.listTopics().names().toCompletableFuture.asScala
104 | topicsToCreate = topics.diff(currentTopics.asScala.toSet)
105 | _ <- adminClient
106 | .createTopics(topicsToCreate.map { topic =>
107 | new NewTopic(topic, 1, 1.toShort)
108 | }.asJava)
109 | .all()
110 | .toCompletableFuture
111 | .asScala
112 | } yield ()
113 |
114 | def cleanTopics(topics: Set[String])(implicit executionContext: ExecutionContext): Future[Unit] =
115 | for {
116 | currentTopics <- adminClient.listTopics().names().toCompletableFuture.asScala
117 | topicsToDelete = topics.intersect(currentTopics.asScala.toSet)
118 | _ <- adminClient.deleteTopics(topicsToDelete.asJava).all().toCompletableFuture.asScala
119 | } yield ()
120 |
121 | case object TerminationException extends Exception("termination-exception")
122 | }
123 |
--------------------------------------------------------------------------------
/core/src/test/scala/io/aiven/guardian/kafka/TestUtils.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka
2 |
3 | import com.typesafe.scalalogging.LazyLogging
4 | import org.apache.kafka.common.KafkaFuture
5 | import org.apache.pekko
6 |
7 | import scala.collection.immutable
8 | import scala.collection.mutable
9 | import scala.collection.mutable.ListBuffer
10 | import scala.concurrent.ExecutionContext
11 | import scala.concurrent.Future
12 | import scala.jdk.DurationConverters._
13 | import scala.util.Failure
14 | import scala.util.Success
15 |
16 | import java.time.OffsetDateTime
17 | import java.time.temporal.ChronoUnit
18 | import java.util.concurrent.CompletableFuture
19 |
20 | import pekko.actor.ActorSystem
21 |
22 | object TestUtils {
23 |
24 | // Taken from https://stackoverflow.com/a/56763206/1519631
25 | implicit final class KafkaFutureToCompletableFuture[T](kafkaFuture: KafkaFuture[T]) {
26 | @SuppressWarnings(Array("DisableSyntax.null"))
27 | def toCompletableFuture: CompletableFuture[T] = {
28 | val wrappingFuture = new CompletableFuture[T]
29 | kafkaFuture.whenComplete { (value, throwable) =>
30 | if (throwable != null)
31 | wrappingFuture.completeExceptionally(throwable)
32 | else
33 | wrappingFuture.complete(value)
34 | }
35 | wrappingFuture
36 | }
37 | }
38 |
39 | implicit final class ScalaFutureExtensionMethods[T](future: Future[T]) extends LazyLogging {
40 | def onCompleteLogError(f: () => Unit)(implicit executor: ExecutionContext): Unit =
41 | future.onComplete { result =>
42 | result match {
43 | case Failure(exception) => logger.error("Future resulted in error", exception)
44 | case Success(_) => ()
45 | }
46 | f()
47 | }
48 | }
49 |
50 | /** The standard Scala groupBy returns an `immutable.Map` which is unordered; this version returns an ordered
51 | * `ListMap` for when preserving insertion order is important
52 | */
53 | implicit class GroupBy[A](val t: IterableOnce[A]) {
54 | def orderedGroupBy[K](f: A => K): immutable.ListMap[K, List[A]] = {
55 | var m = immutable.ListMap.empty[K, ListBuffer[A]]
56 | for (elem <- t.iterator) {
57 | val key = f(elem)
58 | m = m.updatedWith(key) {
59 | case Some(value) => Some(value.addOne(elem))
60 | case None => Some(mutable.ListBuffer[A](elem))
61 | }
62 | }
63 | m.map { case (k, v) => (k, v.toList) }
64 | }
65 | }
66 |
67 | final case class UnsupportedTimeUnit(chronoUnit: ChronoUnit) extends Exception(s"$chronoUnit not supported")
68 |
69 | private def recurseUntilHitTimeUnit(previousChronoUnit: ChronoUnit, buffer: BigDecimal)(implicit
70 | system: ActorSystem
71 | ): Future[Unit] = {
72 | val now = OffsetDateTime.now()
73 | val (current, max) = previousChronoUnit match {
74 | case ChronoUnit.SECONDS =>
75 | (now.getSecond, 59)
76 | case ChronoUnit.MINUTES =>
77 | (now.getMinute, 59)
78 | case ChronoUnit.HOURS =>
79 | (now.getHour, 23)
80 | case ChronoUnit.DAYS =>
81 | (now.getDayOfWeek.getValue - 1, 6)
82 | case ChronoUnit.MONTHS =>
83 | (now.getMonth.getValue - 1, 11)
84 | case _ => throw UnsupportedTimeUnit(previousChronoUnit)
85 | }
86 |
87 | if (BigDecimal(current) / BigDecimal(max) * BigDecimal(100) <= buffer)
88 | Future.successful(())
89 | else
90 | pekko.pattern.after(previousChronoUnit.getDuration.toScala)(recurseUntilHitTimeUnit(previousChronoUnit, buffer))
91 | }
92 |
93 | def waitForStartOfTimeUnit(chronoUnit: ChronoUnit, buffer: BigDecimal = BigDecimal(5))(implicit
94 | system: ActorSystem
95 | ): Future[Unit] = {
96 | val allEnums = ChronoUnit.values()
97 | val previousEnum = allEnums(chronoUnit.ordinal - 1)
98 | recurseUntilHitTimeUnit(previousEnum, buffer)
99 | }
100 |
101 | }
102 |
--------------------------------------------------------------------------------
/core/src/test/scala/io/aiven/guardian/pekko/AnyPropTestKit.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.pekko
2 |
3 | import org.apache.pekko
4 | import org.scalatest.fixture
5 | import org.scalatest.propspec.FixtureAnyPropSpecLike
6 |
7 | import pekko.actor.ActorSystem
8 | import pekko.testkit.TestKitBase
9 |
10 | class AnyPropTestKit(_system: ActorSystem)
11 | extends FixtureAnyPropSpecLike
12 | with TestKitBase
13 | with fixture.TestDataFixture {
14 | implicit val system: ActorSystem = _system
15 | }
16 |
--------------------------------------------------------------------------------
/core/src/test/scala/io/aiven/guardian/pekko/PekkoHttpTestKit.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.pekko
2 |
3 | import org.apache.pekko
4 | import org.scalatest.Suite
5 |
6 | import pekko.actor.ActorSystem
7 | import pekko.http.scaladsl.Http
8 |
9 | trait PekkoHttpTestKit extends PekkoStreamTestKit { this: Suite =>
10 | implicit val system: ActorSystem
11 |
12 | override protected def afterAll(): Unit =
13 | Http(system)
14 | .shutdownAllConnectionPools()
15 | .foreach { _ =>
16 | super.afterAll()
17 | }(system.dispatcher)
18 | }
19 |
--------------------------------------------------------------------------------
/core/src/test/scala/io/aiven/guardian/pekko/PekkoStreamTestKit.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.pekko
2 |
3 | import com.typesafe.scalalogging.CanLog
4 | import com.typesafe.scalalogging.Logger
5 | import com.typesafe.scalalogging.LoggerTakingImplicit
6 | import org.apache.pekko
7 | import org.scalatest.BeforeAndAfterAll
8 | import org.scalatest.Suite
9 | import org.scalatest.TestData
10 |
11 | import scala.concurrent.duration._
12 | import scala.language.postfixOps
13 |
14 | import pekko.actor.ActorSystem
15 | import pekko.testkit.TestKit
16 | import pekko.testkit.TestKitBase
17 |
18 | trait PekkoStreamTestKit extends TestKitBase with BeforeAndAfterAll { this: Suite =>
19 | implicit val system: ActorSystem
20 |
21 | override protected def afterAll(): Unit =
22 | TestKit.shutdownActorSystem(system)
23 |
24 | /** If it's not possible to determine whether a Stream has finished in a test and instead you need to use a manual
25 | * wait, make sure you wait at least this period of time for pekko-streams to initialize properly.
26 | */
27 | val PekkoStreamInitializationConstant: FiniteDuration = 1 second
28 |
29 | private implicit case object CanLogTestData extends CanLog[TestData] {
30 | override def logMessage(originalMsg: String, context: TestData): String =
31 | s"${context.name}: $originalMsg"
32 | }
33 |
34 | lazy val logger: LoggerTakingImplicit[TestData] = Logger.takingImplicit[TestData](getClass.getName)
35 | }
36 |
--------------------------------------------------------------------------------
/dependency-check/suppression.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <suppressions xmlns="https://jeremylong.github.io/DependencyCheck/dependency-suppression.1.3.xsd">
3 |     <suppress>
4 |         <notes><![CDATA[
5 |             Suppress the akka CPE match for the akka-stream-json dependency
6 |         ]]></notes>
7 |         <packageUrl regex="true">^pkg:maven/org\.mdedetrich/akka\-stream\-json_2\.13@.*$</packageUrl>
8 |         <cpe>cpe:/a:akka:akka</cpe>
9 |     </suppress>
10 |     <suppress>
11 |         <notes><![CDATA[
12 |             Suppress the akka CPE match for the akka-stream-circe dependency
13 |         ]]></notes>
14 |         <packageUrl regex="true">^pkg:maven/org\.mdedetrich/akka\-stream\-circe_2\.13@.*$</packageUrl>
15 |         <cpe>cpe:/a:akka:akka</cpe>
16 |     </suppress>
17 | </suppressions>
18 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/application/design.md:
--------------------------------------------------------------------------------
1 | # Design
2 |
3 | Each application is contained within a corresponding sbt submodule, i.e. the application for `backup` is contained
4 | within the `cli-backup` sbt submodule. The `core-cli` sbt submodule contains common cli arguments (i.e. `kafka-topics`).
5 |
6 | Scala packaging has been disabled for these submodules, which means that publishing/packaging Guardian won't push
7 | any built `.jar` files. This is because it's unnecessary: these applications are meant to be run as binaries rather
8 | than included as libraries. By the same token, this also means that the cli modules are built with global inlining
9 | using `"-opt-inline-from:**"`, see [here](https://www.lightbend.com/blog/scala-inliner-optimizer) for more info.
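10 |
11 | For reference, enabling this kind of global inlining in an sbt build looks roughly like the following sketch (the exact flags used by Guardian live in `build.sbt`):
12 |
13 | ```sbt
14 | // Sketch: turn on the Scala 2 optimizer's inliner and allow inlining from any classfile
15 | scalacOptions ++= Seq(
16 |   "-opt:l:inline",
17 |   "-opt-inline-from:**"
18 | )
19 | ```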
10 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/application/index.md:
--------------------------------------------------------------------------------
1 | # Application
2 |
3 | Guardian is also packaged as various applications that let you run it using a CLI interface. Currently, the
4 | binaries provided are
5 |
6 | * backup: A continuously running binary that performs the backup operation.
7 | * restore: A binary which, when executed, restores an existing backup into a Kafka cluster.
8 |
9 | The CLI follows POSIX guidelines, which means you can use `--help` as an argument to provide information on all of the
10 | parameters.
11 |
12 | @@toc { depth=2 }
13 |
14 | @@@ index
15 |
16 | * [design](design.md)
17 | * [packaging](packaging.md)
18 | * [logging](logging.md)
19 |
20 | @@@
21 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/application/logging.md:
--------------------------------------------------------------------------------
1 | # Logging
2 |
3 | The CLI provides its own default
4 | logback `logback.xml` @github[logging file](/core-cli/src/main/resources/logback.xml) which has sane defaults for
5 | typical usage. It's also possible to provide a custom `logback.xml` configuration file using the `--logback-file`
6 | command line argument.
7 |
8 | For more details about logback and/or the `logback.xml` configuration format read the
9 | @ref:[general architecture section on logging](../general-architecture/logging.md).
10 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/application/packaging.md:
--------------------------------------------------------------------------------
1 | # Packaging
2 |
3 | Guardian is currently packaged using [sbt-native-packager](https://github.com/sbt/sbt-native-packager) to provide the
4 | following formats by using the sbt shell.
5 |
6 | * `rpm`
7 | * restore: `cliRestore/rpm:packageBin`. Created `rpm` file will be contained
8 | in `cli-restore/target/rpm/RPMS/noarch/`
9 | * backup: `cliBackup/rpm:packageBin`. Created `rpm` file will be contained in `cli-backup/target/rpm/RPMS/noarch/`
10 | NOTE: In order to build `rpm` packages you need to have [rpm-tools](https://rpm.org/) (specifically `rpmbuild`)
11 | installed and available on `PATH`. Please consult your Linux distribution's documentation for more info
12 | * `zip`
13 | * restore: `cliRestore/universal:packageBin`. Created `zip` file will be contained
14 | in `cli-restore/target/universal/`
15 | * backup: `cliBackup/universal:packageBin`. Created `zip` file will be contained in `cli-backup/target/universal/`
16 | * `tar`
17 | * restore: `cliRestore/universal:packageZipTarball`. Created `tar` file will be contained
18 | in `cli-restore/target/universal/`
19 | * backup: `cliBackup/universal:packageZipTarball`. Created `tar` file will be contained
20 | in `cli-backup/target/universal/`
21 | * `xz`
22 | * restore: `cliRestore/universal:packageXzTarball`. Created `xz` file will be contained
23 | in `cli-restore/target/universal/`
24 | * backup: `cliBackup/universal:packageXzTarball`. Created `xz` file will be contained
25 | in `cli-backup/target/universal/`
26 |
27 | Note that for these package formats you need to have a JRE installed on your system to run the package. For more details
28 | about packaging read the [docs](https://sbt-native-packager.readthedocs.io/en/latest/).
29 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/backup/configuration.md:
--------------------------------------------------------------------------------
1 | # Configuration
2 |
3 | ## Reference
4 |
5 | @@snip (/core-backup/src/main/resources/reference.conf)
6 |
7 | Scala API doc @apidoc[kafka.backup.configs.Backup]
8 |
9 | ## Explanation
10 |
11 | * `pekko.kafka.consumer`: See @extref:[documentation](pekko-connectors-kafka-docs:consumer.html#settings)
12 | * `pekko.kafka.consumer.kafka-clients`: See @extref:[documentation](kafka-docs:documentation.html#consumerconfigs)
13 | * `backup`:
14 | * `kafka-group-id`: The group id for the Kafka consumer that the backup tool uses
15 | * `time-configuration`: How to slice the persisted keys/files based by time
16 | * `type`: The type of time configuration. Either `period-from-first` or `chrono-unit-slice`
17 | * `period-from-first`: Guardian will split up the backup keys/files as determined by the specified `duration`.
18 | The key/filename will be determined by the timestamp of the first message received from the Kafka consumer,
19 | with each further key/filename being incremented by the configured `duration`. If Guardian is shut down
20 | then it will terminate and complete the stream, with the final element in the JSON array being a `null`
21 | * This is done so it's possible to determine whether a backup has been terminated by a shutdown of Guardian
22 | and also because it's not really possible to resume using arbitrary durations.
23 | * `chrono-unit-slice`: Guardian will split up the backup keys/files as determined by the `chrono-unit`, which
24 | represents intervals such as days and weeks. As such, when using this setting it's possible for Guardian to
25 | resume from a previously uncompleted backup.
26 | * `duration`: If the time configuration is `period-from-first` then this determines the maximum period of time for each
27 | time slice.
28 | * `chrono-unit`: If the time configuration is `chrono-unit-slice` then the `chrono-unit` determines the length of each time slice (e.g. hours or days).
29 | * `commit-timeout-buffer-window`: Guardian sets the commit timeout of the Kafka consumer based on the `time-configuration`
30 | since Guardian does manual committing of cursors. The buffer gets added onto the `time-configuration` to give
31 | some headroom for any theoretical delays.
32 | * `compression`: The compression format to use for the data being backed up. Note that changes in compression
33 | configuration will not apply for any currently existing backups that need to be completed, only for future
34 | new backups.
35 | * `type`: Which compression to use.
36 | * `gzip`. Standard [Gzip](https://en.wikipedia.org/wiki/Gzip) compression
37 | * `level`: The level of compression to use
38 |
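39 | As an illustration only (the values are made up; the authoritative defaults live in the reference configuration included above), a `period-from-first` backup with Gzip compression could be configured roughly like this:
40 |
41 | ```hocon
42 | backup {
43 |   # Consumer group used by the backup tool's Kafka consumer
44 |   kafka-group-id = "guardian-backup"
45 |   time-configuration = {
46 |     # Slice keys/files by a fixed duration starting from the first consumed message
47 |     type = period-from-first
48 |     duration = 1 hour
49 |   }
50 |   # Extra headroom added on top of the commit timeout derived from the time configuration
51 |   commit-timeout-buffer-window = 10 seconds
52 |   compression = {
53 |     type = gzip
54 |     level = 5
55 |   }
56 | }
57 | ```
58 |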
--------------------------------------------------------------------------------
/docs/src/main/paradox/backup/design.md:
--------------------------------------------------------------------------------
1 | # Design
2 |
3 | The format for backups is in JSON consisting of a large JSON array filled with JSON objects that have the following
4 | format.
5 |
6 | ```json
7 | {
8 | "topic": "kafka topic",
9 | "partition": 0,
10 | "offset": 0,
11 | "key": "a2V5",
12 | "value": "dmFsdWU=",
13 | "timestamp": 0,
14 | "timestamp_type": 0
15 | }
16 | ```
17 |
18 | The `key` and `value` are Base64 encoded byte arrays (in the above example `"a2V5"` decodes to the string `key`
19 | and `"dmFsdWU="` decodes to the string `value`). This is due to the fact that the backup tool can make no assumptions on
20 | the format of the key or value, so we encode the raw byte arrays.
21 |
22 | One thing to note is that it's possible for the last JSON object in the JSON array to be `null`; see the @ref:[configuration](configuration.md) page for more info.
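23 |
24 | For example, the `key` and `value` from the JSON object above can be decoded back into their original byte arrays with any standard Base64 decoder; the following is a minimal illustration (not part of Guardian's API) using the JDK:
25 |
26 | ```scala
27 | import java.nio.charset.StandardCharsets
28 | import java.util.Base64
29 |
30 | // Decode the Base64 encoded key/value from the example JSON object
31 | val key: Array[Byte]   = Base64.getDecoder.decode("a2V5")
32 | val value: Array[Byte] = Base64.getDecoder.decode("dmFsdWU=")
33 |
34 | // In this particular example the bytes happen to be UTF-8 strings
35 | println(new String(key, StandardCharsets.UTF_8))   // prints: key
36 | println(new String(value, StandardCharsets.UTF_8)) // prints: value
37 | ```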
23 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/backup/index.md:
--------------------------------------------------------------------------------
1 | # Backup
2 |
3 | The backup module is responsible for backing up a specific set of Kafka topics into persistent storage. The backup
4 | runs as a continuous stream that is split into configurable time buckets.
5 |
6 | @@project-info { projectId="coreBackup" }
7 |
8 | @@toc { depth=2 }
9 |
10 | @@@ index
11 |
12 | * [configuration](configuration.md)
13 | * [design](design.md)
14 |
15 | @@@
16 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/ci.md:
--------------------------------------------------------------------------------
1 | # CI - Continuous Integration
2 |
3 | Guardian uses github actions to perform CI whenever a pull request is made and when a pull request is merged into
4 | master. CI is also responsible for publishing to github pages. The integration with github actions for the main build is
5 | performed using [sbt-github-actions][sbt-github-actions-link].
6 |
7 | ## Design
8 |
9 | One thing to note about [sbt-github-actions][sbt-github-actions-link] is that it generates the github workflow files
10 | directly from the sbt @github[build definition file](/build.sbt).
11 | This means that the `build.sbt` is the source of truth and hence [sbt-github-actions][sbt-github-actions-link] also
12 | checks that the github workflow is in sync with `build.sbt` as part of the CI process.
13 |
14 | Essentially that means any changes to `build.sbt` (such as updating Scala versions) can also cause changes in github
15 | workflow actions. Likewise, if you need to make any custom changes to
16 | the @github[ci.yml](/.github/workflows/ci.yml) file you need to do so in `build.sbt` using
17 | the [sbt-github-actions][sbt-github-actions-link] sbt DSL.
18 |
19 | To regenerate the relevant github workflow files after changes to `build.sbt` are done you need to run
20 |
21 | ```
22 | githubWorkflowGenerate
23 | ```
24 |
25 | in the sbt shell. For more information go [here](https://github.com/djspiewak/sbt-github-actions#generative-plugin).
26 |
27 | ## Scalafmt
28 |
29 | In addition to, and separately from, [sbt-github-actions][sbt-github-actions-link], Guardian also has
30 | a [scalafmt][scalafmt-link] pipeline that checks the code is correctly formatted on each PR. This allows the
31 | @github[scalafmt pipeline](/.github/workflows/format.yml) to run at the same time the main build
32 | does. Furthermore, it uses [scalafmt-native](https://scalameta.org/scalafmt/docs/installation.html#native-image) for
33 | improved runtime performance (typically it takes 5-10 seconds to check the entire project is formatted).
34 |
35 | This means that if you ever update the scalafmt version in
36 | the @github[configuration file](/.scalafmt.conf#L1) you also need to update it in the
37 | @github[scalafmt-pipeline](/.github/workflows/format.yml#L26).
38 |
39 | [sbt-github-actions-link]: https://github.com/djspiewak/sbt-github-actions
40 | [scalafmt-link]: https://scalameta.org/scalafmt/
41 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/doc-generation.md:
--------------------------------------------------------------------------------
1 | # Document Generation
2 |
3 | Guardian uses [sbt-paradox][sbt-paradox-link] as the main plugin for generating documentation which is hosted
4 | using [github pages][github-pages-link]. In addition various other plugins are used which are noted below
5 |
6 | * [sbt-paradox-api-doc](https://github.com/lightbend/sbt-paradox-apidoc): Allows you to directly link to Scala
7 | documentation using the `@@apidoc` directive
8 | * [sbt-paradox-project-info](https://github.com/lightbend/sbt-paradox-project-info): Provides an `@@projectInfo`
9 | directive that derives common information about the project (such as dependencies, project info etc etc)
10 | * [sbt-site](https://github.com/sbt/sbt-site): Used in conjunction with [sbt-paradox][sbt-paradox-link] to generate the
11 | final site structure
12 | * [sbt-ghpages](https://github.com/sbt/sbt-ghpages): Used for uploading the final site
13 | to [github-pages][github-pages-link].
14 | * [sbt-unidoc](https://github.com/sbt/sbt-unidoc): Used to aggregate/concatenate Scala API documentation
15 | from various sbt modules into a single documentation result
16 |
17 | ## Design
18 |
19 | [sbt-paradox][sbt-paradox-link] generates documentation using standard [Markdown](https://www.markdownguide.org/). The
20 | documentation can be found in the @github[docs-folder](/docs). Note that this folder also corresponds to an sbt module
21 | named `docs`, which means that commands related to documentation are run in that sbt sub-project
22 | (i.e. `docs/makeSite` generates the documentation site).
23 |
24 | Guardian also uses [scaladoc][scaladoc-link] which is already included within Scala compiler/SBT to generate Scala API
25 | documentation. [scaladoc][scaladoc-link] is analogous to Java's own [javadoc](https://en.wikipedia.org/wiki/Javadoc)
26 | which generates API documentation that is written within the code itself.
27 |
28 | One advantage of using [sbt-paradox][sbt-paradox-link] and its various plugins as the main driver for documentation
29 | generation is that it checks at document generation time (i.e. compile time) that the docs are well-formed. This checking
30 | includes
31 |
32 | * references to other links
33 | * references to specific Scala API documentation directly using Scala classes/objects/traits
34 | * TOC (table of contents) are well-formed (e.g. you don't have markdown files in `docs` which aren't referenced
35 | anywhere)
36 | * references to versions from Guardian's various Scala submodules are always up-to-date
37 | * references to code snippets
38 |
39 | [sbt-paradox-link]: https://github.com/lightbend/paradox
40 | [github-pages-link]: https://pages.github.com/
41 | [scaladoc-link]: https://docs.scala-lang.org/style/scaladoc.html
42 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/general-architecture/index.md:
--------------------------------------------------------------------------------
1 | # General Architecture
2 |
3 | General documentation about how Guardian for Apache Kafka is architected lives here.
4 |
5 | @@toc { depth=2 }
6 |
7 | @@@ index
8 |
9 | * [logging](logging.md)
10 |
11 | @@@
12 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/general-architecture/logging.md:
--------------------------------------------------------------------------------
1 | # Logging
2 |
3 | Guardian for Apache Kafka uses [logback](https://logback.qos.ch/index.html) to perform logging. This means if you are
4 | using the modules as libraries you need to provide a `logback.xml` in your classpath (typically this is done by putting
5 | the `logback.xml` in your `/src/main/resources` folder). Note that the Guardian modules do not provide a default
6 | `logback.xml` for deployed artifacts since this is typically the responsibility of an application to configure and
7 | provide.
8 |
9 | If you want examples of `logback.xml` configuration you can have a look at the
10 | official [logback page](https://logback.qos.ch/manual/configuration.html), but you can also use the existing `logback.xml` files
11 | from either the @github[cli](/core-cli/src/main/resources/logback.xml) or the
12 | @github[tests](/core/src/test/resources/logback.xml) as a reference.
13 |
14 | @@@ warning
15 |
16 | As documented at @extref:[pekko logback configuration](pekko-docs:logging.html#logback-configuration) it is highly recommended
17 | to use an `AsyncAppender` in your configuration as this offloads the logging to a background thread; otherwise you will
18 | end up blocking the core pekko/pekko-streams library whenever a log is made.
19 |
20 | @@@
21 |
22 | ## Logback adapter for pekko/pekko-streams
23 |
24 | By default, pekko/pekko-streams uses its own asynchronous logger; however, it provides a
25 | @extref:[logging adapter](pekko-docs:logging.html#slf4j) which has already been preconfigured for use in Guardian.
26 |
27 | ## CLI/Application
28 |
29 | Note that unlike the core libraries, the CLI application does provide a default `logback.xml`. For more details read
30 | @ref:[application logging](../application/logging.md).
31 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/index.md:
--------------------------------------------------------------------------------
1 | # Guardian for Apache Kafka Documentation
2 |
3 | Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters.
4 | It is built using [Scala](https://www.scala-lang.org/) entirely
5 | with [Pekko-Streams](https://pekko.apache.org/docs/pekko/current/stream/index.html)
6 | to ensure that the tool runs reliably and as desired with large datasets in different scenarios.
7 |
8 | @@toc { depth=2 }
9 |
10 | @@@ index
11 |
12 | * [overview](overview.md)
13 | * [security](security.md)
14 | * [license-report](license-report.md)
15 | * [ci](ci.md)
16 | * [doc-generation](doc-generation.md)
17 | * [general-architecture](general-architecture/index.md)
18 | * [testing](testing/index.md)
19 | * [application](application/index.md)
20 | * [backup](backup/index.md)
21 | * [persistence](persistence/index.md)
22 | * [restore](restore/index.md)
23 |
24 | @@@
25 |
26 | ## Trademarks
27 |
28 | Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or
29 | other countries.
30 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/overview.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters.
4 | It is built using [Scala](https://www.scala-lang.org/) entirely
5 | with [Pekko-Streams](https://pekko.apache.org/docs/pekko/current/stream/index.html)
6 | to ensure that the tool runs as desired with large datasets in different scenarios.
7 |
8 | ## Versions
9 |
10 | The core modules are compiled against:
11 |
12 | * Pekko Streams $pekko.version$+ (@extref:[Reference](pekko-docs:stream/index.html), [Github](https://github.com/apache/incubator-pekko))
13 | * Pekko Streams Circe $pekko-stream-circe.version$+ ([Github](https://github.com/mdedetrich/pekko-streams-circe))
14 | * PureConfig $pure-config.version$+ ([Reference](https://pureconfig.github.io/docs/), [Github](https://github.com/pureconfig/pureconfig))
15 | * ScalaLogging $scala-logging.version$+ ([Github](https://github.com/lightbend/scala-logging))
16 |
17 | The cli modules are compiled against:
18 |
19 | * Decline $decline.version$+ ([Reference](https://ben.kirw.in/decline/), [Github](https://github.com/bkirwi/decline))
20 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/persistence/design.md:
--------------------------------------------------------------------------------
1 | # Design
2 |
3 | Storage mechanisms are implemented via the @apidoc[BackupClientInterface] and @apidoc[RestoreClientInterface]. To add
4 | custom storage mechanisms you need to implement these interfaces. They are designed to be as simple as possible
5 | while being completely abstract to allow for any theoretical storage mechanism.
6 |
7 | ## BackupClientInterface
8 |
9 | The @apidoc[BackupClientInterface] implements the entire backup flow, including resuming from a previously terminated
10 | backup. Of note is the @apidoc[BackupClientInterface.State](BackupClientInterface) which is the data structure that is
11 | returned when any previously existing backup for that key exists. This is provided to
12 | @apidoc[BackupClientInterface.backupToStorageSink](BackupClientInterface) indicating whether the backup being performed
13 | is a new backup or resuming from a previous one with the retrieval of the current state being defined by
14 | @apidoc[BackupClientInterface.getCurrentUploadState](BackupClientInterface).
15 |
16 | Note that when implementing @apidoc[BackupClientInterface] you do not need to handle the corner cases regarding the
17 | contents of the byte string when resuming/suspending/terminating; this is automatically handled for you. Essentially you
18 | just need to handle how to store/push `ByteString` into the storage of your choice.
19 |
20 | ## RestoreClientInterface
21 |
22 | The @apidoc[RestoreClientInterface] implements restoration from an existing backup. Implementing this is quite simple:
23 | you need to define @apidoc[RestoreClientInterface.retrieveBackupKeys](RestoreClientInterface) which returns all valid
24 | keys to restore (i.e. don't include currently in progress backup keys) and
25 | @apidoc[RestoreClientInterface.downloadFlow](RestoreClientInterface) which is a pekko-stream `Flow` that takes
26 | a `String` which is the key and outputs the content of that key.
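27 |
28 | As a purely illustrative sketch (reusing the method shapes of the S3 implementation found elsewhere in this repository), a hypothetical local-filesystem restore client's two members could look roughly like the following; `LocalFilesystemRestoreSketch` and `backupDirectory` are made-up names, not part of Guardian's API:
29 |
30 | ```scala
31 | import org.apache.pekko
32 | import pekko.NotUsed
33 | import pekko.stream.scaladsl.FileIO
34 | import pekko.stream.scaladsl.Flow
35 | import pekko.util.ByteString
36 |
37 | import java.nio.file.{Files, Path, Paths}
38 | import scala.concurrent.Future
39 | import scala.jdk.CollectionConverters._
40 |
41 | object LocalFilesystemRestoreSketch {
42 |   val backupDirectory: Path = Paths.get("/var/backups/guardian")
43 |
44 |   // Every file name is treated as a backup key; a real implementation would also
45 |   // filter out keys that belong to still in-progress backups
46 |   def retrieveBackupKeys: Future[List[String]] =
47 |     Future.successful(
48 |       Files.list(backupDirectory).iterator().asScala.map(_.getFileName.toString).toList
49 |     )
50 |
51 |   // Takes a key and emits the contents of that key as a stream of ByteString chunks
52 |   def downloadFlow: Flow[String, ByteString, NotUsed] =
53 |     Flow[String].flatMapConcat(key => FileIO.fromPath(backupDirectory.resolve(key)))
54 | }
55 | ```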
27 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/persistence/index.md:
--------------------------------------------------------------------------------
1 | # Persistence Modules
2 |
3 | Guardian for Apache Kafka has a modular architecture that provides support for different persistence backends.
4 |
5 | @@toc { depth=2 }
6 |
7 | @@@ index
8 |
9 | * [design](design.md)
10 | * [S3](s3/index.md)
11 |
12 | @@@
--------------------------------------------------------------------------------
/docs/src/main/paradox/persistence/s3/configuration.md:
--------------------------------------------------------------------------------
1 | # Configuration
2 |
3 | ## Reference
4 |
5 | @@snip (/core-s3/src/main/resources/reference.conf)
6 |
7 | Scala API doc @apidoc[kafka.s3.configs.S3]
8 |
9 | ## Explanation
10 |
11 | * `s3-headers`: See @extref:[documentation](pekko-connectors:org/apache/pekko/stream/connectors/s3/headers/index.html)
12 | * `pekko.connectors.s3`: See @extref:[documentation](pekko-connectors-docs:s3.html#configuration)
13 | * `s3-config`: Core S3 configuration
14 | * `data-bucket`: The main S3 bucket where data is backed up and where to restore data from
15 | * `data-bucket-prefix`: S3 prefix configuration to be used when searching for the bucket
16 | * `error-restart-settings`: Specific retry settings when recovering from known errors in S3. See @extref:[apidoc](pekko:org/apache/pekko/stream/RestartSettings.html)
17 |
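18 | As an illustration only, with a made-up bucket name and prefix (the authoritative structure is in the reference configuration included above), the core settings could look roughly like this:
19 |
20 | ```hocon
21 | s3-config {
22 |   # The bucket that backups are written to and restored from
23 |   data-bucket = "guardian-kafka-backups"
24 |   # Only keys under this prefix are considered when listing the bucket
25 |   data-bucket-prefix = "cluster-a/"
26 | }
27 | ```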
--------------------------------------------------------------------------------
/docs/src/main/paradox/persistence/s3/index.md:
--------------------------------------------------------------------------------
1 | # S3
2 |
3 | The S3 persistence module allows you to store kafka backups on [AWS S3 Cloud Storage](https://aws.amazon.com/s3/).
4 |
5 | @@project-info { projectId="coreS3" }
6 | @@project-info { projectId="backupS3" }
7 | @@project-info { projectId="restoreS3" }
8 |
9 | @@toc { depth=2 }
10 |
11 | @@@ index
12 |
13 | * [configuration](configuration.md)
14 |
15 | @@@
16 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/restore/configuration.md:
--------------------------------------------------------------------------------
1 | # Configuration
2 |
3 | ## Reference
4 |
5 | @@snip (/core-restore/src/main/resources/reference.conf)
6 |
7 | Scala API doc @apidoc[kafka.restore.configs.Restore]
8 |
9 | ## Explanation
10 |
11 | * `pekko.kafka.producer`: See @extref:[documentation](pekko-connectors-kafka-docs:producer.html#settings)
12 | * `pekko.kafka.producer.kafka-clients`: See @extref:[documentation](kafka-docs:documentation.html#producerconfigs)
13 | * `restore`:
14 | * `from-when`: An `ISO-8601` time that specifies from when topics need to be restored. Note that the time used is
15 | based on the original Kafka timestamp and **NOT** the current time.
16 | * `override-topics`: A mapping of currently backed up topics to a new topic in the destination Kafka cluster
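17 |
18 | As an illustration only (topic names and the timestamp are made up, and the exact shape of `override-topics` is defined by the reference configuration included above), a restore configuration could look roughly like this:
19 |
20 | ```hocon
21 | restore {
22 |   # Only restore records whose original Kafka timestamp is at or after this ISO-8601 instant
23 |   from-when = "2023-01-01T00:00:00Z"
24 |   # Restore records from the backed up topic on the left into the destination topic on the right
25 |   override-topics = {
26 |     "orders" = "orders-restored"
27 |   }
28 | }
29 | ```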
17 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/restore/index.md:
--------------------------------------------------------------------------------
1 | # Restore
2 |
3 | The restore module is responsible for streaming data from a backup storage location into a fresh new cluster in the
4 | circumstance of a disaster recovery. The restore is able to work with any format of backed up files created by Guardian's
5 | backup.
6 |
7 | @@project-info { projectId="coreRestore" }
8 |
9 | @@toc { depth=2 }
10 |
11 | @@@ index
12 |
13 | * [configuration](configuration.md)
14 |
15 | @@@
16 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/security.md:
--------------------------------------------------------------------------------
1 | # Security
2 |
3 | ## OWASP Report
4 |
5 | Guardian uses [sbt-dependency-check](https://github.com/albuch/sbt-dependency-check) to generate
6 | a [dependency-check-report][dependency-check-report-link] which checks direct and transitive dependencies for
7 | vulnerabilities against [NVD](https://nvd.nist.gov/) in the form of an HTML file that can be viewed in a standard
8 | browser.
9 |
10 | ### Generating a report
11 |
12 | You can use the sbt shell to generate a report at any time using
13 |
14 | ```
15 | dependencyCheckAggregate
16 | ```
17 |
18 | This will overwrite the @github[current report file](/dependency-check/dependency-check-report.html).
19 |
20 | ### Suppressing false positives
21 |
22 | Sometimes it is possible that a false positive gets generated in the report. To suppress a false positive, first you need to
23 | open the @github[report file](/dependency-check/dependency-check-report.html) in a supported browser. In the list of found vulnerabilities there
24 | should be a suppress button which, when clicked, displays a popup containing an `XML` suppression entry. You then add
25 | that `<suppress>` tag entry to the
26 | existing [suppression-file](https://github.com/aiven/guardian-for-apache-kafka/edit/main/dependency-check/suppression.xml).
27 | Finally, regenerate the report using sbt's `dependencyCheckAggregate`.
28 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/testing/index.md:
--------------------------------------------------------------------------------
1 | # Testing
2 |
3 | Guardian for Apache Kafka aims to provide as little friction as possible when running tests (ideally you should
4 | be able to run tests directly and entirely within SBT). As an example this means avoiding handwritten shell scripts to
5 | set up environments since these typically don't play well with IDE integrations such
6 | as the integrated SBT test runners in [Intellij IDEA](https://www.jetbrains.com/idea/)
7 | or [Metals](https://scalameta.org/metals/).
8 |
9 | ## ScalaTest
10 |
11 | Guardian for Apache Kafka uses [scalatest](https://www.scalatest.org/) as its testing framework. The primary reasons for
12 | using this testing framework are
13 |
14 | * It's the most supported testing framework in Scala, so much so that it's considered a critical dependency whenever a
15 | new Scala release is made
16 | * It provides very handy utilities for testing asynchronous code, for example a
17 | @extref:[PatienceConfig](scalatest:concurrent/AbstractPatienceConfiguration$PatienceConfig.html)
18 | that provides efficient polling of Scala futures with configurable scalable timeouts and intervals.
19 | * Pekko provides @extref:[Testkit](pekko-docs:testing.html#asynchronous-testing-testkit) with direct integration into
20 | ScalaTest for easy testing of pekko-streams.
21 |
22 | ### Property based tests
23 |
24 | Guardian for Apache Kafka emphasises using property based testing over unit based tests. This is mainly because
25 | property based tests often reveal more problems by covering far more cases than unit
26 | based tests. Here are more [details](https://www.scalatest.org/user_guide/generator_driven_property_checks)
27 | on how property based testing works with Scala.
28 |
29 | Like most random data generation, ScalaTest/ScalaCheck relies on an initial seed to deterministically generate
30 | the data. When a test fails the seed for the failing test is automatically shown (search for `Init Seed: `).
31 | If you want to specify the seed to regenerate the exact same data that caused the test to fail, you need to
32 | specify it as a test argument in `sbt`
33 |
34 | ```sbt
35 | Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-S", "7832168009826873070")
36 | ```
37 |
38 | where `7832168009826873070` happens to be the seed.
39 |
40 | This argument can be put into any of the projects within the @github[build](/build.sbt). For example if you
41 | want to only specify the seed in the `core` project you can place it like so
42 |
43 | ```sbt
44 | lazy val core = project
45 | .in(file("core"))
46 | .settings(
47 | librarySettings,
48 | name := s"$baseName-core",
49 | Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-S", "7832168009826873070"),
50 | ```
51 |
52 | Whereas if you want it to apply globally you can just place it in the `guardian` project.
53 |
54 | ## Running test/s until failure
55 |
56 | When diagnosing flaky tests it's very useful to be able to run a test until it fails which sbt allows you to
57 | do with [commands](https://www.scala-sbt.org/1.x/docs/Commands.html). Doing this using an sbt command
58 | is far quicker than other options such as a shell script since you don't have to deal with the startup time cost for
59 | every test run.
60 |
61 | This is what the base command looks like
62 |
63 | ```sbt
64 | commands += Command.command("testUntilFailed") { state =>
65 | "test" :: "testUntilFailed" :: state
66 | }
67 | ```
68 |
69 | The command will recursively call a specific task (in this case `test`) until it fails. For it to work with
70 | Guardian for Apache Kafka's @github[build](/build.sbt), you need to place it as a setting
71 | within the `guardian` project.
72 |
73 | Note that this works with any command, not just `test`. For example if you want to only run a single test
74 | suite until failure you can do
75 |
76 | ```sbt
77 | commands += Command.command("testUntilFailed") { state =>
78 | "backupS3/testOnly io.aiven.guardian.kafka.backup.s3.MockedKafkaClientBackupConsumerSpec" :: "testUntilFailed" :: state
79 | }
80 | ```
81 |
82 | Once specified in the @github[build](/build.sbt) file you can then run `testUntilFailed` within the sbt shell.
83 |
84 | ## TestContainers
85 |
86 | [testcontainers](https://www.testcontainers.org/) along with the Scala
87 | wrapper [testcontainers-scala](https://github.com/testcontainers/testcontainers-scala) is used to automate the spinning
88 | up of [docker](https://www.docker.com/) containers whenever the relevant test is run. As long as you have docker installed on your
89 | system you shouldn't have to worry about anything.
90 |
91 | @@toc { depth=2 }
92 |
93 | @@@ index
94 |
95 | * [s3](s3.md)
96 |
97 | @@@
98 |
--------------------------------------------------------------------------------
/docs/src/main/paradox/testing/s3.md:
--------------------------------------------------------------------------------
1 | # S3 - Testing
2 |
3 | For tests that run against the [AWS S3 service](https://aws.amazon.com/s3/) you need to provide the relevant credentials
4 | to S3. The most typical way to provide these credentials is with the usage of environment variables, e.g.
5 |
6 | ```shell
7 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_PROVIDER=static
8 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_ACCESS_KEY_ID="my key"
9 | export PEKKO_CONNECTORS_S3_AWS_CREDENTIALS_SECRET_ACCESS_KEY="my secret"
10 | export PEKKO_CONNECTORS_S3_REGION_PROVIDER=static
11 | export PEKKO_CONNECTORS_S3_REGION_DEFAULT_REGION=eu-central-1
12 | ```
13 |
14 | ## Utilities
15 |
16 | Guardian provides a utility to help deal with running S3 related tests. Due to the possibility of this tool
17 | causing unintended changes to your S3 account, it needs to be manually run in sbt. To run the tool
18 | without any parameters do this
19 |
20 | ```sh
21 | sbt "coreS3/test:runMain io.aiven.guardian.kafka.s3.Main"
22 | ```
23 |
24 | Current commands
25 |
26 | * `cleanup-buckets`: Helps in cleaning up S3 buckets that have been inadvertently left over by tests.
27 |
28 | ## Tagging S3 Tests
29 |
30 | Due to a current limitation where there is no way to expose Github secrets to PRs made from external forks, tests which
31 | run against S3 need to be @extref:[Tagged](scalatest:Tag.html)
32 | using @github[RealS3Available](/core-s3/src/test/scala/io/aiven/guardian/kafka/s3/S3Spec.scala#L45-L48).
33 |
--------------------------------------------------------------------------------
/project/LicenseReport.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 | import sbtlicensereport.SbtLicenseReport
3 | import sbtlicensereport.SbtLicenseReport.autoImportImpl._
4 | import sbtlicensereport.license.{DepModuleInfo, MarkDown}
5 |
6 | object LicenseReport extends AutoPlugin {
7 |
8 | override lazy val projectSettings = Seq(
9 | licenseReportTypes := Seq(MarkDown),
10 | licenseReportMakeHeader := (language => language.header1("License Report")),
11 | licenseConfigurations := Set("compile", "test", "provided"),
12 | licenseDepExclusions := {
13 | case dep: DepModuleInfo if dep.organization == "io.aiven" && dep.name.contains("guardian") =>
14 | true // Inter guardian project dependencies are pointless
15 | case DepModuleInfo(_, "scala-library", _) => true // Scala library is part of Scala language
16 | case DepModuleInfo(_, "scala-reflect", _) => true // Scala reflect is part of Scala language
17 | },
18 | licenseReportColumns := Seq(Column.Category, Column.License, Column.Dependency, Column.Configuration)
19 | )
20 |
21 | override def requires = plugins.JvmPlugin && SbtLicenseReport
22 |
23 | override def trigger = allRequirements
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=1.10.0
2 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
2 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox" % "0.10.7")
3 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox-apidoc" % "1.1.0")
4 | addSbtPlugin("com.lightbend.paradox" % "sbt-paradox-project-info" % "3.0.1")
5 | addSbtPlugin("com.github.sbt" % "sbt-unidoc" % "0.5.0")
6 | addSbtPlugin("com.github.sbt" % "sbt-ghpages" % "0.8.0")
7 | addSbtPlugin("com.thoughtworks.sbt-api-mappings" % "sbt-api-mappings" % "3.0.2")
8 | addSbtPlugin("com.github.sbt" % "sbt-site-paradox" % "1.7.0")
9 | addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.10.0")
10 | addSbtPlugin("com.github.sbt" % "sbt-github-actions" % "0.23.0")
11 | addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1")
12 | addSbtPlugin("com.github.sbt" % "sbt-release" % "1.4.0")
13 | addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.12.1")
14 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.11")
15 | addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.3.11")
16 | addSbtPlugin("net.vonbuchholtz" % "sbt-dependency-check" % "5.1.0")
17 | addSbtPlugin("com.github.sbt" % "sbt-license-report" % "1.5.0")
18 |
19 | // This is here to bump dependencies for sbt-paradox/sbt-site, see
20 | // https://github.com/sirthias/parboiled/issues/175, https://github.com/sirthias/parboiled/issues/128 and
21 | // https://github.com/sirthias/parboiled/pull/195
22 | libraryDependencies ++= Seq(
23 | "org.parboiled" %% "parboiled-scala" % "1.4.1",
24 | "org.parboiled" % "parboiled-java" % "1.4.1"
25 | )
26 |
27 | // See https://github.com/akka/akka-http/pull/3995 and https://github.com/akka/akka-http/pull/3995#issuecomment-1026978593
28 | libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % "always"
29 |
--------------------------------------------------------------------------------
/project/project-info.conf:
--------------------------------------------------------------------------------
1 | project-info {
2 | version: "current"
3 | labels: "https://github.com/aiven/guardian-for-apache-kafka/labels/p%3A"
4 | scaladoc: "https://aiven.github.io/guardian-for-apache-kafka/api/"${project-info.version}"/io/aiven/guardian/"
5 | shared-info {
6 | jdk-versions: ["Adopt OpenJDK 11", "Adopt OpenJDK 17"]
7 | issues: {
8 | url: "https://github.com/aiven/guardian-for-apache-kafka/issues"
9 | text: "Github issues"
10 | }
11 | release-notes: {
12 | url: "https://github.com/aiven/guardian-for-apache-kafka/releases"
13 | text: "GitHub releases"
14 | }
15 | }
16 | backupS3: ${project-info.shared-info} {
17 | title: "Backup S3"
18 | jpms-name: "io.aiven.guardian.kafka.backup.s3"
19 | api-docs: [
20 | {
21 | url: ${project-info.scaladoc}"kafka/backup/s3/index.html"
22 | text: "API (Scaladoc)"
23 | }
24 | ]
25 | }
26 | cliBackup: ${project-info.shared-info} {
27 | title: "CLI Backup"
28 | jpms-name: "io.aiven.guardian.kafka.backup"
29 | }
30 | cliRestore: ${project-info.shared-info} {
31 | title: "CLI Restore"
32 | jpms-name: "io.aiven.guardian.kafka.restore"
33 | }
34 | core: ${project-info.shared-info} {
35 | title: "Core"
36 | jpms-name: "io.aiven.guardian.kafka"
37 | api-docs: [
38 | {
39 | url: ${project-info.scaladoc}"kafka/index.html"
40 | text: "API (Scaladoc)"
41 | }
42 | ]
43 | }
44 | coreBackup: ${project-info.shared-info} {
45 | title: "Core Backup"
46 | jpms-name: "io.aiven.guardian.kafka.backup"
47 | api-docs: [
48 | {
49 | url: ${project-info.scaladoc}"kafka/backup/index.html"
50 | text: "API (Scaladoc)"
51 | }
52 | ]
53 | }
54 | coreCli: ${project-info.shared-info} {
55 | title: "Core CLI"
56 | jpms-name: "io.aiven.guardian.cli"
57 | }
58 | coreRestore: ${project-info.shared-info} {
59 | title: "Core Restore"
60 | jpms-name: "io.aiven.guardian.kafka.restore"
61 | api-docs: [
62 | {
63 | url: ${project-info.scaladoc}"kafka/restore/index.html"
64 | text: "API (Scaladoc)"
65 | }
66 | ]
67 | }
68 | coreS3: ${project-info.shared-info} {
69 | title: "Core S3"
70 | jpms-name: "io.aiven.guardian.kafka.s3"
71 | api-docs: [
72 | {
73 | url: ${project-info.scaladoc}"kafka/s3/index.html"
74 | text: "API (Scaladoc)"
75 | }
76 | ]
77 | }
78 | restoreS3: ${project-info.shared-info} {
79 | title: "Restore S3"
80 | jpms-name: "io.aiven.guardian.kafka.restore.s3"
81 | api-docs: [
82 | {
83 | url: ${project-info.scaladoc}"kafka/restore/s3/index.html"
84 | text: "API (Scaladoc)"
85 | }
86 | ]
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/restore-gcs/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Aiven-Open/guardian-for-apache-kafka/9fadf3388140820b161cf28744d1587b91bf0776/restore-gcs/.gitkeep
--------------------------------------------------------------------------------
/restore-s3/src/main/scala/io/aiven/guardian/kafka/restore/s3/RestoreClient.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore.s3
2 |
3 | import io.aiven.guardian.kafka.configs.KafkaCluster
4 | import io.aiven.guardian.kafka.restore.KafkaProducerInterface
5 | import io.aiven.guardian.kafka.restore.RestoreClientInterface
6 | import io.aiven.guardian.kafka.restore.configs.Restore
7 | import io.aiven.guardian.kafka.s3.configs.{S3 => S3Config}
8 | import org.apache.pekko
9 |
10 | import scala.concurrent.ExecutionContext
11 | import scala.concurrent.Future
12 |
13 | import pekko.NotUsed
14 | import pekko.actor.ActorSystem
15 | import pekko.stream.connectors.s3.S3Attributes
16 | import pekko.stream.connectors.s3.S3Headers
17 | import pekko.stream.connectors.s3.S3Settings
18 | import pekko.stream.connectors.s3.scaladsl.S3
19 | import pekko.stream.scaladsl.Flow
20 | import pekko.stream.scaladsl.Sink
21 | import pekko.util.ByteString
22 |
23 | class RestoreClient[T <: KafkaProducerInterface](maybeS3Settings: Option[S3Settings])(implicit
24 | override val kafkaProducerInterface: T,
25 | override val restoreConfig: Restore,
26 | override val kafkaClusterConfig: KafkaCluster,
27 | override val system: ActorSystem,
28 | s3Config: S3Config,
29 | s3Headers: S3Headers
30 | ) extends RestoreClientInterface[T] {
31 |
32 | override def retrieveBackupKeys: Future[List[String]] = {
33 | implicit val ec: ExecutionContext = system.dispatcher
34 |
35 | val base = S3.listBucket(s3Config.dataBucket, s3Config.dataBucketPrefix, s3Headers)
36 | for {
37 | bucketContents <- maybeS3Settings
38 | .fold(base)(s3Settings => base.withAttributes(S3Attributes.settings(s3Settings)))
39 | .runWith(Sink.collection)
40 | } yield bucketContents.map(_.key).toList
41 | }
42 |
43 | override def downloadFlow: Flow[String, ByteString, NotUsed] =
44 | Flow[String]
45 | .flatMapConcat { key =>
46 | val base = S3.getObject(s3Config.dataBucket, key, None, None, s3Headers)
47 | maybeS3Settings
48 | .fold(base)(s3Settings => base.withAttributes(S3Attributes.settings(s3Settings)))
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/restore-s3/src/test/scala/io/aiven/guardian/kafka/restore/s3/RealS3GzipCompressionRestoreClientSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore.s3
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.kafka.models.Gzip
5 | import io.aiven.guardian.pekko.AnyPropTestKit
6 | import org.apache.pekko.actor.ActorSystem
7 |
8 | class RealS3GzipCompressionRestoreClientSpec
9 | extends AnyPropTestKit(ActorSystem("RealS3GzipCompressionRestoreClientSpec"))
10 | with RealS3RestoreClientTest {
11 | override val compression: Option[Compression] = Some(Compression(Gzip, None))
12 | }
13 |
--------------------------------------------------------------------------------
/restore-s3/src/test/scala/io/aiven/guardian/kafka/restore/s3/RealS3RestoreClientSpec.scala:
--------------------------------------------------------------------------------
1 | package io.aiven.guardian.kafka.restore.s3
2 |
3 | import io.aiven.guardian.kafka.backup.configs.Compression
4 | import io.aiven.guardian.pekko.AnyPropTestKit
5 | import org.apache.pekko.actor.ActorSystem
6 |
7 | class RealS3RestoreClientSpec
8 | extends AnyPropTestKit(ActorSystem("RealS3RestoreClientSpec"))
9 | with RealS3RestoreClientTest {
10 | override val compression: Option[Compression] = None
11 | }
12 |
--------------------------------------------------------------------------------