├── demo ├── iceberg │ ├── .gitignore │ ├── clients │ │ ├── .gitignore │ │ ├── gradle │ │ │ └── wrapper │ │ │ │ ├── gradle-wrapper.jar │ │ │ │ └── gradle-wrapper.properties │ │ ├── gradle.properties │ │ ├── settings.gradle.kts │ │ ├── .gitattributes │ │ └── app │ │ │ ├── src │ │ │ └── main │ │ │ │ └── resources │ │ │ │ └── log4j2.yaml │ │ │ └── build.gradle.kts │ ├── Makefile │ └── README.md ├── .gitignore ├── kraft-s3-minio │ └── Dockerfile └── fake-gcs-server-configure.sh ├── .github ├── CODEOWNERS ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── 01_question.md │ ├── 02_bug.md │ └── 03_feature.md └── workflows │ ├── git.yml │ ├── main_push_and_pull_request_workflow.yml │ └── release_pr_workflow.yml ├── docker └── kafka-jmx-exporter.yml ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── .gitattributes ├── e2e ├── README.md ├── src │ └── integration-test │ │ ├── resources │ │ └── log4j.properties │ │ └── java │ │ └── io │ │ └── aiven │ │ └── kafka │ │ └── tieredstorage │ │ └── e2e │ │ ├── internal │ │ ├── RemoteLogMetadataDeserializer.java │ │ └── RemoteSegment.java │ │ ├── GcsSingleBrokerDirectTest.java │ │ ├── AzureSingleBrokerDirectTest.java │ │ ├── GcsSingleBrokerSocks5Test.java │ │ └── AzureSingleBrokerSocks5Test.java └── build.gradle ├── storage ├── gcs │ ├── src │ │ ├── test │ │ │ └── resources │ │ │ │ ├── test_gcs_credentials.json │ │ │ │ └── log4j.properties │ │ ├── integration-test │ │ │ └── resources │ │ │ │ └── log4j.properties │ │ └── main │ │ │ └── java │ │ │ └── io │ │ │ └── aiven │ │ │ └── kafka │ │ │ └── tieredstorage │ │ │ └── storage │ │ │ └── gcs │ │ │ └── ProxiedHttpTransportFactory.java │ └── build.gradle ├── build.gradle ├── core │ ├── src │ │ ├── main │ │ │ └── java │ │ │ │ └── io │ │ │ │ └── aiven │ │ │ │ └── kafka │ │ │ │ └── tieredstorage │ │ │ │ └── storage │ │ │ │ ├── ObjectKey.java │ │ │ │ ├── StorageBackend.java │ │ │ │ ├── 
InvalidRangeException.java │ │ │ │ ├── StorageBackendException.java │ │ │ │ ├── ObjectUploader.java │ │ │ │ ├── ObjectFetcher.java │ │ │ │ ├── KeyNotFoundException.java │ │ │ │ └── ObjectDeleter.java │ │ └── testFixtures │ │ │ └── java │ │ │ └── io │ │ │ └── aiven │ │ │ └── kafka │ │ │ └── tieredstorage │ │ │ └── storage │ │ │ ├── TestObjectKey.java │ │ │ └── TestUtils.java │ └── build.gradle ├── azure │ ├── src │ │ ├── test │ │ │ ├── resources │ │ │ │ └── log4j.properties │ │ │ └── java │ │ │ │ └── io │ │ │ │ └── aiven │ │ │ │ └── kafka │ │ │ │ └── tieredstorage │ │ │ │ └── storage │ │ │ │ └── azure │ │ │ │ └── MetricCollectorTest.java │ │ └── integration-test │ │ │ ├── resources │ │ │ └── log4j.properties │ │ │ └── java │ │ │ └── io │ │ │ └── aiven │ │ │ └── kafka │ │ │ └── tieredstorage │ │ │ └── storage │ │ │ └── azure │ │ │ ├── AzureBlobStorageConnectionStringTest.java │ │ │ ├── AzureBlobStorageAccountKeyTest.java │ │ │ ├── AzuriteBlobStorageUtils.java │ │ │ └── AzureBlobStorageSasTokenTest.java │ └── build.gradle ├── s3 │ ├── src │ │ ├── test │ │ │ ├── resources │ │ │ │ └── log4j.properties │ │ │ └── java │ │ │ │ └── io │ │ │ │ └── aiven │ │ │ │ └── kafka │ │ │ │ └── tieredstorage │ │ │ │ └── storage │ │ │ │ └── s3 │ │ │ │ └── ByteBufferMarkableInputStreamTest.java │ │ ├── integration-test │ │ │ ├── resources │ │ │ │ └── log4j.properties │ │ │ └── java │ │ │ │ └── io │ │ │ │ └── aiven │ │ │ │ └── kafka │ │ │ │ └── tieredstorage │ │ │ │ └── storage │ │ │ │ └── s3 │ │ │ │ └── S3TestContainer.java │ │ └── main │ │ │ └── java │ │ │ └── io │ │ │ └── aiven │ │ │ └── kafka │ │ │ └── tieredstorage │ │ │ └── storage │ │ │ └── s3 │ │ │ └── ByteBufferMarkableInputStream.java │ └── build.gradle └── filesystem │ ├── src │ ├── test │ │ ├── resources │ │ │ └── log4j.properties │ │ └── java │ │ │ └── io │ │ │ └── aiven │ │ │ └── kafka │ │ │ └── tieredstorage │ │ │ └── storage │ │ │ └── filesystem │ │ │ └── FileSystemStorageConfigTest.java │ └── main │ │ └── java │ │ └── io │ 
│ └── aiven │ │ └── kafka │ │ └── tieredstorage │ │ └── storage │ │ └── filesystem │ │ └── FileSystemStorageConfig.java │ └── build.gradle ├── .gitignore ├── commons ├── build.gradle └── src │ ├── main │ └── java │ │ └── io │ │ └── aiven │ │ └── kafka │ │ └── tieredstorage │ │ └── config │ │ └── validators │ │ ├── NonEmptyPassword.java │ │ ├── Subclass.java │ │ ├── Null.java │ │ └── ValidUrl.java │ └── test │ └── java │ └── io │ └── aiven │ └── kafka │ └── tieredstorage │ └── config │ └── validators │ ├── SubclassTest.java │ ├── NullTest.java │ ├── NonEmptyPasswordTest.java │ └── ValidUrlTest.java ├── checkstyle └── java.header ├── core └── src │ ├── test │ ├── resources │ │ └── log4j.properties │ └── java │ │ └── io │ │ └── aiven │ │ └── kafka │ │ └── tieredstorage │ │ ├── ChunkTest.java │ │ ├── iceberg │ │ └── AvroSchemaRegistryStructureProviderConfigTest.java │ │ ├── manifest │ │ ├── SegmentIndexV1Test.java │ │ ├── index │ │ │ └── VariableSizeChunkIndexEqualsTest.java │ │ └── SegmentManifestV1Test.java │ │ ├── ClosableInputStreamHolderTest.java │ │ ├── fetch │ │ ├── ChunkKeyTest.java │ │ └── SegmentIndexKeyTest.java │ │ ├── config │ │ └── ChunkManagerFactoryConfigTest.java │ │ ├── transform │ │ ├── DetransformFinisherTest.java │ │ └── DecompressionChunkEnumerationTest.java │ │ └── metadata │ │ └── SegmentCustomMetadataSerdeTest.java │ ├── main │ └── java │ │ └── io │ │ └── aiven │ │ └── kafka │ │ └── tieredstorage │ │ ├── UploadMetricReporter.java │ │ ├── manifest │ │ ├── SegmentEncryptionMetadata.java │ │ ├── SegmentIndex.java │ │ ├── SegmentIndexes.java │ │ ├── serde │ │ │ ├── EncryptionSerdeModule.java │ │ │ ├── DataKeySerializer.java │ │ │ └── DataKeyDeserializer.java │ │ ├── SegmentManifest.java │ │ ├── index │ │ │ ├── VariableSizeChunkIndexBuilder.java │ │ │ ├── FixedSizeChunkIndexBuilder.java │ │ │ ├── ChunkIndex.java │ │ │ └── serde │ │ │ │ ├── TransformedChunksDeserializer.java │ │ │ │ └── TransformedChunksSerializer.java │ │ ├── SegmentFormat.java │ │ 
├── SegmentIndexV1.java │ │ └── SegmentIndexesV1Builder.java │ │ ├── SegmentManifestNotFoundException.java │ │ ├── fetch │ │ ├── KeyNotFoundRuntimeException.java │ │ ├── manifest │ │ │ └── SegmentManifestCache.java │ │ ├── ChunkManager.java │ │ ├── index │ │ │ ├── SegmentIndexesCache.java │ │ │ └── SegmentIndexKey.java │ │ ├── ChunkKey.java │ │ ├── ChunkManagerFactory.java │ │ └── cache │ │ │ └── MemoryChunkCache.java │ │ ├── InvalidRecordBatchException.java │ │ ├── transform │ │ ├── DetransformChunkEnumeration.java │ │ ├── TransformChunkEnumeration.java │ │ ├── DecompressionChunkEnumeration.java │ │ ├── DetransformFinisher.java │ │ ├── CompressionChunkEnumeration.java │ │ └── DecryptionChunkEnumeration.java │ │ ├── iceberg │ │ ├── manifest │ │ │ └── BlobTypes.java │ │ ├── AvroSchemaRegistryStructureProviderConfig.java │ │ └── StructureProvider.java │ │ ├── metrics │ │ └── MeasurableValue.java │ │ ├── security │ │ └── DataKeyAndAAD.java │ │ ├── ClosableInputStreamHolder.java │ │ ├── config │ │ ├── KeyPairPaths.java │ │ ├── ChunkCacheConfig.java │ │ └── ChunkManagerFactoryConfig.java │ │ ├── SegmentCompressionChecker.java │ │ └── metadata │ │ └── SegmentCustomMetadataSerde.java │ └── integration-test │ └── java │ └── io │ └── aiven │ └── kafka │ └── tieredstorage │ └── AllOpenedFileInputStreamsAreClosedChecker.java ├── benchmarks ├── src │ └── main │ │ └── resources │ │ └── log4j.properties ├── jmh.sh └── build.gradle ├── settings.gradle ├── SECURITY.md ├── docs └── build.gradle └── Makefile /demo/iceberg/.gitignore: -------------------------------------------------------------------------------- 1 | plugin -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @Aiven-Open/team-helpful-husky 2 | -------------------------------------------------------------------------------- /demo/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Credentials 2 | .env 3 | 4 | # Demo keys 5 | *.pem -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | About this change - What it does 2 | 3 | Resolves: #xxxxx 4 | Why this way 5 | -------------------------------------------------------------------------------- /docker/kafka-jmx-exporter.yml: -------------------------------------------------------------------------------- 1 | lowercaseOutputName: true 2 | lowercaseOutputLabelNames: true 3 | whitelistObjectNames: ["*:*"] 4 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | version=1.2.0-SNAPSHOT 2 | org.gradle.jvmargs=-Xmx4096M 3 | sonatypeUsername= 4 | sonatypePassword= 5 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/tiered-storage-for-apache-kafka/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gradle" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | -------------------------------------------------------------------------------- /demo/kraft-s3-minio/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/kafka:3.9.0 2 | 3 | ARG _VERSION 4 | 5 | USER appuser 6 | 7 | RUN mkdir /var/lib/kafka/tiered-storage-cache 8 | 9 | 
-------------------------------------------------------------------------------- /demo/iceberg/clients/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | 3 | # Ignore Gradle project-specific cache directory 4 | .gradle 5 | 6 | # Ignore Gradle build output directory 7 | build 8 | -------------------------------------------------------------------------------- /demo/iceberg/clients/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aiven-Open/tiered-storage-for-apache-kafka/HEAD/demo/iceberg/clients/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # https://help.github.com/articles/dealing-with-line-endings/ 3 | # 4 | # These are explicitly windows files and should use crlf 5 | *.bat text eol=crlf 6 | 7 | -------------------------------------------------------------------------------- /e2e/README.md: -------------------------------------------------------------------------------- 1 | # End-to-end tests for Kafka tiered storage 2 | 3 | ## Usage 4 | 5 | Docker is needed for running the tests. 6 | 7 | 1. Build the image with < TBD >. 8 | 2. `./gradlew test` 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Aiven Security Bug Bounty 4 | url: https://hackerone.com/aiven_ltd 5 | about: Our bug bounty program. 
6 | -------------------------------------------------------------------------------- /storage/gcs/src/test/resources/test_gcs_credentials.json: -------------------------------------------------------------------------------- 1 | { 2 | "client_id": "test-client-id", 3 | "client_secret": "test-client-secret", 4 | "refresh_token": "x", 5 | "type": "authorized_user" 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | target 3 | docs/_build 4 | .idea 5 | *.iml 6 | *.DS_Store 7 | .gradle 8 | .gradletasknamecache 9 | build/ 10 | rpm/ 11 | rpmbuild/ 12 | *.sh 13 | # ignore benchmark outputs 14 | io.aiven.kafka.tieredstorage*/ 15 | -------------------------------------------------------------------------------- /demo/iceberg/clients/gradle.properties: -------------------------------------------------------------------------------- 1 | # This file was generated by the Gradle 'init' task. 
2 | # https://docs.gradle.org/current/userguide/build_environment.html#sec:gradle_configuration_properties 3 | 4 | org.gradle.configuration-cache=true 5 | 6 | -------------------------------------------------------------------------------- /demo/iceberg/clients/settings.gradle.kts: -------------------------------------------------------------------------------- 1 | plugins { 2 | // Apply the foojay-resolver plugin to allow automatic download of JDKs 3 | id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0" 4 | } 5 | 6 | rootProject.name = "clients" 7 | include("app") 8 | -------------------------------------------------------------------------------- /demo/iceberg/clients/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /demo/iceberg/clients/.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # https://help.github.com/articles/dealing-with-line-endings/ 3 | # 4 | # Linux start script should use lf 5 | /gradlew text eol=lf 6 | 7 | # These are Windows script files and should use crlf 8 | *.bat text eol=crlf 9 | 10 | # Binary files should be left untouched 11 | *.jar binary 12 | 13 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip 4 | networkTimeout=10000 5 | 
zipStoreBase=GRADLE_USER_HOME 6 | zipStorePath=wrapper/dists 7 | distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/01_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Ask a question 3 | about: Got stuck or missing something from the docs? Ask away! 4 | --- 5 | 6 | # What can we help you with? 7 | 8 | 9 | 10 | # Where would you expect to find this information? 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02_bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐜 Report a bug 3 | about: Spotted a problem? Let us know 4 | --- 5 | 6 | # What happened? 7 | 8 | 9 | 10 | # What did you expect to happen? 11 | 12 | 13 | 14 | # What else do we need to know? 15 | 16 | 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/03_feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💡 Feature suggestion 3 | about: What would make this even better? 4 | --- 5 | 6 | # What is currently missing? 7 | 8 | 9 | 10 | # How could this be improved? 11 | 12 | 13 | 14 | # Is this a feature you would work on yourself? 
15 | 16 | * [ ] I plan to open a pull request for this feature 17 | -------------------------------------------------------------------------------- /demo/iceberg/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: plugin 2 | plugin: 3 | rm -r plugin || true 4 | mkdir -p plugin 5 | ../../gradlew -p ../../ :core:distTar :storage:core:distTar :storage:filesystem:distTar :storage:s3:distTar 6 | tar -xf ../../core/build/distributions/core-*.tgz -C plugin --strip-components=1 7 | tar -xf ../../storage/core/build/distributions/core-*.tgz -C plugin --strip-components=1 8 | tar -xf ../../storage/filesystem/build/distributions/filesystem-*.tgz -C plugin --strip-components=1 9 | tar -xf ../../storage/s3/build/distributions/s3-*.tgz -C plugin --strip-components=1 10 | -------------------------------------------------------------------------------- /demo/iceberg/clients/app/src/main/resources/log4j2.yaml: -------------------------------------------------------------------------------- 1 | Configuration: 2 | Properties: 3 | Property: 4 | - name: "logPattern" 5 | value: "[%d] %p %m (%c)%n" 6 | 7 | # Appenders configuration 8 | # See: https://logging.apache.org/log4j/2.x/manual/appenders.html 9 | Appenders: 10 | Console: 11 | name: STDOUT 12 | PatternLayout: 13 | pattern: "${logPattern}" 14 | 15 | # Loggers configuration 16 | # See: https://logging.apache.org/log4j/2.x/manual/configuration.html#configuring-loggers 17 | Loggers: 18 | Root: 19 | level: INFO 20 | AppenderRef: 21 | - ref: STDOUT 22 | -------------------------------------------------------------------------------- /commons/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | -------------------------------------------------------------------------------- /storage/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | -------------------------------------------------------------------------------- /checkstyle/java.header: -------------------------------------------------------------------------------- 1 | /\* 2 | \* Copyright 202[0-9] Aiven Oy 3 | \* 4 | \* Licensed under the Apache License, Version 2.0 \(the "License"\); 5 | \* you may not use this file except in compliance with the License. 
6 | \* You may obtain a copy of the License at 7 | \* 8 | \* http://www.apache.org/licenses/LICENSE-2.0 9 | \* 10 | \* Unless required by applicable law or agreed to in writing, software 11 | \* distributed under the License is distributed on an "AS IS" BASIS, 12 | \* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | \* See the License for the specific language governing permissions and 14 | \* limitations under the License. 15 | \*/ 16 | -------------------------------------------------------------------------------- /.github/workflows/git.yml: -------------------------------------------------------------------------------- 1 | name: Git Checks 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | block-fixup: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v3 11 | - name: Block Fixup Commit Merge 12 | uses: alexkappa/block-fixup-merge-action@v2 13 | 14 | - name: Gen docs 15 | run: make docs 16 | 17 | - name: Check for uncommitted documentation changes 18 | run: | 19 | if [[ -n $(git status -s) ]]; then 20 | echo "There are uncommitted changes after the task:" 21 | git status -s 22 | echo "Update the documentation on the proper *Config.java and commit generated docs" 23 | exit 1 24 | else 25 | echo "No changes detected." 26 | fi 27 | shell: bash 28 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/ObjectKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | public interface ObjectKey { 20 | String value(); 21 | } 22 | -------------------------------------------------------------------------------- /core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /benchmarks/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | log4j.rootLogger=WARN, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/UploadMetricReporter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | @FunctionalInterface 20 | interface UploadMetricReporter { 21 | void report(final ObjectKeyFactory.Suffix suffix, final long bytes); 22 | } 23 | -------------------------------------------------------------------------------- /storage/azure/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /storage/gcs/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /storage/s3/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /storage/filesystem/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /storage/gcs/src/integration-test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /storage/s3/src/integration-test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /storage/azure/src/integration-test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 22 | -------------------------------------------------------------------------------- /storage/filesystem/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | archivesBaseName = "storage-filesystem" 18 | 19 | dependencies { 20 | implementation project(":storage:core") 21 | 22 | implementation "commons-io:commons-io:$apacheCommonsIOVersion" 23 | 24 | testImplementation(testFixtures(project(":storage:core"))) 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/SegmentEncryptionMetadata.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import javax.crypto.SecretKey; 20 | 21 | public interface SegmentEncryptionMetadata { 22 | int ivSize(); 23 | 24 | SecretKey dataKey(); 25 | 26 | byte[] aad(); 27 | } 28 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/SegmentIndex.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import io.aiven.kafka.tieredstorage.storage.BytesRange; 20 | 21 | public interface SegmentIndex { 22 | 23 | int position(); 24 | 25 | int size(); 26 | 27 | BytesRange range(); 28 | } 29 | -------------------------------------------------------------------------------- /demo/iceberg/README.md: -------------------------------------------------------------------------------- 1 | # Getting started with the Iceberg mode 2 | 3 | You will need the following tools: 4 | - JDK version 17 or newer; 5 | - Docker; 6 | - Make. 7 | 8 | Build the plugin code: 9 | ```shell 10 | make plugin 11 | ``` 12 | 13 | Start the services with Docker Compose: 14 | ```shell 15 | docker compose -f docker-compose.yml up 16 | ``` 17 | 18 | Wait until all the containers have started. 19 | 20 | Run the demo code: 21 | ```shell 22 | clients/gradlew run -p clients 23 | ``` 24 | 25 | It will create the `people` topic and fill it with some Avro records. 26 | 27 | Wait until the broker uploads some segments to the remote storage. 28 | 29 | Now you can explore and query the Iceberg table using [Nimtable](http://localhost:3000/data/tables/table?catalog=rest&namespace=default&table=people) (credentials: admin:admin) and inspect the uploaded files in [Minio](http://localhost:9001/browser/warehouse). 30 | 31 | ```sql 32 | SELECT * 33 | FROM `rest`.`default`.`people` 34 | LIMIT 100 35 | ``` 36 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | rootProject.name = 'tiered-storage-for-apache-kafka' 18 | include 'core' 19 | include 'storage' 20 | include 'storage:core' 21 | include 'storage:filesystem' 22 | include 'storage:azure' 23 | include 'storage:gcs' 24 | include 'storage:s3' 25 | include 'e2e' 26 | include 'commons' 27 | include 'docs' 28 | include 'benchmarks' 29 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/StorageBackend.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | import java.io.Closeable; 20 | 21 | import org.apache.kafka.common.Configurable; 22 | 23 | public interface StorageBackend extends Configurable, ObjectUploader, ObjectFetcher, ObjectDeleter, Closeable { 24 | } 25 | -------------------------------------------------------------------------------- /demo/fake-gcs-server-configure.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ## 3 | # Copyright 2023 Aiven Oy 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | ## 17 | 18 | curl -X PUT http://fake-gcs-server:4443/_internal/config \ 19 | -H "Content-Type: application/json" \ 20 | -d '{"externalUrl": "http://fake-gcs-server:4443"}' 21 | 22 | curl -X POST http://fake-gcs-server:4443/storage/v1/b?project=test-project \ 23 | -H "Content-Type: application/json" \ 24 | -d '{"name": "test-bucket"}' 25 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/SegmentManifestNotFoundException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | public class SegmentManifestNotFoundException extends Exception { 20 | public SegmentManifestNotFoundException() { 21 | super(); 22 | } 23 | 24 | public SegmentManifestNotFoundException(final Exception e) { 25 | super(e); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /demo/iceberg/clients/app/build.gradle.kts: -------------------------------------------------------------------------------- 1 | plugins { 2 | application 3 | } 4 | 5 | repositories { 6 | mavenCentral() 7 | maven("https://packages.confluent.io/maven") 8 | } 9 | 10 | dependencies { 11 | implementation("org.apache.kafka:kafka-clients:4.0.0") { 12 | exclude(group = "org.slf4j") 13 | } 14 | implementation("org.apache.avro:avro:1.12.0") { 15 | exclude(group = "org.slf4j") 16 | } 17 | implementation("io.confluent:kafka-avro-serializer:7.9.1") { 18 | exclude(group = "org.slf4j") 19 | } 20 | implementation("com.arakelian:faker:4.0.1") { 21 | exclude(group = "org.slf4j") 22 | } 23 | implementation("org.slf4j:slf4j-api:1.7.36") 24 | implementation("org.apache.logging.log4j:log4j-slf4j-impl:2.24.3") 25 | implementation("com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.16.2") 26 | } 27 | 28 | java { 29 | toolchain { 30 | languageVersion = JavaLanguageVersion.of(11) 31 | } 32 | } 33 | 34 | application { 35 | mainClass = "io.aiven.Clients" 36 | } 37 | -------------------------------------------------------------------------------- 
/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/KeyNotFoundRuntimeException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch; 18 | 19 | import io.aiven.kafka.tieredstorage.storage.KeyNotFoundException; 20 | 21 | public class KeyNotFoundRuntimeException extends RuntimeException { 22 | KeyNotFoundRuntimeException(final KeyNotFoundException keyNotFoundException) { 23 | super(keyNotFoundException); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/InvalidRecordBatchException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | public class InvalidRecordBatchException extends Exception { 20 | public InvalidRecordBatchException(final String message) { 21 | super(message); 22 | } 23 | 24 | public InvalidRecordBatchException(final String message, final Exception e) { 25 | super(message, e); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/InvalidRangeException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | public class InvalidRangeException extends StorageBackendException { 20 | public InvalidRangeException(final String message) { 21 | super(message); 22 | } 23 | 24 | public InvalidRangeException(final String message, final Throwable e) { 25 | super(message, e); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/StorageBackendException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | public class StorageBackendException extends Exception { 20 | 21 | public StorageBackendException(final String message) { 22 | super(message); 23 | } 24 | 25 | public StorageBackendException(final String message, final Throwable e) { 26 | super(message, e); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /e2e/src/integration-test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ## 2 | # Copyright 2023 Aiven Oy 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | ## 16 | 17 | log4j.rootLogger=INFO, stdout 18 | 19 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 20 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 21 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 22 | 23 | log4j.logger.org.apache.kafka.clients.consumer.KafkaConsumer=WARN 24 | 25 | log4j.logger.org.testcontainers=INFO 26 | log4j.logger.tc=INFO 27 | log4j.logger.com.github.dockerjava=WARN 28 | log4j.logger.com.github.dockerjava.zerodep.shaded.org.apache.hc.client5.http.wire=OFF 29 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/ObjectUploader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | import java.io.InputStream; 20 | 21 | public interface ObjectUploader { 22 | /** 23 | * @param inputStream content to upload. Not closed as part of the upload. 24 | * @param key path to an object within a storage backend. 25 | * @return number of bytes uploaded 26 | */ 27 | long upload(InputStream inputStream, ObjectKey key) throws StorageBackendException; 28 | } 29 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/SegmentIndexes.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import org.apache.kafka.server.log.remote.storage.RemoteStorageManager; 20 | 21 | public interface SegmentIndexes { 22 | SegmentIndex offset(); 23 | 24 | SegmentIndex timestamp(); 25 | 26 | SegmentIndex producerSnapshot(); 27 | 28 | SegmentIndex leaderEpoch(); 29 | 30 | SegmentIndex transaction(); 31 | 32 | SegmentIndex segmentIndex(RemoteStorageManager.IndexType indexType); 33 | } 34 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/transform/DetransformChunkEnumeration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import java.util.Enumeration; 20 | 21 | /** 22 | * The enumeration of chunks being de-transformed. 23 | * 24 | *

There are supposed to be multiple implementations doing different de-transformations 25 | * (like decompression and decryption). 26 | * These implementations are supposed to be composable. 27 | */ 28 | public interface DetransformChunkEnumeration extends Enumeration { 29 | } 30 | -------------------------------------------------------------------------------- /storage/s3/src/integration-test/java/io/aiven/kafka/tieredstorage/storage/s3/S3TestContainer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.s3; 18 | 19 | import org.testcontainers.containers.localstack.LocalStackContainer; 20 | import org.testcontainers.utility.DockerImageName; 21 | 22 | final class S3TestContainer { 23 | static LocalStackContainer container() { 24 | return new LocalStackContainer( 25 | DockerImageName.parse("localstack/localstack:2.2.0") 26 | ).withServices(LocalStackContainer.Service.S3); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/ChunkTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | import org.junit.jupiter.api.Test; 20 | 21 | import static org.assertj.core.api.Assertions.assertThat; 22 | 23 | public class ChunkTest { 24 | @Test 25 | void rangeIsInclusive() { 26 | final Chunk chunk = new Chunk(0, 0, 10, 0, 12); 27 | 28 | assertThat(chunk.range().firstPosition()).isZero(); 29 | assertThat(chunk.range().size()).isEqualTo(12); 30 | assertThat(chunk.range().maybeLastPosition()).hasValue(11); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /storage/core/src/testFixtures/java/io/aiven/kafka/tieredstorage/storage/TestObjectKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | import java.util.Objects; 20 | 21 | public class TestObjectKey implements ObjectKey { 22 | private final String key; 23 | 24 | public TestObjectKey(final String key) { 25 | this.key = Objects.requireNonNull(key, "key cannot be null"); 26 | } 27 | 28 | @Override 29 | public String value() { 30 | return key; 31 | } 32 | 33 | @Override 34 | public String toString() { 35 | return key; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/iceberg/manifest/BlobTypes.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.iceberg.manifest; 18 | 19 | interface BlobTypes { 20 | String OFFSET_INDEX = "aiven-tiered-storage-offset-index"; 21 | String TIMESTAMP_INDEX = "aiven-tiered-storage-timestamp-index"; 22 | String PRODUCER_SNAPSHOT_INDEX = "aiven-tiered-storage-producer-snapshot-index"; 23 | String TRANSACTION_INDEX = "aiven-tiered-storage-transaction-index"; 24 | String LEADER_EPOCH_INDEX = "aiven-tiered-storage-leader-epoch-index"; 25 | String FILE_LIST = "aiven-tiered-storage-file-list"; 26 | } 27 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/fetch/manifest/SegmentManifestCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch.manifest; 18 | 19 | import java.io.IOException; 20 | 21 | import org.apache.kafka.common.Configurable; 22 | 23 | import io.aiven.kafka.tieredstorage.manifest.SegmentManifest; 24 | import io.aiven.kafka.tieredstorage.storage.ObjectKey; 25 | import io.aiven.kafka.tieredstorage.storage.StorageBackendException; 26 | 27 | public interface SegmentManifestCache extends Configurable { 28 | SegmentManifest get(final ObjectKey manifestKey) 29 | throws StorageBackendException, IOException; 30 | } 31 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/ObjectFetcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | import java.io.InputStream; 20 | 21 | public interface ObjectFetcher { 22 | /** 23 | * Fetch file. 24 | * 25 | * @param key file key. 26 | */ 27 | InputStream fetch(ObjectKey key) throws StorageBackendException; 28 | 29 | /** 30 | * Fetch file. 31 | * 32 | * @param key file key. 
33 | * @param range range with inclusive start/end positions 34 | */ 35 | InputStream fetch(ObjectKey key, BytesRange range) throws StorageBackendException; 36 | } 37 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/fetch/ChunkManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch; 18 | 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | 22 | import io.aiven.kafka.tieredstorage.manifest.SegmentManifest; 23 | import io.aiven.kafka.tieredstorage.storage.ObjectKey; 24 | import io.aiven.kafka.tieredstorage.storage.StorageBackendException; 25 | 26 | public interface ChunkManager { 27 | 28 | InputStream getChunk(final ObjectKey objectKey, 29 | final SegmentManifest manifest, 30 | final int chunkId) throws StorageBackendException, IOException; 31 | } 32 | -------------------------------------------------------------------------------- /storage/filesystem/src/test/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfigTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.filesystem; 18 | 19 | import java.nio.file.Path; 20 | import java.util.Map; 21 | 22 | import org.junit.jupiter.api.Test; 23 | 24 | import static org.assertj.core.api.Assertions.assertThat; 25 | 26 | class FileSystemStorageConfigTest { 27 | @Test 28 | void minimalConfig() { 29 | final FileSystemStorageConfig config = new FileSystemStorageConfig(Map.of( 30 | "root", "." 
31 | )); 32 | assertThat(config.root()).isEqualTo(Path.of(".")); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /storage/core/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | plugins { 18 | id 'java-library' 19 | id 'java-test-fixtures' 20 | } 21 | 22 | archivesBaseName = "storage-core" 23 | 24 | dependencies { 25 | compileOnly "org.apache.kafka:kafka-clients:$kafkaVersion" 26 | 27 | testFixturesImplementation "org.junit.jupiter:junit-jupiter-api:$junitVersion" 28 | testFixturesImplementation "org.junit.jupiter:junit-jupiter-params:$junitVersion" 29 | 30 | testFixturesImplementation "org.assertj:assertj-core:$assertJVersion" 31 | 32 | testFixturesImplementation "org.apache.kafka:kafka-clients:$kafkaVersion" 33 | 34 | testFixturesImplementation "org.testcontainers:junit-jupiter:$testcontainersVersion" 35 | } 36 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/KeyNotFoundException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with 
the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | public class KeyNotFoundException extends StorageBackendException { 20 | public KeyNotFoundException(final StorageBackend storage, final ObjectKey key) { 21 | super(getMessage(storage, key)); 22 | } 23 | 24 | public KeyNotFoundException(final StorageBackend storage, final ObjectKey key, final Exception e) { 25 | super(getMessage(storage, key), e); 26 | } 27 | 28 | private static String getMessage(final StorageBackend storage, final ObjectKey key) { 29 | return "Key " + key + " does not exists in storage " + storage; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /storage/core/src/testFixtures/java/io/aiven/kafka/tieredstorage/storage/TestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | import org.junit.jupiter.api.TestInfo; 20 | 21 | public class TestUtils { 22 | public static String testNameToBucketName(final TestInfo testInfo) { 23 | String bucketName = testInfo.getDisplayName() 24 | .toLowerCase() 25 | .replace(" ", "") 26 | .replace(",", "-") 27 | .replace("(", "") 28 | .replace(")", "") 29 | .replace("[", "") 30 | .replace("]", ""); 31 | while (bucketName.length() < 3) { 32 | bucketName += bucketName; 33 | } 34 | return bucketName; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /e2e/src/integration-test/java/io/aiven/kafka/tieredstorage/e2e/internal/RemoteLogMetadataDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.e2e.internal; 18 | 19 | import org.apache.kafka.common.serialization.Deserializer; 20 | import org.apache.kafka.server.log.remote.metadata.storage.serialization.RemoteLogMetadataSerde; 21 | import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata; 22 | 23 | public class RemoteLogMetadataDeserializer implements Deserializer { 24 | private final RemoteLogMetadataSerde serde = new RemoteLogMetadataSerde(); 25 | 26 | @Override 27 | public RemoteLogMetadata deserialize(final String topic, final byte[] data) { 28 | return serde.deserialize(data); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /storage/azure/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | archivesBaseName = "storage-azure" 18 | 19 | dependencies { 20 | implementation project(":storage:core") 21 | 22 | implementation platform("com.azure:azure-sdk-bom:$azureSdkVersion") 23 | implementation ("com.azure:azure-identity") { 24 | exclude group: "com.fasterxml.jackson.core" 25 | exclude group: "org.slf4j" 26 | } 27 | implementation ("com.azure:azure-storage-blob") { 28 | exclude group: "com.fasterxml.jackson.core" 29 | exclude group: "org.slf4j" 30 | } 31 | 32 | implementation project(":commons") 33 | 34 | testImplementation(testFixtures(project(":storage:core"))) 35 | 36 | testImplementation "org.testcontainers:junit-jupiter:$testcontainersVersion" 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/fetch/index/SegmentIndexesCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch.index; 18 | 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.util.function.Supplier; 22 | 23 | import org.apache.kafka.common.Configurable; 24 | import org.apache.kafka.server.log.remote.storage.RemoteStorageManager.IndexType; 25 | 26 | import io.aiven.kafka.tieredstorage.storage.ObjectKey; 27 | import io.aiven.kafka.tieredstorage.storage.StorageBackendException; 28 | 29 | public interface SegmentIndexesCache extends Configurable { 30 | InputStream get( 31 | final ObjectKey key, 32 | IndexType indexType, 33 | final Supplier indexSupplier 34 | ) throws StorageBackendException, IOException; 35 | } 36 | -------------------------------------------------------------------------------- /storage/core/src/main/java/io/aiven/kafka/tieredstorage/storage/ObjectDeleter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage; 18 | 19 | import java.util.Set; 20 | 21 | public interface ObjectDeleter { 22 | /** 23 | * Delete the object with the specified key. 24 | * 25 | *

If the object doesn't exist, the operation still succeeds as it is idempotent. 26 | */ 27 | void delete(ObjectKey key) throws StorageBackendException; 28 | 29 | /** 30 | * Delete objects from a set of keys. 31 | * 32 | *

If any of the objects doesn't exist, the operation still succeeds as it is idempotent. 33 | */ 34 | default void delete(Set keys) throws StorageBackendException { 35 | for (final var key : keys) { 36 | delete(key); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/iceberg/AvroSchemaRegistryStructureProviderConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.iceberg; 18 | 19 | import java.util.Map; 20 | 21 | import org.apache.kafka.common.config.AbstractConfig; 22 | import org.apache.kafka.common.config.ConfigDef; 23 | 24 | public class AvroSchemaRegistryStructureProviderConfig extends AbstractConfig { 25 | static final String SERDE_CONFIG_PREFIX = "serde."; 26 | 27 | public static ConfigDef configDef() { 28 | return new ConfigDef(); 29 | } 30 | 31 | public AvroSchemaRegistryStructureProviderConfig(final Map props) { 32 | super(configDef(), props); 33 | } 34 | 35 | public Map serdeConfig() { 36 | return originalsWithPrefix(SERDE_CONFIG_PREFIX, true); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /e2e/src/integration-test/java/io/aiven/kafka/tieredstorage/e2e/GcsSingleBrokerDirectTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.e2e; 18 | 19 | import com.google.cloud.storage.BucketInfo; 20 | import org.junit.jupiter.api.BeforeAll; 21 | 22 | public class GcsSingleBrokerDirectTest extends GcsSingleBrokerTest { 23 | static final String BUCKET = "test-bucket-direct"; 24 | 25 | @BeforeAll 26 | static void createBucket() { 27 | gcsClient.create(BucketInfo.newBuilder(BUCKET).build()); 28 | } 29 | 30 | @BeforeAll 31 | static void startKafka() throws Exception { 32 | setupKafka(kafka -> rsmPluginBasicSetup(kafka) 33 | .withEnv("KAFKA_RSM_CONFIG_STORAGE_GCS_BUCKET_NAME", BUCKET)); 34 | } 35 | 36 | @Override 37 | protected String bucket() { 38 | return BUCKET; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/serde/EncryptionSerdeModule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.serde; 18 | 19 | import javax.crypto.SecretKey; 20 | 21 | import io.aiven.kafka.tieredstorage.security.RsaEncryptionProvider; 22 | 23 | import com.fasterxml.jackson.databind.Module; 24 | import com.fasterxml.jackson.databind.module.SimpleModule; 25 | 26 | public final class EncryptionSerdeModule { 27 | public static Module create(final RsaEncryptionProvider rsaEncryptionProvider) { 28 | final var module = new SimpleModule(); 29 | 30 | module.addSerializer(SecretKey.class, 31 | new DataKeySerializer(rsaEncryptionProvider::encryptDataKey)); 32 | module.addDeserializer(SecretKey.class, 33 | new DataKeyDeserializer(rsaEncryptionProvider::decryptDataKey)); 34 | 35 | return module; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/ProxiedHttpTransportFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.gcs; 18 | 19 | import java.net.InetSocketAddress; 20 | import java.net.Proxy; 21 | 22 | import com.google.api.client.http.HttpTransport; 23 | import com.google.api.client.http.javanet.NetHttpTransport; 24 | import com.google.auth.http.HttpTransportFactory; 25 | 26 | class ProxiedHttpTransportFactory implements HttpTransportFactory { 27 | private final InetSocketAddress proxy; 28 | 29 | ProxiedHttpTransportFactory(final String host, final int port) { 30 | this.proxy = new InetSocketAddress(host, port); 31 | } 32 | 33 | @Override 34 | public HttpTransport create() { 35 | return new NetHttpTransport.Builder() 36 | .setProxy(new Proxy(Proxy.Type.SOCKS, proxy)) 37 | .build(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/iceberg/AvroSchemaRegistryStructureProviderConfigTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.iceberg; 18 | 19 | import java.util.Map; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | import static org.assertj.core.api.Assertions.assertThat; 24 | 25 | class AvroSchemaRegistryStructureProviderConfigTest { 26 | @Test 27 | void minimalConfig() { 28 | final String schemaRegistryUrl = "http://127.0.0.1:8080"; 29 | final Map configs = Map.of( 30 | "serde.schema.registry.url", schemaRegistryUrl, 31 | "serde.a.b.c", "xyz" 32 | ); 33 | final var config = new AvroSchemaRegistryStructureProviderConfig(configs); 34 | assertThat(config.serdeConfig()).isEqualTo(Map.of( 35 | "schema.registry.url", schemaRegistryUrl, 36 | "a.b.c", "xyz" 37 | )); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We release patches for security vulnerabilities. Which versions are eligible 6 | receiving such patches depend on the CVSS v3.0 Rating: 7 | 8 | | CVSS v3.0 | Supported Versions | 9 | | --------- | ----------------------------------------- | 10 | | 4.0-10.0 | Most recent release | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please report (suspected) security vulnerabilities to our **[bug bounty 15 | program](https://bugcrowd.com/aiven-mbb-og)**. You will receive a response from 16 | us within 2 working days. If the issue is confirmed, we will release a patch as 17 | soon as possible depending on impact and complexity. 18 | 19 | ## Qualifying Vulnerabilities 20 | 21 | Any reproducible vulnerability that has a severe effect on the security or 22 | privacy of our users is likely to be in scope for the program. 23 | 24 | We generally **aren't** interested in the following issues: 25 | * Social engineering (e.g. 
phishing, vishing, smishing) attacks 26 | * Brute force, DoS, text injection 27 | * Missing best practices such as HTTP security headers (CSP, X-XSS, etc.), 28 | email (SPF/DKIM/DMARC records), SSL/TLS configuration. 29 | * Software version disclosure / Banner identification issues / Descriptive 30 | error messages or headers (e.g. stack traces, application or server errors). 31 | * Clickjacking on pages with no sensitive actions 32 | * Theoretical vulnerabilities where you can't demonstrate a significant 33 | security impact with a proof of concept. 34 | -------------------------------------------------------------------------------- /commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config.validators; 18 | 19 | import java.util.Objects; 20 | 21 | import org.apache.kafka.common.config.ConfigDef; 22 | import org.apache.kafka.common.config.ConfigException; 23 | import org.apache.kafka.common.config.types.Password; 24 | 25 | public class NonEmptyPassword implements ConfigDef.Validator { 26 | @Override 27 | public void ensureValid(final String name, final Object value) { 28 | if (Objects.isNull(value)) { 29 | return; 30 | } 31 | final var pwd = (Password) value; 32 | if (pwd.value() == null || pwd.value().isBlank()) { 33 | throw new ConfigException(name + " value must not be empty"); 34 | } 35 | } 36 | 37 | @Override 38 | public String toString() { 39 | return "Non-empty password text"; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /storage/azure/src/integration-test/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConnectionStringTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.azure; 18 | 19 | import java.util.Map; 20 | 21 | import io.aiven.kafka.tieredstorage.storage.StorageBackend; 22 | 23 | import static io.aiven.kafka.tieredstorage.storage.azure.AzuriteBlobStorageUtils.connectionString; 24 | 25 | class AzureBlobStorageConnectionStringTest extends AzureBlobStorageTest { 26 | @Override 27 | protected StorageBackend storage() { 28 | final AzureBlobStorage azureBlobStorage = new AzureBlobStorage(); 29 | final Map configs = Map.of( 30 | "azure.container.name", azureContainerName, 31 | "azure.connection.string", connectionString(AZURITE_SERVER, BLOB_STORAGE_PORT) 32 | ); 33 | azureBlobStorage.configure(configs); 34 | return azureBlobStorage; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/metrics/MeasurableValue.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.metrics; 18 | 19 | import java.util.function.Supplier; 20 | 21 | import org.apache.kafka.common.metrics.MetricConfig; 22 | import org.apache.kafka.common.metrics.Sensor; 23 | import org.apache.kafka.common.metrics.stats.Value; 24 | 25 | /** 26 | * Implementation of {@link Value} that allows fetching a value from provided {@code Long} {@link Supplier} 27 | * to avoid unnecessary calls to {@link Sensor#record()} that under the hood has a synchronized block and affects 28 | * performance because of that. 29 | */ 30 | class MeasurableValue extends Value { 31 | private final Supplier value; 32 | 33 | MeasurableValue(final Supplier value) { 34 | this.value = value; 35 | } 36 | 37 | @Override 38 | public double measure(final MetricConfig config, final long now) { 39 | return value.get(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /benchmarks/jmh.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | base_dir=$(dirname $0) 18 | jmh_project_name="benchmarks" 19 | 20 | if [ ${base_dir} == "." ]; then 21 | gradlew_dir=".." 22 | elif [ ${base_dir##./} == "${jmh_project_name}" ]; then 23 | gradlew_dir="." 24 | else 25 | echo "JMH Benchmarks need to be run from the root of the kafka repository or the 'benchmarks' directory" 26 | exit 27 | fi 28 | 29 | gradleCmd="${gradlew_dir}/gradlew" 30 | libDir="${base_dir}/build/libs" 31 | 32 | echo "running gradlew :benchmarks:clean :benchmarks:shadowJar" 33 | 34 | $gradleCmd :benchmarks:clean :benchmarks:shadowJar 35 | 36 | echo "gradle build done" 37 | 38 | echo "running JMH with args: $@" 39 | 40 | java -jar ${libDir}/kafka-ts-benchmarks-*.jar "$@" 41 | 42 | echo "JMH benchmarks done" 43 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/transform/TransformChunkEnumeration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import java.util.Enumeration; 20 | 21 | /** 22 | * The enumeration of chunks being transformed. 23 | * 24 | *

There are supposed to be multiple implementations doing different transformations 25 | * (like compression and encryption). 26 | * These implementations are supposed to be composable. 27 | */ 28 | public interface TransformChunkEnumeration extends Enumeration { 29 | /** 30 | * Returns the original (i.e. before all the transformations) chunk size. 31 | * 32 | *

Normally it should be propagated through the chain of transformations. 33 | */ 34 | int originalChunkSize(); 35 | 36 | /** 37 | * Returns a transformed chunk size if it's known. 38 | * 39 | * @return a transformed chunk size; or {@code null} if unknown. 40 | */ 41 | Integer transformedChunkSize(); 42 | } 43 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/iceberg/StructureProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.iceberg; 18 | 19 | import java.io.IOException; 20 | import java.nio.ByteBuffer; 21 | 22 | import org.apache.kafka.common.Configurable; 23 | import org.apache.kafka.common.header.Headers; 24 | 25 | import io.confluent.kafka.schemaregistry.ParsedSchema; 26 | import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; 27 | 28 | public interface StructureProvider extends Configurable { 29 | ParsedSchema getSchemaById(final int schemaId) throws RestClientException, IOException; 30 | 31 | ByteBuffer serializeKey(final String topic, final Headers headers, final Object value); 32 | 33 | ByteBuffer serializeValue(final String topic, final Headers headers, final Object value); 34 | 35 | Object deserializeKey(final String topic, final Headers headers, final byte[] data); 36 | 37 | Object deserializeValue(final String topic, final Headers headers, final byte[] data); 38 | } 39 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/SegmentManifest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import java.util.Optional; 20 | 21 | import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata; 22 | 23 | import io.aiven.kafka.tieredstorage.manifest.index.ChunkIndex; 24 | 25 | import com.fasterxml.jackson.annotation.JsonSubTypes; 26 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 27 | 28 | /** 29 | * The segment manifest. 30 | * 31 | *

Contains various metadata about an uploaded segment. 32 | */ 33 | @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "version") 34 | @JsonSubTypes({ 35 | @JsonSubTypes.Type(value = SegmentManifestV1.class, name = "1") 36 | }) 37 | public interface SegmentManifest { 38 | ChunkIndex chunkIndex(); 39 | 40 | SegmentIndexes segmentIndexes(); 41 | 42 | boolean compression(); 43 | 44 | Optional encryption(); 45 | 46 | RemoteLogSegmentMetadata remoteLogSegmentMetadata(); 47 | } 48 | -------------------------------------------------------------------------------- /commons/src/test/java/io/aiven/kafka/tieredstorage/config/validators/SubclassTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config.validators; 18 | 19 | import org.apache.kafka.common.config.ConfigException; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | import static org.assertj.core.api.Assertions.assertThatNoException; 24 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 25 | 26 | class SubclassTest { 27 | @Test 28 | void validSubclass() { 29 | assertThatNoException().isThrownBy(() -> Subclass.of(Object.class).ensureValid("test", String.class)); 30 | } 31 | 32 | @Test 33 | void nullIsValid() { 34 | assertThatNoException().isThrownBy(() -> Subclass.of(Object.class).ensureValid("test", null)); 35 | } 36 | 37 | @Test 38 | void invalidSubclass() { 39 | assertThatThrownBy(() -> Subclass.of(String.class).ensureValid("test", Object.class)) 40 | .isInstanceOf(ConfigException.class) 41 | .hasMessage("test should be a subclass of java.lang.String"); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /e2e/src/integration-test/java/io/aiven/kafka/tieredstorage/e2e/AzureSingleBrokerDirectTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.e2e; 18 | 19 | import com.azure.storage.blob.BlobContainerClient; 20 | import org.junit.jupiter.api.BeforeAll; 21 | 22 | /** 23 | * The Azure Blob Storage test variant that goes to Azurite directly (i.e. not through the SOCKS5 proxy). 24 | */ 25 | class AzureSingleBrokerDirectTest extends AzureSingleBrokerTest { 26 | private static final String AZURE_CONTAINER = "test-container-direct"; 27 | 28 | @BeforeAll 29 | static void createAzureContainer() { 30 | blobServiceClient.createBlobContainer(AZURE_CONTAINER); 31 | } 32 | 33 | @BeforeAll 34 | static void startKafka() throws Exception { 35 | setupKafka(kafka -> rsmPluginBasicSetup(kafka) 36 | .withEnv("KAFKA_RSM_CONFIG_STORAGE_AZURE_CONTAINER_NAME", AZURE_CONTAINER)); 37 | } 38 | 39 | @Override 40 | BlobContainerClient blobContainerClient() { 41 | return blobServiceClient.getBlobContainerClient(AZURE_CONTAINER); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/index/VariableSizeChunkIndexBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.index; 18 | 19 | import java.util.ArrayList; 20 | 21 | public class VariableSizeChunkIndexBuilder extends AbstractChunkIndexBuilder { 22 | private final ArrayList transformedChunks = new ArrayList<>(); 23 | 24 | public VariableSizeChunkIndexBuilder(final int originalChunkSize, 25 | final int originalFileSize) { 26 | super(originalChunkSize, originalFileSize); 27 | } 28 | 29 | @Override 30 | protected void addChunk0(final int transformedChunkSize) { 31 | transformedChunks.add(transformedChunkSize); 32 | } 33 | 34 | @Override 35 | protected ChunkIndex finish0(final int finalTransformedChunkSize) { 36 | transformedChunks.add(finalTransformedChunkSize); 37 | return new VariableSizeChunkIndex( 38 | this.originalChunkSize, 39 | this.originalFileSize, 40 | this.transformedChunks 41 | ); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /storage/gcs/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | archivesBaseName = "storage-gcs" 18 | 19 | dependencies { 20 | implementation project(":storage:core") 21 | 22 | implementation ("com.google.cloud:google-cloud-storage:$gcpSdkVersion") { 23 | exclude group: 'com.google.errorprone', module: 'error_prone_annotations' 24 | exclude group: 'org.checkerframework', module: 'checker-qual' 25 | exclude group: 'com.google.code.findbugs', module: 'jsr305' 26 | exclude group: 'com.google.j2objc', module: 'j2objc-annotations' 27 | exclude group: 'org.codehaus.mojo', module: 'animal-sniffer-annotations' 28 | exclude group: 'com.google.guava', module: 'listenablefuture' 29 | } 30 | implementation "com.fasterxml.jackson.core:jackson-databind:$jacksonVersion" 31 | 32 | implementation project(":commons") 33 | 34 | testImplementation(testFixtures(project(":storage:core"))) 35 | 36 | testImplementation "org.testcontainers:junit-jupiter:$testcontainersVersion" 37 | 38 | testImplementation "io.aiven:testcontainers-fake-gcs-server:$testcontainersFakeGcsServerVersion" 39 | } 40 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/security/DataKeyAndAAD.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.security; 18 | 19 | import javax.crypto.SecretKey; 20 | 21 | import java.util.Arrays; 22 | import java.util.Objects; 23 | 24 | public class DataKeyAndAAD { 25 | public final SecretKey dataKey; 26 | public final byte[] aad; 27 | 28 | public DataKeyAndAAD(final SecretKey dataKey, final byte[] aad) { 29 | this.dataKey = dataKey; 30 | this.aad = aad; 31 | } 32 | 33 | @Override 34 | public boolean equals(final Object o) { 35 | if (this == o) { 36 | return true; 37 | } 38 | if (o == null || getClass() != o.getClass()) { 39 | return false; 40 | } 41 | final DataKeyAndAAD that = (DataKeyAndAAD) o; 42 | return Objects.equals(dataKey, that.dataKey) && Arrays.equals(aad, that.aad); 43 | } 44 | 45 | @Override 46 | public int hashCode() { 47 | int result = Objects.hashCode(dataKey); 48 | result = 31 * result + Arrays.hashCode(aad); 49 | return result; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /storage/s3/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | archivesBaseName = "storage-s3" 18 | 19 | ext { 20 | wireMockVersion = "3.13.1" 21 | } 22 | 23 | dependencies { 24 | implementation project(":storage:core") 25 | 26 | def excludeFromAWSDeps = { ModuleDependency dep -> 27 | dep.exclude group: "org.slf4j" 28 | } 29 | implementation ("software.amazon.awssdk:s3:$awsSdkVersion") {excludeFromAWSDeps(it)} 30 | runtimeOnly ("software.amazon.awssdk:sts:$awsSdkVersion") {excludeFromAWSDeps(it)} 31 | // TODO: Needed for Iceberg. We need to figure out how to manage Iceberg-specific dependencies. 32 | runtimeOnly ("software.amazon.awssdk:kms:$awsSdkVersion") {excludeFromAWSDeps(it)} 33 | 34 | implementation project(':commons') 35 | 36 | testImplementation(testFixtures(project(":storage:core"))) 37 | 38 | testImplementation "org.testcontainers:junit-jupiter:$testcontainersVersion" 39 | testImplementation "org.testcontainers:localstack:$testcontainersVersion" 40 | 41 | integrationTestImplementation("org.wiremock:wiremock:$wireMockVersion") { 42 | exclude group: "org.slf4j" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /commons/src/test/java/io/aiven/kafka/tieredstorage/config/validators/NullTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config.validators; 18 | 19 | import org.apache.kafka.common.config.ConfigDef; 20 | import org.apache.kafka.common.config.ConfigException; 21 | 22 | import org.junit.jupiter.api.Test; 23 | 24 | import static org.assertj.core.api.Assertions.assertThatNoException; 25 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 26 | 27 | class NullTest { 28 | @Test 29 | void nullIsValid() { 30 | assertThatNoException().isThrownBy(() -> Null.or(ConfigDef.Range.between(1, 2)).ensureValid("test", null)); 31 | } 32 | 33 | @Test 34 | void nonNullCorrectValueIsValid() { 35 | assertThatNoException().isThrownBy(() -> Null.or(ConfigDef.Range.between(1, 2)).ensureValid("test", 1)); 36 | } 37 | 38 | @Test 39 | void invalidValue() { 40 | assertThatThrownBy(() -> Null.or(ConfigDef.Range.between(1, 2)).ensureValid("test", 5)) 41 | .isInstanceOf(ConfigException.class) 42 | .hasMessage("Invalid value 5 for configuration test: Value must be no more than 2"); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/SegmentFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import java.util.Arrays; 20 | 21 | import com.fasterxml.jackson.annotation.JsonCreator; 22 | import com.fasterxml.jackson.annotation.JsonValue; 23 | 24 | public enum SegmentFormat { 25 | KAFKA, 26 | ICEBERG; 27 | 28 | @JsonValue 29 | public String getValue() { 30 | return name().toLowerCase(); 31 | } 32 | 33 | @JsonCreator 34 | public static SegmentFormat fromValue(final String value) { 35 | final String normalizedValue = value.toLowerCase(); 36 | for (final SegmentFormat format : values()) { 37 | if (format.getValue().equals(normalizedValue)) { 38 | return format; 39 | } 40 | } 41 | throw new IllegalArgumentException("Unknown segment format: " + value); 42 | } 43 | 44 | public static String[] allowedConfigValues() { 45 | return Arrays.stream(SegmentFormat.values()) 46 | .map(SegmentFormat::name) 47 | .map(String::toLowerCase) 48 | .toArray(String[]::new); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/ClosableInputStreamHolder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.util.ArrayList; 22 | import java.util.List; 23 | 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | class ClosableInputStreamHolder implements AutoCloseable { 28 | private static final Logger log = LoggerFactory.getLogger(ClosableInputStreamHolder.class); 29 | 30 | private final List streams = new ArrayList<>(); 31 | 32 | InputStream add(final InputStream is) { 33 | this.streams.add(is); 34 | return is; 35 | } 36 | 37 | @Override 38 | public void close() { 39 | for (final InputStream is : streams) { 40 | try { 41 | is.close(); 42 | } catch (final IOException e) { 43 | // We want to close all of them or, in case of an error closing some, as much as possible. 44 | // So no rethrowing, only logging. 45 | log.error("Error closing InputStream", e); 46 | } 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/transform/DecompressionChunkEnumeration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import java.util.Objects; 20 | 21 | import com.github.luben.zstd.Zstd; 22 | 23 | /** 24 | * The chunk de-transformation that does Zstd decompression. 25 | */ 26 | public class DecompressionChunkEnumeration implements DetransformChunkEnumeration { 27 | private final DetransformChunkEnumeration inner; 28 | 29 | public DecompressionChunkEnumeration(final DetransformChunkEnumeration inner) { 30 | this.inner = Objects.requireNonNull(inner, "inner cannot be null"); 31 | } 32 | 33 | @Override 34 | public boolean hasMoreElements() { 35 | return inner.hasMoreElements(); 36 | } 37 | 38 | @Override 39 | public byte[] nextElement() { 40 | final byte[] chunk = inner.nextElement(); 41 | final long decompressedSize = Zstd.decompressedSize(chunk); 42 | if (decompressedSize < 0) { 43 | throw new RuntimeException("Invalid decompressed size: " + decompressedSize); 44 | } 45 | return Zstd.decompress(chunk, (int) decompressedSize); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /e2e/src/integration-test/java/io/aiven/kafka/tieredstorage/e2e/GcsSingleBrokerSocks5Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.e2e; 18 | 19 | import com.google.cloud.storage.BucketInfo; 20 | import org.junit.jupiter.api.BeforeAll; 21 | 22 | public class GcsSingleBrokerSocks5Test extends GcsSingleBrokerTest { 23 | static final String BUCKET = "test-bucket-socks5"; 24 | 25 | @BeforeAll 26 | static void createBucket() { 27 | gcsClient.create(BucketInfo.newBuilder(BUCKET).build()); 28 | } 29 | 30 | @BeforeAll 31 | static void startKafka() throws Exception { 32 | setupKafka(kafka -> rsmPluginBasicSetup(kafka) 33 | .withEnv("KAFKA_RSM_CONFIG_STORAGE_GCS_BUCKET_NAME", BUCKET) 34 | .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_HOST", SOCKS5_NETWORK_ALIAS) 35 | .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_PORT", Integer.toString(SOCKS5_PORT)) 36 | .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_USERNAME", SOCKS5_USER) 37 | .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_PASSWORD", SOCKS5_PASSWORD)); 38 | } 39 | 40 | @Override 41 | protected String bucket() { 42 | return BUCKET; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /.github/workflows/main_push_and_pull_request_workflow.yml: -------------------------------------------------------------------------------- 1 | # The workflow to check main after push. 
2 | name: Main checks after push and during pull requests 3 | on: 4 | push: 5 | branches: [ 'main' ] 6 | pull_request: 7 | jobs: 8 | build: 9 | strategy: 10 | matrix: 11 | java-version: [ 17, 21 ] 12 | name: Build on ${{ matrix.runs-on }} with jdk ${{ matrix.java-version }} 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | - name: Set up JDK ${{ matrix.java-version }} 20 | uses: actions/setup-java@v4 21 | with: 22 | java-version: ${{ matrix.java-version }} 23 | distribution: temurin 24 | 25 | - name: Build with Gradle 26 | run: make build 27 | 28 | - name: Run unit tests 29 | run: make test 30 | 31 | - name: Run integration tests 32 | run: make integration_test 33 | 34 | - name: Upload build reports 35 | uses: actions/upload-artifact@v4 36 | if: always() 37 | with: 38 | name: build-reports-${{ matrix.java-version }} 39 | path: '**/build/reports/**' 40 | e2e_test: 41 | strategy: 42 | matrix: 43 | java-version: [ 17 ] 44 | test: [ 'LocalSystem', 'S3', 'Gcs', 'Azure' ] 45 | name: E2E tests for ${{ matrix.test }} with jdk ${{ matrix.java-version }} 46 | runs-on: ubuntu-latest 47 | steps: 48 | - name: Checkout code 49 | uses: actions/checkout@v4 50 | with: 51 | fetch-depth: 0 52 | 53 | - name: Build Docker image 54 | run: make docker_image 55 | 56 | - name: Run E2E tests 57 | timeout-minutes: 30 58 | run: make E2E_TEST=${{ matrix.test }} e2e_test 59 | 60 | # TODO: publish docker image 61 | -------------------------------------------------------------------------------- /commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config.validators; 18 | 19 | import org.apache.kafka.common.config.ConfigDef; 20 | import org.apache.kafka.common.config.ConfigException; 21 | 22 | /** 23 | * {@link org.apache.kafka.common.config.ConfigDef.Validator} implementation that verifies 24 | * that a config value is a subclass of a specified class. 25 | */ 26 | public class Subclass implements ConfigDef.Validator { 27 | private final Class parentClass; 28 | 29 | public static Subclass of(final Class parentClass) { 30 | return new Subclass(parentClass); 31 | } 32 | 33 | public Subclass(final Class parentClass) { 34 | this.parentClass = parentClass; 35 | } 36 | 37 | @Override 38 | public void ensureValid(final String name, final Object value) { 39 | if (value != null && !(parentClass.isAssignableFrom((Class) value))) { 40 | throw new ConfigException(name + " should be a subclass of " + parentClass.getCanonicalName()); 41 | } 42 | } 43 | 44 | @Override 45 | public String toString() { 46 | return "Any implementation of " + parentClass.getName(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/manifest/SegmentIndexV1Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import org.junit.jupiter.api.Test; 20 | 21 | import static org.assertj.core.api.Assertions.assertThat; 22 | 23 | public class SegmentIndexV1Test { 24 | 25 | @Test 26 | void identical() { 27 | final var i1 = new SegmentIndexV1(100, 1000); 28 | final var i2 = new SegmentIndexV1(100, 1000); 29 | assertThat(i1).isEqualTo(i2); 30 | assertThat(i2).isEqualTo(i1); 31 | assertThat(i1).hasSameHashCodeAs(i2); 32 | } 33 | 34 | @Test 35 | void differentPosition() { 36 | final var i1 = new SegmentIndexV1(100, 1000); 37 | final var i2 = new SegmentIndexV1(101, 1000); 38 | assertThat(i1).isNotEqualTo(i2); 39 | assertThat(i2).isNotEqualTo(i1); 40 | assertThat(i1).doesNotHaveSameHashCodeAs(i2); 41 | } 42 | 43 | @Test 44 | void differentSize() { 45 | final var i1 = new SegmentIndexV1(100, 1000); 46 | final var i2 = new SegmentIndexV1(100, 1001); 47 | assertThat(i1).isNotEqualTo(i2); 48 | assertThat(i2).isNotEqualTo(i1); 49 | assertThat(i1).doesNotHaveSameHashCodeAs(i2); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /storage/azure/src/integration-test/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageAccountKeyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.azure; 18 | 19 | import java.util.Map; 20 | 21 | import io.aiven.kafka.tieredstorage.storage.StorageBackend; 22 | 23 | import static io.aiven.kafka.tieredstorage.storage.azure.AzuriteBlobStorageUtils.endpoint; 24 | 25 | class AzureBlobStorageAccountKeyTest extends AzureBlobStorageTest { 26 | @Override 27 | protected StorageBackend storage() { 28 | final AzureBlobStorage azureBlobStorage = new AzureBlobStorage(); 29 | // The well-known Azurite account name and key. 30 | final String accountName = "devstoreaccount1"; 31 | final String accountKey = 32 | "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; 33 | final Map configs = Map.of( 34 | "azure.container.name", azureContainerName, 35 | "azure.account.name", accountName, 36 | "azure.account.key", accountKey, 37 | "azure.endpoint.url", endpoint(AZURITE_SERVER, BLOB_STORAGE_PORT) 38 | ); 39 | azureBlobStorage.configure(configs); 40 | return azureBlobStorage; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config.validators; 18 | 19 | import org.apache.kafka.common.config.ConfigDef; 20 | 21 | /** 22 | * {@link org.apache.kafka.common.config.ConfigDef.Validator} implementation that is used to combine another validator 23 | * with allowing null as a valid value. Useful for cases like 24 | *

25 |  * {@code Null.or(ConfigDef.Range.between(1, Long.MAX_VALUE))}
26 |  * 
27 | * where an existing validator that does not allow null values can be used but null is still a default value. 28 | */ 29 | public class Null implements ConfigDef.Validator { 30 | 31 | private final ConfigDef.Validator validator; 32 | 33 | public static ConfigDef.Validator or(final ConfigDef.Validator validator) { 34 | return new Null(validator); 35 | } 36 | 37 | private Null(final ConfigDef.Validator validator) { 38 | this.validator = validator; 39 | } 40 | 41 | @Override 42 | public void ensureValid(final String name, final Object value) { 43 | if (value != null) { 44 | validator.ensureValid(name, value); 45 | } 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return "null or " + validator.toString(); 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/serde/DataKeySerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.serde; 18 | 19 | import javax.crypto.SecretKey; 20 | 21 | import java.io.IOException; 22 | import java.util.Objects; 23 | import java.util.function.Function; 24 | 25 | import io.aiven.kafka.tieredstorage.security.EncryptedDataKey; 26 | 27 | import com.fasterxml.jackson.core.JsonGenerator; 28 | import com.fasterxml.jackson.databind.SerializerProvider; 29 | import com.fasterxml.jackson.databind.ser.std.StdSerializer; 30 | 31 | class DataKeySerializer extends StdSerializer { 32 | private final Function dataKeyEncryptor; 33 | 34 | DataKeySerializer(final Function dataKeyEncryptor) { 35 | super(SecretKey.class); 36 | this.dataKeyEncryptor = Objects.requireNonNull(dataKeyEncryptor, "dataKeyEncryptor cannot be null"); 37 | } 38 | 39 | @Override 40 | public void serialize(final SecretKey value, 41 | final JsonGenerator gen, 42 | final SerializerProvider provider) throws IOException { 43 | final EncryptedDataKey encryptionResult = dataKeyEncryptor.apply(value.getEncoded()); 44 | gen.writeString(encryptionResult.serialize()); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/config/KeyPairPaths.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config; 18 | 19 | import java.nio.file.Path; 20 | import java.util.Objects; 21 | 22 | public class KeyPairPaths { 23 | public final Path publicKey; 24 | public final Path privateKey; 25 | 26 | KeyPairPaths(final Path publicKey, final Path privateKey) { 27 | this.publicKey = Objects.requireNonNull(publicKey, "publicKey cannot be null"); 28 | this.privateKey = Objects.requireNonNull(privateKey, "privateKey cannot be null"); 29 | } 30 | 31 | @Override 32 | public boolean equals(final Object o) { 33 | if (this == o) { 34 | return true; 35 | } 36 | if (o == null || getClass() != o.getClass()) { 37 | return false; 38 | } 39 | final KeyPairPaths that = (KeyPairPaths) o; 40 | return Objects.equals(publicKey, that.publicKey) && Objects.equals(privateKey, that.privateKey); 41 | } 42 | 43 | @Override 44 | public int hashCode() { 45 | return Objects.hash(publicKey, privateKey); 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return "KeyPairPaths(" 51 | + "publicKey=" + publicKey 52 | + ", privateKey=" + privateKey 53 | + ")"; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /storage/azure/src/integration-test/java/io/aiven/kafka/tieredstorage/storage/azure/AzuriteBlobStorageUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.azure; 18 | 19 | import org.testcontainers.containers.GenericContainer; 20 | import org.testcontainers.utility.DockerImageName; 21 | 22 | public class AzuriteBlobStorageUtils { 23 | static GenericContainer azuriteContainer(final int port) { 24 | return 25 | new GenericContainer<>(DockerImageName.parse("mcr.microsoft.com/azure-storage/azurite")) 26 | .withExposedPorts(port) 27 | .withCommand("azurite-blob --blobHost 0.0.0.0"); 28 | } 29 | 30 | 31 | static String endpoint(final GenericContainer azuriteContainer, final int port) { 32 | return "http://127.0.0.1:" + azuriteContainer.getMappedPort(port) + "/devstoreaccount1"; 33 | } 34 | 35 | static String connectionString(final GenericContainer azuriteContainer, final int port) { 36 | // The well-known Azurite connection string. 37 | return "DefaultEndpointsProtocol=http;" 38 | + "AccountName=devstoreaccount1;" 39 | + "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" 40 | + "BlobEndpoint=" + endpoint(azuriteContainer, port) + ";"; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/transform/DetransformFinisher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import java.io.ByteArrayInputStream; 20 | import java.io.InputStream; 21 | import java.io.SequenceInputStream; 22 | import java.util.Enumeration; 23 | 24 | /** 25 | * The detransformation finisher. 26 | * 27 | *

It converts enumeration of {@code byte[]} into enumeration of {@link InputStream}, 28 | * so that it could be used in {@link SequenceInputStream}. 29 | */ 30 | public class DetransformFinisher implements Enumeration { 31 | private final DetransformChunkEnumeration inner; 32 | 33 | public DetransformFinisher(final DetransformChunkEnumeration inner) { 34 | this.inner = inner; 35 | } 36 | 37 | @Override 38 | public boolean hasMoreElements() { 39 | return inner.hasMoreElements(); 40 | } 41 | 42 | @Override 43 | public InputStream nextElement() { 44 | final var chunk = inner.nextElement(); 45 | return new ByteArrayInputStream(chunk); 46 | } 47 | 48 | public InputStream toInputStream() { 49 | if (inner instanceof BaseDetransformChunkEnumeration) { 50 | return ((BaseDetransformChunkEnumeration) inner).inputStream; 51 | } 52 | 53 | return new SequenceInputStream(this); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/index/FixedSizeChunkIndexBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.index; 18 | 19 | public class FixedSizeChunkIndexBuilder extends AbstractChunkIndexBuilder { 20 | private final int transformedChunkSize; 21 | 22 | public FixedSizeChunkIndexBuilder(final int originalChunkSize, 23 | final int originalFileSize, 24 | final int transformedChunkSize) { 25 | super(originalChunkSize, originalFileSize); 26 | 27 | checkSize(transformedChunkSize, "Transformed chunk size"); 28 | this.transformedChunkSize = transformedChunkSize; 29 | } 30 | 31 | @Override 32 | protected void addChunk0(final int transformedChunkSize) { 33 | // Sanity check. 34 | if (transformedChunkSize != this.transformedChunkSize) { 35 | throw new IllegalArgumentException("Non-final chunk must be of size " + this.transformedChunkSize 36 | + ", but " + transformedChunkSize + " given"); 37 | } 38 | } 39 | 40 | @Override 41 | protected ChunkIndex finish0(final int finalTransformedChunkSize) { 42 | return new FixedSizeChunkIndex( 43 | originalChunkSize, originalFileSize, transformedChunkSize, finalTransformedChunkSize); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/index/ChunkIndex.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.index; 18 | 19 | import java.util.List; 20 | 21 | import io.aiven.kafka.tieredstorage.Chunk; 22 | import io.aiven.kafka.tieredstorage.storage.BytesRange; 23 | 24 | import com.fasterxml.jackson.annotation.JsonSubTypes; 25 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 26 | 27 | /** 28 | * An index that maps chunks in original and transformed files. 29 | * 30 | *

A transformed file is a file that has been through 31 | * some transformations like encryption and compression, chunk by chunk. 32 | * 33 | *

The original file is supposed to be split into chunks 34 | * of constant size (apart from the final one). 35 | */ 36 | @JsonTypeInfo( 37 | use = JsonTypeInfo.Id.NAME, 38 | property = "type") 39 | @JsonSubTypes({ 40 | @JsonSubTypes.Type(value = FixedSizeChunkIndex.class, name = "fixed"), 41 | @JsonSubTypes.Type(value = VariableSizeChunkIndex.class, name = "variable"), 42 | }) 43 | public interface ChunkIndex { 44 | /** 45 | * For a given offset in the original file, finds the corresponding chunk. 46 | */ 47 | Chunk findChunkForOriginalOffset(int offset); 48 | 49 | /** 50 | * Returns all chunks in the index. 51 | */ 52 | List chunks(); 53 | 54 | List chunksForRange(BytesRange bytesRange); 55 | } 56 | -------------------------------------------------------------------------------- /e2e/src/integration-test/java/io/aiven/kafka/tieredstorage/e2e/AzureSingleBrokerSocks5Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.e2e; 18 | 19 | import com.azure.storage.blob.BlobContainerClient; 20 | import org.junit.jupiter.api.BeforeAll; 21 | 22 | /** 23 | * The Azure Blob Storage test variant that goes to Azurite through the SOCKS5 proxy. 
 */
class AzureSingleBrokerSocks5Test extends AzureSingleBrokerTest {
    // Blob container used only by this variant.
    private static final String AZURE_CONTAINER = "test-container-socks5";

    // Creates the blob container through the inherited blobServiceClient.
    // NOTE(review): this class declares two @BeforeAll methods and does not state their relative
    // order - confirm the container does not have to exist before Kafka is started.
    @BeforeAll
    static void createAzureContainer() {
        blobServiceClient.createBlobContainer(AZURE_CONTAINER);
    }

    // Starts Kafka with the basic RSM plugin setup plus the container name and the
    // SOCKS5 proxy host, port and credentials passed as environment variables.
    @BeforeAll
    static void startKafka() throws Exception {
        setupKafka(kafka -> rsmPluginBasicSetup(kafka)
            .withEnv("KAFKA_RSM_CONFIG_STORAGE_AZURE_CONTAINER_NAME", AZURE_CONTAINER)
            .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_HOST", SOCKS5_NETWORK_ALIAS)
            .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_PORT", Integer.toString(SOCKS5_PORT))
            .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_USERNAME", SOCKS5_USER)
            .withEnv("KAFKA_RSM_CONFIG_STORAGE_PROXY_PASSWORD", SOCKS5_PASSWORD)
        );
    }

    // Client for the container created in createAzureContainer().
    @Override
    BlobContainerClient blobContainerClient() {
        return blobServiceClient.getBlobContainerClient(AZURE_CONTAINER);
    }
}

--------------------------------------------------------------------------------
/benchmarks/build.gradle:
--------------------------------------------------------------------------------
/*
 * Copyright 2021 Aiven Oy
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// JMH execution borrowed from https://github.com/apache/kafka/blob/trunk/jmh-benchmarks

plugins {
    id 'com.github.johnrengelman.shadow' version '8.1.1'
}

// The benchmarks are packaged as a single shadow (fat) jar with this base name.
shadowJar {
    archiveBaseName = 'kafka-ts-benchmarks'
}

ext {
    jmhVersion = "1.37"
}

dependencies {
    implementation project(':core')
    implementation group: "org.apache.kafka", name: "kafka-storage-api", version: kafkaVersion
    implementation group: "org.apache.kafka", name: "kafka-clients", version: kafkaVersion

    implementation "org.openjdk.jmh:jmh-core:$jmhVersion"
    implementation "org.openjdk.jmh:jmh-core-benchmarks:$jmhVersion"
    annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$jmhVersion"

    implementation "org.slf4j:slf4j-log4j12:1.7.36"
}

// The JMH entry point is the jar's Main-Class.
jar {
    manifest {
        attributes "Main-Class": "org.openjdk.jmh.Main"
    }
}

// Runs the benchmarks from a freshly built shadow jar.
tasks.register('jmh', JavaExec) {
    dependsOn ':benchmarks:clean'
    dependsOn ':benchmarks:shadowJar'

    // "-jar" is set as the main class so that, with the shadow jar as the first argument below,
    // the command line has the shape `java -jar <shadow jar> <jmh args>`.
    mainClass = "-jar"

    doFirst {
        // Extra JMH arguments come from the `jmhArgs` system property, split on single spaces.
        if (System.getProperty("jmhArgs")) {
            args System.getProperty("jmhArgs").split(' ')
        }
        // Put the shadow jar in front of whatever arguments were collected so far.
        args = [shadowJar.getArchiveFile(), *args]
    }
}

javadoc {
    enabled = false
}

--------------------------------------------------------------------------------
/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/index/SegmentIndexKey.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2024 Aiven Oy
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch.index; 18 | 19 | import java.util.Objects; 20 | 21 | import org.apache.kafka.server.log.remote.storage.RemoteStorageManager; 22 | 23 | import io.aiven.kafka.tieredstorage.storage.ObjectKey; 24 | 25 | public class SegmentIndexKey { 26 | public final ObjectKey indexesKey; 27 | public final RemoteStorageManager.IndexType indexType; 28 | 29 | public SegmentIndexKey(final ObjectKey indexesKey, final RemoteStorageManager.IndexType indexType) { 30 | this.indexesKey = indexesKey; 31 | this.indexType = indexType; 32 | } 33 | 34 | @Override 35 | public String toString() { 36 | return "SegmentIndexKey{" 37 | + "indexesKey=" + indexesKey 38 | + ", indexType=" + indexType 39 | + '}'; 40 | } 41 | 42 | @Override 43 | public boolean equals(final Object o) { 44 | if (this == o) { 45 | return true; 46 | } 47 | if (o == null || getClass() != o.getClass()) { 48 | return false; 49 | } 50 | final SegmentIndexKey that = (SegmentIndexKey) o; 51 | return Objects.equals(indexesKey, that.indexesKey) && indexType == that.indexType; 52 | } 53 | 54 | @Override 55 | public int hashCode() { 56 | return Objects.hash(indexesKey, indexType); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 
(the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config.validators; 18 | 19 | import java.net.MalformedURLException; 20 | import java.net.URL; 21 | import java.util.List; 22 | 23 | import org.apache.kafka.common.config.ConfigDef; 24 | import org.apache.kafka.common.config.ConfigException; 25 | 26 | /** 27 | * {@link org.apache.kafka.common.config.ConfigDef.Validator} implementation that verifies that a config value is 28 | * a valid URL with http or https schemes. 
29 | */ 30 | public class ValidUrl implements ConfigDef.Validator { 31 | private static final List SUPPORTED_SCHEMAS = List.of("http", "https"); 32 | 33 | @Override 34 | public void ensureValid(final String name, final Object value) { 35 | if (value != null) { 36 | try { 37 | final var url = new URL((String) value); 38 | if (!SUPPORTED_SCHEMAS.contains(url.getProtocol())) { 39 | throw new ConfigException(name, value, "URL must have scheme from the list " + SUPPORTED_SCHEMAS); 40 | } 41 | } catch (final MalformedURLException e) { 42 | throw new ConfigException(name, value, "Must be a valid URL"); 43 | } 44 | } 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return "Valid URL as defined in rfc2396"; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /docs/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
 */
plugins {
    id 'java-library'
}

// Everything whose configs/metrics are documented must be on the runtime classpath of the generators.
dependencies {
    implementation "org.apache.kafka:kafka-clients:$kafkaVersion"
    implementation project(":core")
    implementation project(":storage:s3")
    implementation project(":storage:gcs")
    implementation project(":storage:azure")
    implementation project(":storage:filesystem")
}

// Runs the ConfigsDocs main class and captures its standard output into configs.rst.
tasks.register('genConfigsDocs', JavaExec) {
    classpath = sourceSets.main.runtimeClasspath
    mainClass = 'io.aiven.kafka.tieredstorage.misc.ConfigsDocs'

    // Define the outputs formally
    outputs.file("$projectDir/configs.rst")

    // Set up the output in the execution phase, not configuration, and avoid removing on clean
    // NOTE(review): the stream opened here is not closed anywhere in this script - confirm
    // whether that is intentional.
    doFirst {
        standardOutput = new File("$projectDir/configs.rst").newOutputStream()
    }
}

// Runs the MetricsDocs main class and captures its standard output into metrics.rst.
tasks.register('genMetricsDocs', JavaExec) {
    classpath = sourceSets.main.runtimeClasspath
    mainClass = 'io.aiven.kafka.tieredstorage.misc.MetricsDocs'

    // Define the outputs formally
    outputs.file("$projectDir/metrics.rst")

    // Set up the output in the execution phase, not configuration, and avoid removing on clean
    // NOTE(review): the stream opened here is not closed anywhere in this script - confirm
    // whether that is intentional.
    doFirst {
        standardOutput = new File("$projectDir/metrics.rst").newOutputStream()
    }
}

// Regenerate both documents after every compilation of this project.
tasks.named('compileJava') {
    finalizedBy 'genConfigsDocs', 'genMetricsDocs'
}
--------------------------------------------------------------------------------
/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2023 Aiven Oy
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.filesystem; 18 | 19 | import java.nio.file.Path; 20 | import java.util.Map; 21 | 22 | import org.apache.kafka.common.config.AbstractConfig; 23 | import org.apache.kafka.common.config.ConfigDef; 24 | 25 | public class FileSystemStorageConfig extends AbstractConfig { 26 | 27 | private static final String ROOT_CONFIG = "root"; 28 | private static final String ROOT_DOC = "Root directory"; 29 | 30 | private static final String OVERWRITE_ENABLED_CONFIG = "overwrite.enabled"; 31 | private static final String OVERWRITE_ENABLED_DOC = "Enable overwriting existing files"; 32 | 33 | public static final ConfigDef configDef() { 34 | return new ConfigDef() 35 | .define( 36 | ROOT_CONFIG, 37 | ConfigDef.Type.STRING, 38 | ConfigDef.NO_DEFAULT_VALUE, 39 | ConfigDef.Importance.HIGH, 40 | ROOT_DOC 41 | ) 42 | .define( 43 | OVERWRITE_ENABLED_CONFIG, 44 | ConfigDef.Type.BOOLEAN, 45 | false, 46 | ConfigDef.Importance.MEDIUM, 47 | OVERWRITE_ENABLED_DOC 48 | ); 49 | } 50 | 51 | FileSystemStorageConfig(final Map props) { 52 | super(configDef(), props); 53 | } 54 | 55 | final Path root() { 56 | return Path.of(getString(ROOT_CONFIG)); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/config/ChunkCacheConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the 
"License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config; 18 | 19 | import java.util.Map; 20 | 21 | import org.apache.kafka.common.config.ConfigDef; 22 | 23 | public class ChunkCacheConfig extends CacheConfig { 24 | private static final String CACHE_PREFETCH_MAX_SIZE_CONFIG = "prefetch.max.size"; 25 | private static final String CACHE_PREFETCH_MAX_SIZE_DOC = 26 | "The amount of data that should be eagerly prefetched and cached"; 27 | private static final int CACHE_PREFETCHING_SIZE_DEFAULT = 0; //TODO find out what it should be 28 | 29 | public static final ConfigDef configDef(final ConfigDef baseConfig) { 30 | baseConfig.define( 31 | CACHE_PREFETCH_MAX_SIZE_CONFIG, 32 | ConfigDef.Type.INT, 33 | CACHE_PREFETCHING_SIZE_DEFAULT, 34 | ConfigDef.Range.between(0, Integer.MAX_VALUE), 35 | ConfigDef.Importance.MEDIUM, 36 | CACHE_PREFETCH_MAX_SIZE_DOC 37 | ); 38 | return CacheConfig.defBuilder(baseConfig) 39 | .withDefaultRetentionMs(ChunkCacheConfig.CACHE_RETENTION_MS_DEFAULT) 40 | .build(); 41 | } 42 | 43 | public ChunkCacheConfig(final ConfigDef configDef, final Map props) { 44 | super(configDef, props); 45 | } 46 | 47 | public ChunkCacheConfig(final Map props) { 48 | super(configDef(new ConfigDef()), props); 49 | } 50 | 51 | public int cachePrefetchingSize() { 52 | return getInt(CACHE_PREFETCH_MAX_SIZE_CONFIG); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- 
/core/src/test/java/io/aiven/kafka/tieredstorage/ClosableInputStreamHolderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | 22 | import org.junit.jupiter.api.Test; 23 | 24 | import static org.mockito.Mockito.doThrow; 25 | import static org.mockito.Mockito.mock; 26 | import static org.mockito.Mockito.verify; 27 | 28 | class ClosableInputStreamHolderTest { 29 | @Test 30 | void closeAll() throws IOException { 31 | final InputStream is1 = mock(InputStream.class); 32 | final InputStream is2 = mock(InputStream.class); 33 | final InputStream is3 = mock(InputStream.class); 34 | try (final var holder = new ClosableInputStreamHolder()) { 35 | holder.add(is1); 36 | holder.add(is2); 37 | holder.add(is3); 38 | } 39 | verify(is1).close(); 40 | verify(is2).close(); 41 | verify(is3).close(); 42 | } 43 | 44 | @Test 45 | void closeAllEvenWithErrors() throws IOException { 46 | final InputStream is1 = mock(InputStream.class); 47 | final InputStream is2 = mock(InputStream.class); 48 | doThrow(new IOException("test")).when(is2).close(); 49 | final InputStream is3 = mock(InputStream.class); 50 | 51 | try (final var holder = new ClosableInputStreamHolder()) { 52 | holder.add(is1); 53 | 
holder.add(is2); 54 | holder.add(is3); 55 | } 56 | verify(is1).close(); 57 | verify(is2).close(); 58 | verify(is3).close(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/index/serde/TransformedChunksDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.index.serde; 18 | 19 | import java.io.IOException; 20 | import java.util.Base64; 21 | import java.util.List; 22 | 23 | import com.fasterxml.jackson.core.JsonParser; 24 | import com.fasterxml.jackson.databind.DeserializationContext; 25 | import com.fasterxml.jackson.databind.JsonDeserializer; 26 | import com.github.luben.zstd.Zstd; 27 | 28 | public class TransformedChunksDeserializer extends JsonDeserializer> { 29 | /** 30 | * Max size of an encoded transformed chunks we want to serialize. 31 | * This is mostly for sanity check, it should never be this big. 
32 | */ 33 | static final int MAX_DECOMPRESSED_SIZE = 10 * 1024 * 1024; 34 | 35 | @Override 36 | public List deserialize(final JsonParser p, 37 | final DeserializationContext ctxt) throws IOException { 38 | final String base64String = p.getValueAsString(); 39 | final byte[] compressed = Base64.getDecoder().decode(base64String); 40 | 41 | final long decompressedSize = Zstd.decompressedSize(compressed); 42 | if (decompressedSize < 0 || decompressedSize > MAX_DECOMPRESSED_SIZE) { 43 | throw new IOException("Invalid decompressed size: " + decompressedSize); 44 | } 45 | final byte[] binEncoded = new byte[(int) decompressedSize]; 46 | Zstd.decompress(binEncoded, compressed); 47 | 48 | return ChunkSizesBinaryCodec.decode(binEncoded); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /commons/src/test/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPasswordTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
 */

package io.aiven.kafka.tieredstorage.config.validators;

import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.config.types.Password;

import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThatNoException;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

// Covers the four input shapes of the validator: blank password, null value,
// regular password, and a Password object wrapping null.
class NonEmptyPasswordTest {

    private final ConfigDef.Validator nonEmptyPasswordValidator = new NonEmptyPassword();

    // A whitespace-only password is rejected as empty.
    @Test
    void emptyPassword() {
        assertThatThrownBy(() -> nonEmptyPasswordValidator.ensureValid("password", new Password("   ")))
            .isInstanceOf(ConfigException.class)
            .hasMessage("password value must not be empty");
    }

    // A null config value (no Password object at all) is accepted.
    @Test
    void nullPassword() {
        assertThatNoException().isThrownBy(() -> nonEmptyPasswordValidator.ensureValid("password", null));
    }

    @Test
    void validPassword() {
        assertThatNoException()
            .isThrownBy(() -> nonEmptyPasswordValidator.ensureValid("password", new Password("pass")));
    }

    // A Password object wrapping null is rejected with the same message as a blank one.
    @Test
    void nullPasswordValue() {
        assertThatThrownBy(() -> nonEmptyPasswordValidator.ensureValid("password", new Password(null)))
            .isInstanceOf(ConfigException.class)
            .hasMessage("password value must not be empty");
    }
}

--------------------------------------------------------------------------------
/core/src/main/java/io/aiven/kafka/tieredstorage/SegmentCompressionChecker.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2023 Aiven Oy
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | import java.io.File; 20 | import java.io.IOException; 21 | 22 | import org.apache.kafka.common.KafkaException; 23 | import org.apache.kafka.common.record.CompressionType; 24 | import org.apache.kafka.common.record.FileRecords; 25 | import org.apache.kafka.common.record.RecordBatch; 26 | 27 | /** 28 | * Checks if segment are compressed or not. 29 | * To be used when segment files are received on archival. 30 | */ 31 | public class SegmentCompressionChecker { 32 | 33 | /** 34 | * @param file Kafka log segment file 35 | * @return true if log segment is compressed, otherwise returns false 36 | */ 37 | public static boolean check(final File file) throws InvalidRecordBatchException { 38 | try (final FileRecords records = FileRecords.open(file, false, true, 0, false)) { 39 | final RecordBatch batch = fistRecordBatch(records); 40 | return batch.compressionType() != CompressionType.NONE; 41 | } catch (final IOException | KafkaException e) { 42 | throw new InvalidRecordBatchException("Failed to read and validate first batch", e); 43 | } 44 | } 45 | 46 | private static RecordBatch fistRecordBatch(final FileRecords records) throws InvalidRecordBatchException { 47 | final RecordBatch batch = records.firstBatch(); 48 | if (batch == null) { 49 | throw new InvalidRecordBatchException("Record batch is null"); 50 | } 51 | batch.ensureValid(); 52 | return batch; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- 
/core/src/main/java/io/aiven/kafka/tieredstorage/manifest/serde/DataKeyDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.serde; 18 | 19 | import javax.crypto.SecretKey; 20 | import javax.crypto.spec.SecretKeySpec; 21 | 22 | import java.io.IOException; 23 | import java.util.Objects; 24 | import java.util.function.Function; 25 | 26 | import io.aiven.kafka.tieredstorage.security.EncryptedDataKey; 27 | 28 | import com.fasterxml.jackson.core.JsonParseException; 29 | import com.fasterxml.jackson.core.JsonParser; 30 | import com.fasterxml.jackson.databind.DeserializationContext; 31 | import com.fasterxml.jackson.databind.deser.std.StdDeserializer; 32 | 33 | class DataKeyDeserializer extends StdDeserializer { 34 | private final Function keyDecryptor; 35 | 36 | DataKeyDeserializer(final Function keyDecryptor) { 37 | super(SecretKey.class); 38 | this.keyDecryptor = Objects.requireNonNull(keyDecryptor, "keyDecryptor cannot be null"); 39 | } 40 | 41 | @Override 42 | public SecretKey deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException { 43 | final EncryptedDataKey encryptedDataKey; 44 | try { 45 | encryptedDataKey = EncryptedDataKey.parse(p.getText()); 46 | } catch (final IllegalArgumentException e) { 47 | throw 
new JsonParseException(p, "Error parsing encrypted data key string", e); 48 | } 49 | final var decryptedKey = keyDecryptor.apply(encryptedDataKey); 50 | return new SecretKeySpec(decryptedKey, "AES"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/fetch/ChunkKeyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch; 18 | 19 | import org.apache.kafka.common.Uuid; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | import static org.assertj.core.api.Assertions.assertThat; 24 | 25 | class ChunkKeyTest { 26 | static final String UUID_1 = "topic/" + Uuid.randomUuid(); 27 | static final String UUID_2 = "topic/" + Uuid.randomUuid(); 28 | 29 | @Test 30 | void identical() { 31 | final var ck1 = new ChunkKey(UUID_1, 0); 32 | final var ck2 = new ChunkKey(UUID_1, 0); 33 | assertThat(ck1).isEqualTo(ck2); 34 | assertThat(ck2).isEqualTo(ck1); 35 | assertThat(ck1).hasSameHashCodeAs(ck2); 36 | } 37 | 38 | @Test 39 | void differentUuid() { 40 | final var ck1 = new ChunkKey(UUID_1, 0); 41 | final var ck2 = new ChunkKey(UUID_2, 0); 42 | assertThat(ck1).isNotEqualTo(ck2); 43 | assertThat(ck2).isNotEqualTo(ck1); 44 | assertThat(ck1).doesNotHaveSameHashCodeAs(ck2); 45 | } 46 | 47 | @Test 48 | void differentChunkIds() { 49 | final var ck1 = new ChunkKey(UUID_1, 0); 50 | final var ck2 = new ChunkKey(UUID_1, 1); 51 | assertThat(ck1).isNotEqualTo(ck2); 52 | assertThat(ck2).isNotEqualTo(ck1); 53 | assertThat(ck1).doesNotHaveSameHashCodeAs(ck2); 54 | } 55 | 56 | @Test 57 | void singlePath() { 58 | assertThat(new ChunkKey("test", 0).path()).isEqualTo("test-0"); 59 | } 60 | 61 | @Test 62 | void pathWitDir() { 63 | assertThat(new ChunkKey("parent/test", 0).path()).isEqualTo("test-0"); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /e2e/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
// Dependencies of the integration-test configurations of this module.
dependencies {
    // Kafka modules
    integrationTestImplementation "org.apache.kafka:kafka-clients:$kafkaVersion"
    integrationTestImplementation "org.apache.kafka:kafka-server-common:$kafkaVersion"
    integrationTestImplementation "org.apache.kafka:kafka-storage:$kafkaVersion"
    integrationTestImplementation "org.apache.kafka:kafka-storage-api:$kafkaVersion"

    integrationTestImplementation "commons-io:commons-io:$apacheCommonsIOVersion"

    // Cloud SDKs; their slf4j artifacts are excluded.
    integrationTestImplementation("software.amazon.awssdk:s3:$awsSdkVersion") {
        exclude group: "org.slf4j"
    }
    integrationTestImplementation("com.google.cloud:google-cloud-storage:$gcpSdkVersion") {
        exclude group: "org.slf4j"
    }
    integrationTestImplementation platform("com.azure:azure-sdk-bom:$azureSdkVersion")
    integrationTestImplementation "com.azure:azure-storage-blob"

    // Testcontainers
    integrationTestImplementation "org.testcontainers:junit-jupiter:$testcontainersVersion"
    integrationTestImplementation "org.testcontainers:kafka:$testcontainersVersion"
    integrationTestImplementation "io.aiven:testcontainers-fake-gcs-server:$testcontainersFakeGcsServerVersion"

    integrationTestRuntimeOnly "org.slf4j:slf4j-log4j12:$slf4jVersion"
}

tasks.named('test') {
    // Use junit platform for unit tests.
    useJUnitPlatform()
    // Log every test outcome together with its output and full failure details.
    testLogging {
        events 'passed', 'skipped', 'failed'
        exceptionFormat "full"
        showStandardStreams = true
        showExceptions = true
        showCauses = true
        showStackTraces = true
    }
}
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config.validators; 18 | 19 | import org.apache.kafka.common.config.ConfigDef; 20 | import org.apache.kafka.common.config.ConfigException; 21 | 22 | import org.junit.jupiter.api.Test; 23 | import org.junit.jupiter.params.ParameterizedTest; 24 | import org.junit.jupiter.params.provider.ValueSource; 25 | 26 | import static org.assertj.core.api.Assertions.assertThatNoException; 27 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 28 | 29 | class ValidUrlTest { 30 | 31 | private final ConfigDef.Validator urlValidator = new ValidUrl(); 32 | 33 | @Test 34 | void invalidScheme() { 35 | assertThatThrownBy(() -> urlValidator.ensureValid("test", "ftp://host")).isInstanceOf(ConfigException.class) 36 | .hasMessage("Invalid value ftp://host for configuration test: " 37 | + "URL must have scheme from the list [http, https]"); 38 | } 39 | 40 | @Test 41 | void invalidHost() { 42 | assertThatThrownBy(() -> urlValidator.ensureValid("test", "host")).isInstanceOf(ConfigException.class) 43 | .hasMessage("Invalid value host for configuration test: Must be a valid URL"); 44 | } 45 | 46 | @Test 47 | void nullIsValid() { 48 | assertThatNoException().isThrownBy(() -> urlValidator.ensureValid("test", null)); 49 | } 50 | 51 | @ParameterizedTest 52 | @ValueSource(strings = {"http", "https"}) 53 | void validSchemes(final String scheme) { 54 | assertThatNoException().isThrownBy(() -> urlValidator.ensureValid("test", scheme + "://host")); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/fetch/ChunkKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * Key identifying one chunk of a segment object.
 *
 * <p>Only the last element of the object key path is kept, so two keys whose paths
 * differ only in their parent directories compare equal.
 */
public class ChunkKey {
    public final String segmentFileName;
    public final int chunkId;

    /**
     * @param objectKeyPath object key path of the segment; must not be null and must have a file name element
     * @param chunkId id of the chunk within the segment
     * @throws NullPointerException if {@code objectKeyPath} is null
     * @throws IllegalArgumentException if {@code objectKeyPath} has no file name element (e.g. {@code "/"})
     */
    public ChunkKey(final String objectKeyPath, final int chunkId) {
        Objects.requireNonNull(objectKeyPath, "objectKeyPath cannot be null");
        // get last part of segment path + chunk id, as it's used for creating file names
        final Path fileName = Path.of(objectKeyPath).getFileName();
        if (fileName == null) {
            // Path#getFileName returns null for paths without name elements (such as a root).
            throw new IllegalArgumentException("objectKeyPath has no file name: " + objectKeyPath);
        }
        this.segmentFileName = fileName.toString();
        this.chunkId = chunkId;
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final ChunkKey other = (ChunkKey) o;
        return chunkId == other.chunkId
            && Objects.equals(segmentFileName, other.segmentFileName);
    }

    @Override
    public int hashCode() {
        int result = segmentFileName.hashCode();
        result = 31 * result + chunkId;
        return result;
    }

    @Override
    public String toString() {
        return "ChunkKey("
            + "segmentFileName=" + segmentFileName
            + ", chunkId=" + chunkId
            + ")";
    }

    /**
     * @return the segment file name and the chunk id joined with {@code -}
     */
    public String path() {
        return segmentFileName + "-" + chunkId;
    }
}
/storage/azure/src/test/java/io/aiven/kafka/tieredstorage/storage/azure/MetricCollectorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.azure; 18 | 19 | import java.util.Map; 20 | 21 | import org.junit.jupiter.api.Test; 22 | import org.junit.jupiter.params.ParameterizedTest; 23 | import org.junit.jupiter.params.provider.ValueSource; 24 | 25 | import static org.assertj.core.api.Assertions.assertThat; 26 | 27 | class MetricCollectorTest { 28 | 29 | @Test 30 | void pathInDevWithAccountName() { 31 | final var props = Map.of("azure.account.name", "test-account", 32 | "azure.container.name", "cont1"); 33 | final var metrics = new MetricCollector(new AzureBlobStorageConfig(props)); 34 | final var matcher = metrics.pathPattern().matcher("/test-account/cont1/test-object"); 35 | assertThat(matcher).matches(); 36 | } 37 | 38 | @Test 39 | void pathInProdWithoutAccountName() { 40 | final var props = Map.of("azure.account.name", "test-account", 41 | "azure.container.name", "cont1"); 42 | final var metrics = new MetricCollector(new AzureBlobStorageConfig(props)); 43 | final var matcher = metrics.pathPattern().matcher("/cont1/test-object"); 44 | assertThat(matcher).matches(); 45 | } 46 | 47 | @ParameterizedTest 48 | @ValueSource(strings = {"comp=test", 
"comp=test&post=val", "pre=val&comp=test", "pre=val&comp=test&post=val"}) 49 | void uploadQueryWithComp(final String query) { 50 | final var matcher = MetricCollector.MetricsPolicy.UPLOAD_QUERY_PATTERN.matcher(query); 51 | assertThat(matcher.find()).isTrue(); 52 | assertThat(matcher.group("comp")).isEqualTo("test"); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /storage/azure/src/integration-test/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageSasTokenTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.azure; 18 | 19 | import java.time.OffsetDateTime; 20 | import java.util.Map; 21 | 22 | import io.aiven.kafka.tieredstorage.storage.StorageBackend; 23 | 24 | import com.azure.storage.blob.sas.BlobContainerSasPermission; 25 | import com.azure.storage.blob.sas.BlobServiceSasSignatureValues; 26 | 27 | import static io.aiven.kafka.tieredstorage.storage.azure.AzuriteBlobStorageUtils.endpoint; 28 | 29 | class AzureBlobStorageSasTokenTest extends AzureBlobStorageTest { 30 | @Override 31 | protected StorageBackend storage() { 32 | final var permissions = new BlobContainerSasPermission() 33 | .setCreatePermission(true) 34 | .setDeletePermission(true) 35 | .setReadPermission(true) 36 | .setWritePermission(true); 37 | final var sasSignatureValues = 38 | new BlobServiceSasSignatureValues(OffsetDateTime.now().plusDays(1), permissions) 39 | .setStartTime(OffsetDateTime.now().minusMinutes(5)); 40 | final String sasToken = blobServiceClient.getBlobContainerClient(azureContainerName) 41 | .generateSas(sasSignatureValues); 42 | 43 | final AzureBlobStorage azureBlobStorage = new AzureBlobStorage(); 44 | final Map configs = Map.of( 45 | "azure.container.name", azureContainerName, 46 | "azure.sas.token", sasToken, 47 | "azure.endpoint.url", endpoint(AZURITE_SERVER, BLOB_STORAGE_PORT) 48 | ); 49 | azureBlobStorage.configure(configs); 50 | return azureBlobStorage; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/fetch/ChunkManagerFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch; 18 | 19 | import java.util.Map; 20 | 21 | import org.apache.kafka.common.Configurable; 22 | 23 | import io.aiven.kafka.tieredstorage.config.ChunkManagerFactoryConfig; 24 | import io.aiven.kafka.tieredstorage.fetch.cache.ChunkCache; 25 | import io.aiven.kafka.tieredstorage.security.AesEncryptionProvider; 26 | import io.aiven.kafka.tieredstorage.storage.ObjectFetcher; 27 | 28 | public class ChunkManagerFactory implements Configurable { 29 | private ChunkManagerFactoryConfig config; 30 | 31 | @Override 32 | public void configure(final Map configs) { 33 | this.config = new ChunkManagerFactoryConfig(configs); 34 | } 35 | 36 | public ChunkManager initChunkManager(final ObjectFetcher fileFetcher, 37 | final AesEncryptionProvider aesEncryptionProvider) { 38 | final DefaultChunkManager defaultChunkManager = new DefaultChunkManager(fileFetcher, aesEncryptionProvider); 39 | if (config.cacheClass() != null) { 40 | try { 41 | final ChunkCache chunkCache = config 42 | .cacheClass() 43 | .getDeclaredConstructor(ChunkManager.class) 44 | .newInstance(defaultChunkManager); 45 | chunkCache.configure(config.originalsWithPrefix(ChunkManagerFactoryConfig.FETCH_CHUNK_CACHE_PREFIX)); 46 | return chunkCache; 47 | } catch (final ReflectiveOperationException e) { 48 | throw new RuntimeException(e); 49 | } 50 | } else { 51 | return defaultChunkManager; 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- 
/core/src/test/java/io/aiven/kafka/tieredstorage/config/ChunkManagerFactoryConfigTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config; 18 | 19 | import java.util.Map; 20 | 21 | import org.apache.kafka.common.config.ConfigException; 22 | 23 | import io.aiven.kafka.tieredstorage.fetch.cache.ChunkCache; 24 | 25 | import org.junit.jupiter.api.Test; 26 | import org.junit.jupiter.params.ParameterizedTest; 27 | import org.junit.jupiter.params.provider.ValueSource; 28 | 29 | import static org.assertj.core.api.Assertions.assertThat; 30 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 31 | 32 | class ChunkManagerFactoryConfigTest { 33 | 34 | @Test 35 | void invalidCacheClass() { 36 | assertThatThrownBy(() -> new ChunkManagerFactoryConfig(Map.of("fetch.chunk.cache.class", "java.lang.Object"))) 37 | .isInstanceOf(ConfigException.class) 38 | .hasMessage("fetch.chunk.cache.class should be a subclass of " + ChunkCache.class.getCanonicalName()); 39 | } 40 | 41 | @ParameterizedTest 42 | @ValueSource(strings = { 43 | "io.aiven.kafka.tieredstorage.fetch.cache.MemoryChunkCache", 44 | "io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache" 45 | }) 46 | void validCacheClass(final String cacheClass) { 47 | final ChunkManagerFactoryConfig config = 
new ChunkManagerFactoryConfig( 48 | Map.of("fetch.chunk.cache.class", cacheClass) 49 | ); 50 | assertThat(config.cacheClass().getCanonicalName()).isEqualTo(cacheClass); 51 | } 52 | 53 | @Test 54 | void defaultConfig() { 55 | final ChunkManagerFactoryConfig config = new ChunkManagerFactoryConfig(Map.of()); 56 | assertThat(config.cacheClass()).isNull(); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/config/ChunkManagerFactoryConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.config; 18 | 19 | import java.util.Map; 20 | 21 | import org.apache.kafka.common.config.AbstractConfig; 22 | import org.apache.kafka.common.config.ConfigDef; 23 | 24 | import io.aiven.kafka.tieredstorage.config.validators.Subclass; 25 | import io.aiven.kafka.tieredstorage.fetch.cache.ChunkCache; 26 | import io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache; 27 | import io.aiven.kafka.tieredstorage.fetch.cache.MemoryChunkCache; 28 | 29 | public class ChunkManagerFactoryConfig extends AbstractConfig { 30 | 31 | public static final String FETCH_CHUNK_CACHE_PREFIX = "fetch.chunk.cache."; 32 | public static final String FETCH_CHUNK_CACHE_CONFIG = FETCH_CHUNK_CACHE_PREFIX + "class"; 33 | private static final String FETCH_CHUNK_CACHE_DOC = "Chunk cache implementation. There are 2 implementations " 34 | + "included: " + MemoryChunkCache.class.getName() + " and " + DiskChunkCache.class.getName(); 35 | 36 | public static ConfigDef configDef() { 37 | return new ConfigDef() 38 | .define( 39 | FETCH_CHUNK_CACHE_CONFIG, 40 | ConfigDef.Type.CLASS, 41 | null, 42 | Subclass.of(ChunkCache.class), 43 | ConfigDef.Importance.MEDIUM, 44 | FETCH_CHUNK_CACHE_DOC 45 | ); 46 | } 47 | 48 | public ChunkManagerFactoryConfig(final Map originals) { 49 | super(configDef(), originals); 50 | } 51 | 52 | @SuppressWarnings("unchecked") 53 | public Class> cacheClass() { 54 | return (Class>) getClass(FETCH_CHUNK_CACHE_CONFIG); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/transform/CompressionChunkEnumeration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import java.util.Objects; 20 | 21 | import com.github.luben.zstd.ZstdCompressCtx; 22 | 23 | /** 24 | * The chunk transformation that does Zstd compression. 25 | */ 26 | public class CompressionChunkEnumeration implements TransformChunkEnumeration { 27 | private final TransformChunkEnumeration inner; 28 | 29 | public CompressionChunkEnumeration(final TransformChunkEnumeration inner) { 30 | this.inner = Objects.requireNonNull(inner, "inner cannot be null"); 31 | } 32 | 33 | @Override 34 | public int originalChunkSize() { 35 | return inner.originalChunkSize(); 36 | } 37 | 38 | @Override 39 | public Integer transformedChunkSize() { 40 | // Variable transformed chunk size. 41 | return null; 42 | } 43 | 44 | @Override 45 | public boolean hasMoreElements() { 46 | return inner.hasMoreElements(); 47 | } 48 | 49 | @Override 50 | public byte[] nextElement() { 51 | final var chunk = inner.nextElement(); 52 | try (final ZstdCompressCtx compressCtx = new ZstdCompressCtx()) { 53 | try { 54 | compressCtx.setPledgedSrcSize(chunk.length); 55 | } catch (final NoSuchMethodError e) { 56 | // This may happen if the Zstd library is old enough. 57 | // It's OK if we just skip here, because the operation is advisory. 58 | } 59 | // Allow the de-transformation to know the content size and allocate buffers accordingly. 
60 | compressCtx.setContentSize(true); 61 | return compressCtx.compress(chunk); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/metadata/SegmentCustomMetadataSerde.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.metadata; 18 | 19 | import java.nio.ByteBuffer; 20 | import java.util.NavigableMap; 21 | import java.util.TreeMap; 22 | 23 | import org.apache.kafka.common.protocol.types.Struct; 24 | 25 | import static io.aiven.kafka.tieredstorage.metadata.SegmentCustomMetadataField.CUSTOM_METADATA_SCHEMA; 26 | import static io.aiven.kafka.tieredstorage.metadata.SegmentCustomMetadataField.TAGGED_FIELD_NAME; 27 | 28 | /** 29 | * Serialize and deserialize {@code NavigableMap} based on fields defined by {@code SegmentCustomMetadataField}. 30 | * 31 | *

{@code NavigableMap} is required by {@link org.apache.kafka.common.protocol.types.TaggedFields}, 32 | * therefore is enforced on this API. 33 | */ 34 | public class SegmentCustomMetadataSerde { 35 | 36 | public byte[] serialize(final NavigableMap data) { 37 | if (data.isEmpty()) { 38 | return new byte[] {}; 39 | } 40 | 41 | final var struct = new Struct(CUSTOM_METADATA_SCHEMA); 42 | struct.set(TAGGED_FIELD_NAME, data); 43 | 44 | final var buf = ByteBuffer.allocate(struct.sizeOf()); 45 | struct.writeTo(buf); 46 | return buf.array(); 47 | } 48 | 49 | public NavigableMap deserialize(final byte[] data) { 50 | if (data == null || data.length == 0) { 51 | return new TreeMap<>(); 52 | } 53 | 54 | final var buf = ByteBuffer.wrap(data); 55 | final var struct = CUSTOM_METADATA_SCHEMA.read(buf); 56 | 57 | @SuppressWarnings("unchecked") final var fields = 58 | (NavigableMap) struct.get(TAGGED_FIELD_NAME); 59 | return fields; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /storage/s3/src/test/java/io/aiven/kafka/tieredstorage/storage/s3/ByteBufferMarkableInputStreamTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2025 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.storage.s3; 18 | 19 | import java.io.IOException; 20 | import java.nio.ByteBuffer; 21 | import java.util.Random; 22 | 23 | import org.junit.jupiter.api.Test; 24 | 25 | import static org.assertj.core.api.Assertions.assertThat; 26 | 27 | class ByteBufferMarkableInputStreamTest { 28 | 29 | @Test 30 | void readAfterAllBytesReadWithoutReset() throws IOException { 31 | final byte[] buffer = new byte[10]; 32 | new Random().nextBytes(buffer); 33 | try (ByteBufferMarkableInputStream inputStream = new ByteBufferMarkableInputStream(ByteBuffer.wrap(buffer))){ 34 | assertThat(inputStream.markSupported()).isTrue(); 35 | assertThat(inputStream.available()).isEqualTo(10); 36 | inputStream.readAllBytes(); 37 | assertThat(inputStream.available()).isEqualTo(0); 38 | final int read = inputStream.read(); 39 | assertThat(read).isEqualTo(-1); 40 | } 41 | } 42 | 43 | @Test 44 | void readAfterAllBytesReadAndReset() throws IOException { 45 | final byte[] buffer = new byte[10]; 46 | new Random().nextBytes(buffer); 47 | try (ByteBufferMarkableInputStream inputStream = new ByteBufferMarkableInputStream(ByteBuffer.wrap(buffer))){ 48 | inputStream.mark(0); 49 | inputStream.readAllBytes(); 50 | assertThat(inputStream.available()).isEqualTo(0); 51 | //reset and try to read again 52 | inputStream.reset(); 53 | assertThat(inputStream.available()).isEqualTo(10); 54 | final int read = inputStream.read(); 55 | assertThat(read).isNotEqualTo(-1); 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/index/serde/TransformedChunksSerializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.index.serde; 18 | 19 | import java.io.IOException; 20 | import java.util.Base64; 21 | import java.util.List; 22 | 23 | import com.fasterxml.jackson.core.JsonGenerator; 24 | import com.fasterxml.jackson.databind.JsonSerializer; 25 | import com.fasterxml.jackson.databind.SerializerProvider; 26 | import com.github.luben.zstd.ZstdCompressCtx; 27 |
/**
 * Jackson serializer for the list of transformed chunk sizes.
 *
 * <p>The list is binary-encoded with {@link ChunkSizesBinaryCodec}, compressed with Zstd
 * and written to the JSON output as a single Base64 string.
 */
public class TransformedChunksSerializer extends JsonSerializer<List<Integer>> {
    /**
     * Writes {@code value} to {@code gen} as one Base64 string.
     *
     * @param value       the chunk sizes to serialize
     * @param gen         the generator the Base64 string is written to
     * @param serializers not used by this serializer
     * @throws IllegalArgumentException if the binary-encoded list is longer than
     *                                  {@link TransformedChunksDeserializer#MAX_DECOMPRESSED_SIZE}
     * @throws IOException              if writing to {@code gen} fails
     */
    @Override
    public void serialize(final List<Integer> value,
                          final JsonGenerator gen,
                          final SerializerProvider serializers) throws IOException {
        final byte[] binEncoded = ChunkSizesBinaryCodec.encode(value);

        if (binEncoded.length > TransformedChunksDeserializer.MAX_DECOMPRESSED_SIZE) {
            throw new IllegalArgumentException(
                "Encoded index is too big (" + binEncoded.length + "), cannot serialize");
        }

        try (final ZstdCompressCtx compressCtx = new ZstdCompressCtx()) {
            try {
                compressCtx.setPledgedSrcSize(binEncoded.length);
            } catch (final NoSuchMethodError ignored) {
                // This may happen if the Zstd library is old enough.
                // It's OK if we just skip here, because the operation is advisory.
            }
            // Store the uncompressed size in the frame header.
            compressCtx.setContentSize(true);
            final byte[] compressed = compressCtx.compress(binEncoded);
            final String base64String = Base64.getEncoder().encodeToString(compressed);
            gen.writeString(base64String);
        }
    }
}
54 | -------------------------------------------------------------------------------- /core/src/integration-test/java/io/aiven/kafka/tieredstorage/AllOpenedFileInputStreamsAreClosedChecker.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage; 18 | 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.nio.file.Files; 22 | import java.nio.file.OpenOption; 23 | import java.nio.file.Path; 24 | import java.util.ArrayList; 25 | import java.util.Collections; 26 | import java.util.List; 27 | 28 | import org.mockito.MockedStatic; 29 | import org.mockito.Mockito; 30 | 31 | import static org.mockito.ArgumentMatchers.any; 32 | import static org.mockito.Mockito.CALLS_REAL_METHODS; 33 | import static org.mockito.Mockito.verify; 34 | 35 | /** 36 | * Checks that all {@link InputStream}s opened by 37 | * {@link Files#newInputStream(Path, OpenOption...)} are properly closed. 38 | * 39 | *

Should be used in try-with-resources. 40 | */ 41 | class AllOpenedFileInputStreamsAreClosedChecker implements AutoCloseable { 42 | private final MockedStatic mockedFiles; 43 | 44 | private final List opened = Collections.synchronizedList(new ArrayList<>()); 45 | 46 | public AllOpenedFileInputStreamsAreClosedChecker() { 47 | this.mockedFiles = Mockito.mockStatic(Files.class, CALLS_REAL_METHODS); 48 | this.mockedFiles.when(() -> Files.newInputStream(any(Path.class))) 49 | .thenAnswer(invocation -> { 50 | final InputStream spy = Mockito.spy((InputStream) invocation.callRealMethod()); 51 | opened.add(spy); 52 | return spy; 53 | }); 54 | } 55 | 56 | @Override 57 | public void close() throws IOException { 58 | mockedFiles.close(); 59 | 60 | assert !opened.isEmpty(); 61 | for (final InputStream spy : opened) { 62 | verify(spy).close(); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/fetch/SegmentIndexKeyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch; 18 | 19 | import org.apache.kafka.common.Uuid; 20 | import org.apache.kafka.server.log.remote.storage.RemoteStorageManager.IndexType; 21 | 22 | import io.aiven.kafka.tieredstorage.fetch.index.SegmentIndexKey; 23 | import io.aiven.kafka.tieredstorage.storage.ObjectKey; 24 | 25 | import org.junit.jupiter.api.Test; 26 | 27 | import static org.assertj.core.api.Assertions.assertThat; 28 | 29 | class SegmentIndexKeyTest { 30 | static final ObjectKey OBJECT_KEY_1 = () -> "topic/" + Uuid.randomUuid(); 31 | static final ObjectKey OBJECT_KEY_2 = () -> "topic/" + Uuid.randomUuid(); 32 | 33 | @Test 34 | void identical() { 35 | final var ck1 = new SegmentIndexKey(OBJECT_KEY_1, IndexType.OFFSET); 36 | final var ck2 = new SegmentIndexKey(OBJECT_KEY_1, IndexType.OFFSET); 37 | assertThat(ck1).isEqualTo(ck2); 38 | assertThat(ck2).isEqualTo(ck1); 39 | assertThat(ck1).hasSameHashCodeAs(ck2); 40 | } 41 | 42 | @Test 43 | void differentObjectKey() { 44 | final var ck1 = new SegmentIndexKey(OBJECT_KEY_1, IndexType.OFFSET); 45 | final var ck2 = new SegmentIndexKey(OBJECT_KEY_2, IndexType.OFFSET); 46 | assertThat(ck1).isNotEqualTo(ck2); 47 | assertThat(ck2).isNotEqualTo(ck1); 48 | assertThat(ck1).doesNotHaveSameHashCodeAs(ck2); 49 | } 50 | 51 | @Test 52 | void differentIndexTypes() { 53 | final var ck1 = new SegmentIndexKey(OBJECT_KEY_1, IndexType.OFFSET); 54 | final var ck2 = new SegmentIndexKey(OBJECT_KEY_1, IndexType.TIMESTAMP); 55 | assertThat(ck1).isNotEqualTo(ck2); 56 | assertThat(ck2).isNotEqualTo(ck1); 57 | assertThat(ck1).doesNotHaveSameHashCodeAs(ck2); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/SegmentIndexV1.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 
| * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import java.util.Objects; 20 | 21 | import io.aiven.kafka.tieredstorage.storage.BytesRange; 22 | 23 | import com.fasterxml.jackson.annotation.JsonCreator; 24 | import com.fasterxml.jackson.annotation.JsonProperty; 25 |
/**
 * Version 1 of {@link SegmentIndex}: an immutable pair of {@code position} and {@code size},
 * both mandatory JSON properties.
 */
public class SegmentIndexV1 implements SegmentIndex {
    private final int position;
    private final int size;

    /**
     * @param position value of the required JSON property {@code position}
     * @param size     value of the required JSON property {@code size}
     */
    @JsonCreator
    public SegmentIndexV1(@JsonProperty(value = "position", required = true) final int position,
                          @JsonProperty(value = "size", required = true) final int size) {
        this.position = position;
        this.size = size;
    }

    @Override
    @JsonProperty("position")
    public int position() {
        return this.position;
    }

    @Override
    @JsonProperty("size")
    public int size() {
        return this.size;
    }

    /** Returns the byte range built from {@code position} and {@code size}. */
    @Override
    public BytesRange range() {
        return BytesRange.ofFromPositionAndSize(this.position, this.size);
    }

    @Override
    public boolean equals(final Object o) {
        if (o == this) {
            return true;
        }
        if (o == null) {
            return false;
        }
        // Exact class match: subclasses are never equal to a plain SegmentIndexV1.
        if (o.getClass() != getClass()) {
            return false;
        }
        final SegmentIndexV1 other = (SegmentIndexV1) o;
        return this.size == other.size && this.position == other.position;
    }

    @Override
    public int hashCode() {
        return Objects.hash(position, size);
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("SegmentIndexV1{");
        text.append("position=").append(position);
        text.append(", size=").append(size);
        text.append('}');
        return text.toString();
    }
}
79 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/manifest/index/VariableSizeChunkIndexEqualsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest.index; 18 | 19 | import java.util.List; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | import static org.assertj.core.api.Assertions.assertThat; 24 | 25 | class VariableSizeChunkIndexEqualsTest { 26 | @Test 27 | void identical() { 28 | final var i1 = new VariableSizeChunkIndex(100, 1000, List.of(10, 20, 30)); 29 | final var i2 = new VariableSizeChunkIndex(100, 1000, List.of(10, 20, 30)); 30 | assertThat(i1).isEqualTo(i2); 31 | assertThat(i2).isEqualTo(i1); 32 | assertThat(i1).hasSameHashCodeAs(i2); 33 | } 34 | 35 | @Test 36 | void differentOriginalChunkSize() { 37 | final var i1 = new VariableSizeChunkIndex(100, 1000, List.of(10, 20, 30)); 38 | final var i2 = new VariableSizeChunkIndex(101, 1000, List.of(10, 20, 30)); 39 | assertThat(i1).isNotEqualTo(i2); 40 | assertThat(i2).isNotEqualTo(i1); 41 | assertThat(i1).doesNotHaveSameHashCodeAs(i2); 42 | } 43 | 44 | @Test 45 | void differentOriginalFileSize() { 46 |
final var i1 = new VariableSizeChunkIndex(100, 1000, List.of(10, 20, 30)); 47 | final var i2 = new VariableSizeChunkIndex(100, 1001, List.of(10, 20, 30)); 48 | assertThat(i1).isNotEqualTo(i2); 49 | assertThat(i2).isNotEqualTo(i1); 50 | assertThat(i1).doesNotHaveSameHashCodeAs(i2); 51 | } 52 | 53 | @Test 54 | void differentTransformedChunks() { 55 | final var i1 = new VariableSizeChunkIndex(100, 1000, List.of(10, 20, 30)); 56 | final var i2 = new VariableSizeChunkIndex(100, 1000, List.of(10, 20, 31)); 57 | assertThat(i1).isNotEqualTo(i2); 58 | assertThat(i2).isNotEqualTo(i1); 59 | assertThat(i1).doesNotHaveSameHashCodeAs(i2); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /.github/workflows/release_pr_workflow.yml: --------------------------------------------------------------------------------
# The workflow to create PRs with release commits.
name: Create release PR
on:
  workflow_dispatch:
    inputs:
      release_version:
        description: "Release version '0.1.2' (without 'v')"
        required: true
      snapshot_version:
        description: "Snapshot version '0.2.0-SNAPSHOT' (without 'v')"
        required: true

jobs:
  create_release_pr:
    name: Create release PR (job)
    runs-on: ubuntu-latest
    steps:
      # Reject anything that is not exactly X.Y.Z / X.Y.Z-SNAPSHOT before it reaches sed and git.
      - name: Check versions
        env:
          RELEASE_VERSION: ${{ github.event.inputs.release_version }}
          SNAPSHOT_VERSION: ${{ github.event.inputs.snapshot_version }}
        run: |
          echo "Checking release version..."
          # The variable is quoted so that whitespace is not word-split away before the check.
          if echo "$RELEASE_VERSION" | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+$' > /dev/null; then
            echo "Release version is invalid"
            exit 1
          fi

          echo "Checking snapshot version..."
          if echo "$SNAPSHOT_VERSION" | grep --invert-match '^[0-9]\+\.[0-9]\+\.[0-9]\+-SNAPSHOT$' > /dev/null; then
            echo "Snapshot version is invalid"
            exit 1
          fi

      - name: Checkout main
        uses: actions/checkout@v4
        with:
          ref: main
          fetch-depth: 0

      # Two commits: one setting the release version, one moving on to the next snapshot version.
      - name: Create release commits
        env:
          RELEASE_VERSION: ${{ github.event.inputs.release_version }}
          SNAPSHOT_VERSION: ${{ github.event.inputs.snapshot_version }}
        run: |
          git config --local user.name "GitHub Action"
          git config --local user.email "action@github.com"
          sed -i -e "s/^version=.\+$/version=$RELEASE_VERSION/g" gradle.properties
          git add gradle.properties
          git commit -m "Release version $RELEASE_VERSION"
          sed -i -e "s/^version=.\+$/version=$SNAPSHOT_VERSION/g" gradle.properties
          git add gradle.properties
          git commit -m "Bump version to $SNAPSHOT_VERSION"

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v4
        with:
          branch: release-${{ github.event.inputs.release_version }}
          delete-branch: true
          draft: true
          title: Release version ${{ github.event.inputs.release_version }}
          body: |
            Proposed changelog:
              - *fill in*
65 | -------------------------------------------------------------------------------- /storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/ByteBufferMarkableInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package io.aiven.kafka.tieredstorage.storage.s3; 19 | 20 | import java.io.InputStream; 21 | import java.nio.ByteBuffer; 22 | import java.util.Objects; 23 | 24 | /** 25 | * Wraps a {@link ByteBuffer} for access via the {@link InputStream} API. 26 | * Its implement refers to {@link org.apache.kafka.common.utils.ByteBufferInputStream} 27 | * but with marking and resetting operations added. 28 | */ 29 | public class ByteBufferMarkableInputStream extends InputStream { 30 | private final ByteBuffer byteBuffer; 31 | 32 | public ByteBufferMarkableInputStream(final ByteBuffer buffer) { 33 | byteBuffer = Objects.requireNonNull(buffer); 34 | } 35 | 36 | public int read() { 37 | return !this.byteBuffer.hasRemaining() ? 
-1 : this.byteBuffer.get() & 255; 38 | } 39 | 40 | public int read(final byte[] bytes, final int off, int len) { 41 | if (len == 0) { 42 | return 0; 43 | } else if (!this.byteBuffer.hasRemaining()) { 44 | return -1; 45 | } else { 46 | len = Math.min(len, this.byteBuffer.remaining()); 47 | this.byteBuffer.get(bytes, off, len); 48 | return len; 49 | } 50 | } 51 | 52 | public int available() { 53 | return this.byteBuffer.remaining(); 54 | } 55 | 56 | @Override 57 | public boolean markSupported() { 58 | return true; 59 | } 60 | 61 | @Override 62 | public void mark(final int readLimit) { 63 | byteBuffer.mark(); 64 | } 65 | 66 | @Override 67 | public void reset() { 68 | byteBuffer.reset(); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/MemoryChunkCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.fetch.cache; 18 | 19 | import java.io.ByteArrayInputStream; 20 | import java.io.IOException; 21 | import java.io.InputStream; 22 | import java.util.Map; 23 | 24 | import io.aiven.kafka.tieredstorage.config.ChunkCacheConfig; 25 | import io.aiven.kafka.tieredstorage.fetch.ChunkKey; 26 | import io.aiven.kafka.tieredstorage.fetch.ChunkManager; 27 | 28 | import com.github.benmanes.caffeine.cache.RemovalListener; 29 | import com.github.benmanes.caffeine.cache.Weigher; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 |
/**
 * A {@link ChunkCache} that keeps each cached chunk in memory as a byte array.
 */
public class MemoryChunkCache extends ChunkCache<byte[]> {
    private static final Logger log = LoggerFactory.getLogger(MemoryChunkCache.class);

    public MemoryChunkCache(final ChunkManager chunkManager) {
        super(chunkManager);
    }

    /** Exposes a cached byte array as a stream without copying it. */
    @Override
    public InputStream cachedChunkToInputStream(final byte[] cachedChunk) {
        return new ByteArrayInputStream(cachedChunk);
    }

    /**
     * Reads the whole chunk into a byte array and closes the source stream.
     *
     * @throws IOException if reading or closing {@code chunk} fails
     */
    @Override
    public byte[] cacheChunk(final ChunkKey chunkKey, final InputStream chunk) throws IOException {
        try (chunk) {
            return chunk.readAllBytes();
        }
    }

    /** Only logs the removal at debug level; no other action is taken here. */
    @Override
    public RemovalListener<ChunkKey, byte[]> removalListener() {
        return (key, content, cause) -> log.debug("Deleted cached value for key {} from cache."
            + " The reason of the deletion is {}", key, cause);
    }

    /** Weighs an entry by the length of its byte array. */
    @Override
    public Weigher<ChunkKey, byte[]> weigher() {
        return (key, value) -> value.length;
    }

    /** Builds the underlying cache from the given configuration. */
    @Override
    public void configure(final Map<String, ?> configs) {
        final ChunkCacheConfig config = new ChunkCacheConfig(configs);
        this.cache = buildCache(config);
    }
}
69 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/transform/DecryptionChunkEnumeration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import javax.crypto.BadPaddingException; 20 | import javax.crypto.Cipher; 21 | import javax.crypto.IllegalBlockSizeException; 22 | 23 | import java.util.Objects; 24 | import java.util.function.Function; 25 | 26 | /** 27 | * The chunk de-transformation that does decryption.
28 | */ 29 | public class DecryptionChunkEnumeration implements DetransformChunkEnumeration { 30 | private final DetransformChunkEnumeration inner; 31 | private final int ivSize; 32 | private final Function cipherSupplier; 33 | 34 | /** 35 | * @param cipherSupplier a function that takes an encrypted chunk and returns the decryption cypher for it 36 | */ 37 | public DecryptionChunkEnumeration(final DetransformChunkEnumeration inner, 38 | final int ivSize, 39 | final Function cipherSupplier) { 40 | this.inner = Objects.requireNonNull(inner, "inner cannot be null"); 41 | if (ivSize <= 0) { 42 | throw new IllegalArgumentException("ivSize must be positive"); 43 | } 44 | this.ivSize = ivSize; 45 | this.cipherSupplier = Objects.requireNonNull(cipherSupplier, "cipherSupplier cannot be null"); 46 | } 47 | 48 | @Override 49 | public boolean hasMoreElements() { 50 | return inner.hasMoreElements(); 51 | } 52 | 53 | @Override 54 | public byte[] nextElement() { 55 | final var chunk = inner.nextElement(); 56 | final var cipher = cipherSupplier.apply(chunk); 57 | try { 58 | return cipher.doFinal(chunk, ivSize, chunk.length - ivSize); 59 | } catch (final IllegalBlockSizeException | BadPaddingException e) { 60 | throw new RuntimeException(e); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /e2e/src/integration-test/java/io/aiven/kafka/tieredstorage/e2e/internal/RemoteSegment.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.e2e.internal; 18 | 19 | import java.util.Objects; 20 | 21 | import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentId; 22 |
/**
 * Immutable value holding a remote log segment id together with its start and end offsets.
 */
public final class RemoteSegment {
    private final RemoteLogSegmentId remoteLogSegmentId;
    private final long startOffset;
    private final long endOffset;

    RemoteSegment(final RemoteLogSegmentId remoteLogSegmentId,
                  final long startOffset,
                  final long endOffset) {
        this.remoteLogSegmentId = remoteLogSegmentId;
        this.startOffset = startOffset;
        this.endOffset = endOffset;
    }

    public RemoteLogSegmentId remoteLogSegmentId() {
        return this.remoteLogSegmentId;
    }

    public long startOffset() {
        return this.startOffset;
    }

    public long endOffset() {
        return this.endOffset;
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final var other = (RemoteSegment) obj;
        // Compare the cheap primitive fields first, then the (nullable) segment id.
        final boolean sameOffsets = startOffset == other.startOffset && endOffset == other.endOffset;
        return sameOffsets && Objects.equals(remoteLogSegmentId, other.remoteLogSegmentId);
    }

    @Override
    public int hashCode() {
        return Objects.hash(remoteLogSegmentId, startOffset, endOffset);
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("RemoteSegment[");
        text.append("remoteLogSegmentId=").append(remoteLogSegmentId).append(", ");
        text.append("startOffset=").append(startOffset).append(", ");
        text.append("endOffset=").append(endOffset).append(']');
        return text.toString();
    }

}
76 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/transform/DetransformFinisherTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import java.io.ByteArrayInputStream; 20 | import java.nio.charset.StandardCharsets; 21 | 22 | import org.junit.jupiter.api.Test; 23 | 24 | import static org.assertj.core.api.Assertions.assertThat; 25 | 26 | class DetransformFinisherTest { 27 | 28 | @Test 29 | void sameInputStreamFromBaseDetransform() { 30 | final byte[] bytes = "test".getBytes(StandardCharsets.UTF_8); 31 | final var is = new ByteArrayInputStream(bytes); 32 | final DetransformChunkEnumeration d = new BaseDetransformChunkEnumeration(is); 33 | final DetransformFinisher f = new DetransformFinisher(d); 34 | assertThat(f.toInputStream()) 35 | .isEqualTo(is) 36 | .hasBinaryContent(bytes); 37 | } 38 | 39 | @Test 40 | void inputStreamMutatesWhenChainedDetransform() { 41 | final byte[] bytes = "test".getBytes(StandardCharsets.UTF_8); 42 | final var is = new ByteArrayInputStream(bytes); 43 | final DetransformChunkEnumeration d = new BaseDetransformChunkEnumeration(is); 44 | final
DetransformChunkEnumeration noop = new NoopDetransformEnumeration(d); 45 | final DetransformFinisher f = new DetransformFinisher(noop); 46 | assertThat(f.toInputStream()) 47 | .isNotEqualTo(is) 48 | .hasBinaryContent(bytes); 49 | } 50 | 51 | private static class NoopDetransformEnumeration implements DetransformChunkEnumeration { 52 | private final DetransformChunkEnumeration inner; 53 | 54 | public NoopDetransformEnumeration(final DetransformChunkEnumeration inner) { 55 | this.inner = inner; 56 | } 57 | 58 | @Override 59 | public boolean hasMoreElements() { 60 | return inner.hasMoreElements(); 61 | } 62 | 63 | @Override 64 | public byte[] nextElement() { 65 | return inner.nextElement(); 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
##
# Copyright 2023 Aiven Oy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
# Project version, read from the `version=` line of gradle.properties.
VERSION := $(shell grep -o -E '^version=[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?' gradle.properties | cut -c9-)
IMAGE_NAME := aivenoy/kafka-with-ts-plugin
IMAGE_VERSION := latest
IMAGE_TAG := $(IMAGE_NAME):$(IMAGE_VERSION)

.PHONY: all clean checkstyle build test integration_test e2e_test docker_image docker_push

all: clean build test

clean:
	./gradlew clean

checkstyle:
	./gradlew checkstyleMain checkstyleTest checkstyleIntegrationTest

build: build/distributions/tiered-storage-for-apache-kafka-$(VERSION).tgz storage/s3/build/distributions/s3-$(VERSION).tgz storage/gcs/build/distributions/gcs-$(VERSION).tgz storage/azure/build/distributions/azure-$(VERSION).tgz

build/distributions/tiered-storage-for-apache-kafka-$(VERSION).tgz:
	./gradlew build distTar -x test -x integrationTest

storage/s3/build/distributions/s3-$(VERSION).tgz:
	./gradlew build :storage:s3:distTar -x test -x integrationTest

storage/gcs/build/distributions/gcs-$(VERSION).tgz:
	./gradlew build :storage:gcs:distTar -x test -x integrationTest

storage/azure/build/distributions/azure-$(VERSION).tgz:
	./gradlew build :storage:azure:distTar -x test -x integrationTest

.PHONY: docs
docs:
	./gradlew :docs:genConfigsDocs
	./gradlew :docs:genMetricsDocs

test: build
	./gradlew test

integration_test: build
	./gradlew integrationTest -x :e2e:integrationTest

# Prefix of the e2e test classes to run; override with `make e2e_test E2E_TEST=...`.
E2E_TEST=LocalSystem

# `--tests` must be followed directly by its pattern, and the pattern is quoted
# so that the shell does not expand the trailing `*` against local files.
e2e_test: build
	./gradlew :e2e:integrationTest --info --tests '$(E2E_TEST)*'

.PHONY: docker_image
docker_image: build
	docker build . \
		-f docker/Dockerfile \
		--build-arg _VERSION=$(VERSION) \
		-t $(IMAGE_TAG)

.PHONY: docker_push
docker_push:
	docker push $(IMAGE_TAG)

# Prepare kernel to capture CPU events
.PHONY: async_profiler_cpu_kernel-prep
async_profiler_cpu_kernel-prep:
	sudo sh -c 'echo 1 >/proc/sys/kernel/perf_event_paranoid'
	sudo sh -c 'echo 0 >/proc/sys/kernel/kptr_restrict'
76 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/manifest/SegmentManifestV1Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import javax.crypto.spec.SecretKeySpec; 20 | 21 | import java.util.Arrays; 22 | 23 | import org.junit.jupiter.api.Test; 24 | 25 | import static org.assertj.core.api.Assertions.assertThat; 26 |
/**
 * Equality and hash code checks for {@link SegmentEncryptionMetadataV1}.
 */
public class SegmentManifestV1Test {
    /** Builds metadata with an AES key made from {@code keyBytes} and the given AAD. */
    private static SegmentEncryptionMetadataV1 metadata(final byte[] keyBytes, final byte[] aad) {
        return new SegmentEncryptionMetadataV1(new SecretKeySpec(keyBytes, "AES"), aad);
    }

    @Test
    void identical() {
        final byte[] keyBytes = new byte[32];
        final byte[] aad = new byte[32];
        final var first = metadata(keyBytes, aad);
        final var second = metadata(keyBytes, aad);
        assertThat(first).isEqualTo(second);
        assertThat(second).isEqualTo(first);
        assertThat(first).hasSameHashCodeAs(second);
    }

    @Test
    void differentDataKey() {
        final byte[] zeroKey = new byte[32];
        final byte[] onesKey = new byte[32];
        Arrays.fill(onesKey, (byte) 1);
        final byte[] aad = new byte[32];
        final var first = metadata(zeroKey, aad);
        final var second = metadata(onesKey, aad);
        assertThat(first).isNotEqualTo(second);
        assertThat(second).isNotEqualTo(first);
        assertThat(first).doesNotHaveSameHashCodeAs(second);
    }

    @Test
    void differentAAD() {
        final byte[] keyBytes = new byte[32];
        final byte[] zeroAad = new byte[32];
        final byte[] onesAad = new byte[32];
        Arrays.fill(onesAad, (byte) 1);
        final var first = metadata(keyBytes, zeroAad);
        final var second = metadata(keyBytes, onesAad);
        assertThat(first).isNotEqualTo(second);
        assertThat(second).isNotEqualTo(first);
        assertThat(first).doesNotHaveSameHashCodeAs(second);
    }
}
65 | -------------------------------------------------------------------------------- /core/src/main/java/io/aiven/kafka/tieredstorage/manifest/SegmentIndexesV1Builder.java:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.manifest; 18 | 19 | import java.util.ArrayList; 20 | import java.util.Collections; 21 | import java.util.HashMap; 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | import static org.apache.kafka.server.log.remote.storage.RemoteStorageManager.IndexType; 26 |
/**
 * Builder for {@link SegmentIndexesV1}.
 *
 * <p>Each added index is placed directly after the previously added one:
 * its position is the sum of the sizes added before it.
 */
public class SegmentIndexesV1Builder {
    private final Map<IndexType, SegmentIndexV1> indexes = new HashMap<>(IndexType.values().length);
    // Position at which the next added index starts.
    private int currentPosition = 0;

    /**
     * Registers an index of the given type and size at the current position.
     *
     * @return this builder
     * @throws IllegalStateException if an index of this type was already added
     */
    public SegmentIndexesV1Builder add(final IndexType indexType, final int size) {
        if (indexes.containsKey(indexType)) {
            throw new IllegalStateException("Index " + indexType + " is already added");
        }
        indexes.put(indexType, new SegmentIndexV1(currentPosition, size));
        currentPosition += size;
        return this;
    }

    // for testing and logging purposes
    /** Returns the index types added so far, sorted in their natural (enum) order. */
    public List<IndexType> indexes() {
        final var indexTypes = new ArrayList<>(indexes.keySet());
        Collections.sort(indexTypes);
        return indexTypes;
    }

    /**
     * Creates the {@link SegmentIndexesV1}.
     *
     * @throws IllegalStateException if fewer than 4 indexes were added, or if exactly 4 were added
     *                               and one of them is {@code TRANSACTION}
     */
    public SegmentIndexesV1 build() {
        if (indexes.size() < 4) {
            throw new IllegalStateException("Not enough indexes have been added; at least 4 required. "
                + "Indexes included: " + indexes());
        }
        // With exactly 4 entries, the presence of TRANSACTION means one of the other types is absent.
        if (indexes.size() == 4 && indexes.containsKey(IndexType.TRANSACTION)) {
            throw new IllegalStateException("OFFSET, TIMESTAMP, PRODUCER_SNAPSHOT, "
                + "and LEADER_EPOCH indexes are required");
        }
        return new SegmentIndexesV1(
            indexes.get(IndexType.OFFSET),
            indexes.get(IndexType.TIMESTAMP),
            indexes.get(IndexType.PRODUCER_SNAPSHOT),
            indexes.get(IndexType.LEADER_EPOCH),
            indexes.get(IndexType.TRANSACTION)
        );
    }
}
65 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/metadata/SegmentCustomMetadataSerdeTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.metadata; 18 | 19 | import java.util.TreeMap; 20 | 21 | import org.apache.kafka.common.protocol.types.SchemaException; 22 | 23 | import org.junit.jupiter.api.Test; 24 | 25 | import static org.assertj.core.api.Assertions.assertThat; 26 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 27 | 28 | class SegmentCustomMetadataSerdeTest { 29 | final SegmentCustomMetadataSerde serde = new SegmentCustomMetadataSerde(); 30 | 31 | @Test 32 | void shouldSerDeEmptyFields() { 33 | final var bytes = serde.serialize(new TreeMap<>()); 34 | assertThat(bytes).isEqualTo(new byte[] {}); 35 | final var fields = serde.deserialize(bytes); 36 | assertThat(fields).isEmpty(); 37 | } 38 | 39 | @Test 40 | void shouldSerDeSomeFields() { 41 | final var input = new TreeMap(); 42 | input.put(0, 100L); // remote_size 43 | input.put(2, "foo"); // object_key 44 | 45 | final var bytes = serde.serialize(input); 46 | assertThat(bytes).hasSize(11); // calculated on the first run 47 | final var fields = serde.deserialize(bytes); 48 | assertThat(fields).hasSize(2) 49 | .containsEntry(0, 100L) 50 | .containsEntry(2, "foo"); 51 | } 52 | 53 | @Test 54 | void shouldFailWhenWrongType() { 55 | final var input = new TreeMap(); 56 | input.put(0, "foo"); // remote_size with wrong type 57 | 58 | assertThatThrownBy(() -> serde.serialize(input)) 59 | .isInstanceOf(SchemaException.class); 60 | } 61 | 62 | @Test 63 | void shouldFailWhenUnknownLocation() { 64 | final var input = new TreeMap(); 65 | input.put(SegmentCustomMetadataField.values().length + 1, "foo"); // unknown location 66 | 67 | assertThatThrownBy(() -> serde.serialize(input)) 68 | .isInstanceOf(SchemaException.class); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /core/src/test/java/io/aiven/kafka/tieredstorage/transform/DecompressionChunkEnumerationTest.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Aiven Oy 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package io.aiven.kafka.tieredstorage.transform; 18 | 19 | import com.github.luben.zstd.ZstdCompressCtx; 20 | import org.junit.jupiter.api.Test; 21 | import org.junit.jupiter.api.extension.ExtendWith; 22 | import org.mockito.Mock; 23 | import org.mockito.junit.jupiter.MockitoExtension; 24 | 25 | import static org.assertj.core.api.Assertions.assertThat; 26 | import static org.assertj.core.api.Assertions.assertThatThrownBy; 27 | import static org.mockito.Mockito.times; 28 | import static org.mockito.Mockito.verify; 29 | import static org.mockito.Mockito.when; 30 | 31 | @ExtendWith(MockitoExtension.class) 32 | class DecompressionChunkEnumerationTest { 33 | @Mock 34 | DetransformChunkEnumeration inner; 35 | 36 | @Test 37 | void nullInnerEnumeration() { 38 | assertThatThrownBy(() -> new DecompressionChunkEnumeration(null)) 39 | .isInstanceOf(NullPointerException.class) 40 | .hasMessage("inner cannot be null"); 41 | } 42 | 43 | @Test 44 | void hasMoreElementsPropagated() { 45 | final var transform = new DecompressionChunkEnumeration(inner); 46 | when(inner.hasMoreElements()) 47 | .thenReturn(true) 48 | .thenReturn(false); 49 | assertThat(transform.hasMoreElements()).isTrue(); 50 | assertThat(transform.hasMoreElements()).isFalse(); 51 | verify(inner, 
times(2)).hasMoreElements(); 52 | } 53 | 54 | @Test 55 | void decompress() { 56 | final byte[] data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; 57 | final byte[] compressed; 58 | try (final ZstdCompressCtx compressCtx = new ZstdCompressCtx()) { 59 | compressCtx.setContentSize(true); 60 | compressed = compressCtx.compress(data); 61 | } 62 | 63 | final var transform = new DecompressionChunkEnumeration(inner); 64 | when(inner.nextElement()).thenReturn(compressed); 65 | 66 | assertThat(transform.nextElement()).isEqualTo(data); 67 | } 68 | } 69 | --------------------------------------------------------------------------------