├── .github └── workflows │ └── workflow.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── bin ├── backup-standalone.sh ├── completed_segments.py ├── partition-index.sh ├── restore-standalone.sh ├── segment-index.sh └── segment.sh ├── build.gradle ├── docs ├── Blogposts │ └── 2019-06_Introducing_Kafka_Backup.md ├── Comparing_Kafka_Backup_Solutions.md ├── FAQ.md ├── Kafka_Backup_Architecture.md ├── Tooling.md └── Usage.md ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle ├── src ├── main │ └── java │ │ └── de │ │ └── azapps │ │ └── kafkabackup │ │ ├── cli │ │ ├── PartitionIndexCLI.java │ │ ├── SegmentCLI.java │ │ ├── SegmentIndexCLI.java │ │ └── formatters │ │ │ ├── Base64Formatter.java │ │ │ ├── ByteFormatter.java │ │ │ ├── DetailedRecordFormatter.java │ │ │ ├── ListRecordFormatter.java │ │ │ ├── RawFormatter.java │ │ │ ├── RecordFormatter.java │ │ │ └── UTF8Formatter.java │ │ ├── common │ │ ├── BackupConfig.java │ │ ├── offset │ │ │ ├── EndOffsetReader.java │ │ │ ├── OffsetSink.java │ │ │ ├── OffsetSource.java │ │ │ └── OffsetUtils.java │ │ ├── partition │ │ │ ├── PartitionIndex.java │ │ │ ├── PartitionIndexEntry.java │ │ │ ├── PartitionIndexRestore.java │ │ │ ├── PartitionReader.java │ │ │ ├── PartitionUtils.java │ │ │ └── PartitionWriter.java │ │ ├── record │ │ │ ├── Record.java │ │ │ └── RecordSerde.java │ │ └── segment │ │ │ ├── SegmentIndex.java │ │ │ ├── SegmentIndexEntry.java │ │ │ ├── SegmentIndexRestore.java │ │ │ ├── SegmentReader.java │ │ │ ├── SegmentUtils.java │ │ │ ├── SegmentWriter.java │ │ │ └── UnverifiedSegmentReader.java │ │ ├── sink │ │ ├── BackupSinkConfig.java │ │ ├── BackupSinkConnector.java │ │ └── BackupSinkTask.java │ │ └── source │ │ ├── BackupSourceConfig.java │ │ ├── BackupSourceConnector.java │ │ └── BackupSourceTask.java └── test │ ├── assets │ └── v1 │ │ ├── partitionindex │ │ └── testIndex │ │ ├── records │ │ ├── empty_record │ │ ├── header_record │ │ ├── null_record │ │ └── simple_record │ │ ├── segmentindex │ │ └── testIndex │ │ └── segments │ │ ├── segment_partition_000_from_offset_0000000000_index │ │ └── segment_partition_000_from_offset_0000000000_records │ └── java │ └── de │ └── azapps │ └── kafkabackup │ ├── common │ ├── TestUtils.java │ ├── partition │ │ ├── PartitionIndexTest.java │ │ └── PartitionSerdeTest.java │ ├── record │ │ ├── RecordSerdeTest.java │ │ └── RecordTest.java │ └── segment │ │ ├── SegmentIndexTest.java │ │ └── SegmentSerdeTest.java │ └── sink │ ├── BackupSinkTaskTest.java │ ├── MockEndOffsetReader.java │ ├── MockOffsetSink.java │ └── MockSinkTaskContext.java └── system_test ├── .gitignore ├── 01_simple_roundtrip_test.yaml ├── 02_full_test.yaml ├── 03_start_n_stop.yaml ├── 04_delete_old_segments.yaml ├── README.md ├── archive ├── 001_simple_1partition_test │ ├── connect-backup-sink.properties │ ├── connect-backup-source.properties │ └── test.sh ├── backup_with_burry │ ├── connect-backup-sink.properties │ ├── connect-backup-source.properties │ └── test.sh ├── full_test │ ├── connect-backup-sink.properties │ ├── connect-backup-source.properties │ └── round_trip.sh └── utils.sh ├── docker-compose.yml └── utils ├── Dockerfile ├── kafka-configs ├── restore-to-kafka.properties ├── to-backup-kafka.properties └── zookeeper.properties ├── runutil ├── utils.py └── utils.sh /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | 
buildAndTest: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v1 10 | # Build 11 | - name: Build Image 12 | run: docker build . --tag kafka-backup-dev:latest 13 | # Get the jar file and upload it as an artifact 14 | - name: Prepare upload kafka-backup.jar 15 | run: | 16 | id=$(docker create kafka-backup-dev:latest) 17 | docker cp $id:/opt/kafka-backup/kafka-backup.jar . 18 | docker rm -v $id 19 | - uses: actions/upload-artifact@v1 20 | with: 21 | name: kafka-backup-jar 22 | path: ./kafka-backup.jar 23 | # Integration Tests 24 | - name: setup env 25 | run: | 26 | echo "::set-env name=GOPATH::$(go env GOPATH)" 27 | echo "::add-path::$(go env GOPATH)/bin" 28 | - uses: actions/setup-go@v2-beta 29 | with: 30 | go-version: '^1.13.1' 31 | # Prepare Tests 32 | - name: Setup Coyote 33 | run: go get github.com/landoop/coyote 34 | - name: Setup Tests 35 | working-directory: ./system_test/ 36 | run: | 37 | docker-compose pull 38 | docker-compose build 39 | mkdir -p /tmp/kafka-backup out 40 | chmod 777 /tmp/kafka-backup 41 | # Run system tests 42 | - name: Simple Roundtrip Test 43 | working-directory: ./system_test/ 44 | run: coyote -c 01_simple_roundtrip_test.yaml --out out/01_simple_roundtrip_test.html 45 | - name: Full Test 46 | working-directory: ./system_test/ 47 | run: coyote -c 02_full_test.yaml --out out/02_full_test.html 48 | - name: Start and Stop Kafka Connect 49 | working-directory: ./system_test/ 50 | run: coyote -c 03_start_n_stop.yaml --out out/03_start_n_stop.html 51 | - name: Delete old Segments before restore 52 | working-directory: ./system_test/ 53 | run: coyote -c 04_delete_old_segments.yaml --out out/04_delete_old_segments.html 54 | # Upload Test Results 55 | - name: Upload Test Results 56 | uses: actions/upload-artifact@v1 57 | with: 58 | name: Test Results 59 | path: ./system_test/out/ 60 | 61 | # Create a Release 62 | create-release: 63 | needs: buildAndTest 64 | if: startsWith(github.ref, 'refs/tags/') 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v1 68 | - uses: actions/download-artifact@v1 69 | with: 70 | name: kafka-backup-jar 71 | - name: Prepare Artifact 72 | run: | 73 | mv kafka-backup-jar/kafka-backup.jar bin/kafka-backup.jar 74 | mv bin kafka-backup 75 | tar cfz kafka-backup.tar.gz kafka-backup 76 | - name: Create Release 77 | id: create_release 78 | uses: actions/create-release@v1 79 | env: 80 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 81 | with: 82 | tag_name: ${{ github.ref }} 83 | release_name: Release ${{ github.ref }} 84 | draft: true 85 | prerelease: false 86 | - name: Upload Release Asset 87 | id: upload-release-asset 88 | uses: actions/upload-release-asset@v1 89 | env: 90 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 91 | with: 92 | upload_url: ${{ steps.create_release.outputs.upload_url }} 93 | asset_path: ./kafka-backup.tar.gz 94 | asset_name: kafka-backup.tar.gz 95 | asset_content_type: application/gzip -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | local-* 3 | .idea 4 | *.iml 5 | .gradle 6 | *\#* 7 | *~ 8 | bin/*.jar -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build Kafka Backup 2 | FROM gradle:6.3.0-jdk8 AS builder 3 | WORKDIR /opt/kafka-backup 4 | COPY . 
/opt/kafka-backup 5 | RUN gradle --no-daemon check test shadowJar 6 | 7 | # Build Docker Image with Kafka Backup Jar 8 | FROM openjdk:8u212-jre-alpine 9 | 10 | ARG kafka_version=2.5.0 11 | ARG scala_version=2.12 12 | ARG glibc_version=2.31-r0 13 | 14 | ENV KAFKA_VERSION=$kafka_version \ 15 | SCALA_VERSION=$scala_version \ 16 | KAFKA_HOME=/opt/kafka \ 17 | GLIBC_VERSION=$glibc_version 18 | 19 | ENV PATH=${PATH}:${KAFKA_HOME}/bin 20 | 21 | RUN apk add --no-cache bash curl \ 22 | && wget "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" -O "/tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" \ 23 | && tar xfz /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz -C /opt \ 24 | && rm /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \ 25 | && ln -s /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} ${KAFKA_HOME} \ 26 | && wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VERSION}/glibc-${GLIBC_VERSION}.apk \ 27 | && apk add --no-cache --allow-untrusted glibc-${GLIBC_VERSION}.apk \ 28 | && rm glibc-${GLIBC_VERSION}.apk 29 | 30 | COPY ./bin /opt/kafka-backup/ 31 | COPY --from=builder /opt/kafka-backup/build/libs/kafka-backup.jar /opt/kafka-backup/ 32 | 33 | ENV PATH="${KAFKA_HOME}/bin:/opt/kafka-backup/:${PATH}" 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kafka Backup 2 | 3 | > **Update:** I am no longer maintaining the Kafka Backup project. As an alternative, I recommend [Kannika](https://kannika.io/?utm_source=github_anatoly), a commercial backup solution developed by my friends at [Cymo](https://cymo.eu/?utm_source=github_anatoly) (and don't forget to say hello from Anatoly 😊). 4 | > [Disclosure: I am a business partner of Cymo and may receive compensation for referrals to Kannika] 5 | > 6 | > Please contact me if you want to continue maintaining this project. 7 | 8 | Kafka Backup is a tool to back up and restore your Kafka data 9 | including all (configurable) topic data and especially also consumer 10 | group offsets. To the best of our knowledge, Kafka Backup is the only 11 | viable solution to take a cold backup of your Kafka data and restore 12 | it correctly. 13 | 14 | It is designed as two connectors for Kafka 15 | Connect: A sink connector (backing data up) and a source connector 16 | (restoring data). 17 | 18 | Currently `kafka-backup` supports backup and restore to/from the file 19 | system. 20 | 21 | ## Features 22 | 23 | * Backup and restore topic data 24 | * Backup and restore consumer-group offsets 25 | * Currently supports only backup/restore to/from local file system 26 | * Released as a jar file or packaged as a Docker image 27 | 28 | # Getting Started 29 | 30 | **Option A) Download binary** 31 | 32 | Download the latest release [from GitHub](https://github.com/itadventurer/kafka-backup/releases) and unzip it. 33 | 34 | **Option B) Use Docker image** 35 | 36 | Pull the latest Docker image from [Docker Hub](https://hub.docker.com/repository/docker/itadventurer/kafka-backup/tags) 37 | 38 | **DO NOT USE THE `latest` STAGE IN PRODUCTION**. `latest` are automatic builds of the master branch. Be careful! 39 | 40 | **Option C) Build from source** 41 | 42 | Just run `./gradlew shadowJar` in the root directory of Kafka Backup. You will find the CLI tools in the `bin` directory. 
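For example, a minimal build sequence (a sketch: the `build/libs/` output path comes from this repository's Gradle Shadow setup and Dockerfile, and the CLI wrappers in `bin/` (e.g. `segment.sh`) look for `kafka-backup.jar` next to themselves):

```sh
./gradlew shadowJar
# Make the jar available to the CLI wrapper scripts in bin/
cp build/libs/kafka-backup.jar bin/kafka-backup.jar
```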
43 | 44 | ## Start Kafka Backup 45 | 46 | ```sh 47 | backup-standalone.sh --bootstrap-server localhost:9092 \ 48 | --target-dir /path/to/backup/dir --topics 'topic1,topic2' 49 | ``` 50 | 51 | In Docker: 52 | ```sh 53 | docker run -d -v /path/to/backup-dir/:/kafka-backup/ --rm \ 54 | kafka-backup:[LATEST_TAG] \ 55 | backup-standalone.sh --bootstrap-server kafka:9092 \ 56 | --target-dir /kafka-backup/ --topics 'topic1,topic2' 57 | ``` 58 | 59 | You can pass options via CLI arguments or using environment variables: 60 | 61 | | Parameter | Type/required? | Description | 62 | |---------------------------------------------|----------------|----------------------------------------------------------------------------------------------------------------------| 63 | | `--bootstrap-server`
`BOOTSTRAP_SERVER` | [REQUIRED] | The Kafka server to connect to | 64 | | `--target-dir`
`TARGET_DIR` | [REQUIRED] | Directory where the backup files should be stored | 65 | | `--topics`
`TOPICS` | | List of topics to be backed up. You must provide either `--topics` or `--topics-regex`, but not both | 66 | | `--topics-regex`
`TOPICS_REGEX` | | Regex of topics to be backed up. You must provide either `--topics` or `--topics-regex`, but not both | 67 | | `--max-segment-size`
`MAX_SEGMENT_SIZE` | | Size of the backup segments in bytes. Default: 1GiB | 68 | | `--command-config`
`COMMAND_CONFIG` | | Property file containing configs to be passed to Admin Client. Only useful if you have additional connection options | 69 | | `--debug`
`DEBUG=y` | | Print Debug information | 70 | | `--help` | | Prints this message | 71 | 72 | **Kafka Backup does not stop!** The Backup process is a continuous background job that runs forever as Kafka models data as a stream without end. See [Issue 52: Support point-in-time snapshots](https://github.com/itadventurer/kafka-backup/issues/52) for more information. 73 | 74 | ## Restore data 75 | 76 | ```sh 77 | restore-standalone.sh --bootstrap-server localhost:9092 \ 78 | --source-dir /path/to/backup/dir --topics 'topic1,topic2' 79 | ``` 80 | 81 | In Docker: 82 | ```sh 83 | docker run -v /path/to/backup/dir:/kafka-backup/ --rm \ 84 | kafka-backup:[LATEST_TAG] \ 85 | restore-standalone.sh --bootstrap-server kafka:9092 \ 86 | --source-dir /kafka-backup/ --topics 'topic1,topic2' 87 | ``` 88 | 89 | You can pass options via CLI arguments or using environment variables: 90 | 91 | 92 | | Parameter | Type/required? | Description | 93 | |---------------------------------------------|----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 94 | | `--bootstrap-server`
`BOOTSTRAP_SERVER` | [REQUIRED] | The Kafka server to connect to | 95 | | `--source-dir`
`SOURCE_DIR` | [REQUIRED] | Directory where the backup files are found | 96 | | `--topics`
`TOPICS` | [REQUIRED] | List of topics to restore | 97 | | `--batch-size`
`BATCH_SIZE` | | Batch size (Default: 1MiB) | 98 | | `--offset-file`
`OFFSET_FILE` | | File where to store offsets. THIS FILE IS CRUCIAL FOR A CORRECT RESTORATION PROCESS. IF YOU LOSE IT, YOU NEED TO START THE RESTORE FROM SCRATCH; OTHERWISE YOU WILL HAVE DUPLICATE DATA. Default: [source-dir]/restore.offsets | 99 | | `--command-config`
`COMMAND_CONFIG` | | Property file containing configs to be passed to Admin Client. Only useful if you have additional connection options | 100 | | `--help`
`HELP` | | Prints this message | 101 | | `--debug`
`DEBUG` | | Print Debug information (if using the environment variable, set it to 'y') | 102 | 103 | ## More Documentation 104 | 105 | * [FAQ](./docs/FAQ.md) 106 | * [High Level 107 | Introduction](./docs/Blogposts/2019-06_Introducing_Kafka_Backup.md) 108 | * [Comparing Kafka Backup 109 | Solutions](./docs/Comparing_Kafka_Backup_Solutions.md) 110 | * [Architecture](./docs/Kafka_Backup_Architecture.md) 111 | * [Tooling](./docs/Tooling.md) 112 | 113 | ## License 114 | 115 | This project is licensed under the Apache License Version 2.0 (see 116 | [LICENSE](./LICENSE)). 117 | -------------------------------------------------------------------------------- /bin/completed_segments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Find (and delete) kafka-backup segment files 4 | """ 5 | 6 | import os 7 | import re 8 | import sys 9 | import argparse 10 | 11 | 12 | def filename_pattern(): 13 | """ Kafka-backup segment/index filename pattern. 14 | 15 | Using function as constant string. 16 | This pattern is used to make filename-matching regex below too. 17 | """ 18 | return 'segment_partition_%s_from_offset_%s_%s' 19 | 20 | 21 | def oneplus(string): 22 | """ Check argument is >= 1 """ 23 | value = int(string) 24 | if value < 1: 25 | raise argparse.ArgumentTypeError("cannot be less than 1") 26 | return value 27 | 28 | 29 | def parse_args(): 30 | """ Parse cmdline args """ 31 | parser = argparse.ArgumentParser( 32 | description='Find (and delete) kafka-backup segment files' 33 | ) 34 | parser.add_argument( 35 | '-d', '--delete', 36 | help='delete segment files', 37 | action='store_true', 38 | default=False, 39 | ) 40 | parser.add_argument( 41 | '-k', '--keep', 42 | help='keep N segment files (one by default)', 43 | type=oneplus, 44 | default=1, 45 | metavar='N', 46 | ) 47 | parser.add_argument( 48 | '-l', '--list', 49 | help='list segment files', 50 | action='store_true', 51 | default=False, 52 | ) 53 | parser.add_argument( 54 | 'target_dir', 55 | help='backup directory (target.dir)', 56 | default=os.getcwd(), 57 | ) 58 | return parser.parse_args() 59 | 60 | 61 | def collect_segments(target_dir): 62 | """ Collect segment partitions & offsets per directory (topic) 63 | 64 | Args: 65 | target_dir (str): Kafka-backup target.dir 66 | 67 | Returns: 68 | A dict mapping keys to topics. Each value is dict of partitions with 69 | list of offsets in it. For example 70 | 71 | topic1: { 72 | 000: [ 0000000000 ], 73 | }, 74 | topic2: { 75 | 000: [ 0000000021 ], 76 | 001: [ 0000000391 ], 77 | 002: [ 0000001291, 0000018423 ] 78 | }, 79 | ... 
80 | 81 | """ 82 | 83 | # Make regex from pattern 84 | # Implemented this way to keep file pattern just in single place (filename_pattern() above) 85 | fregex = re.compile(r"^%s$" % (filename_pattern() % (r'(\d{3})', r'(\d{10})', 'records'))) 86 | res = {} 87 | # Traverse dirtree to collect offsets in partitions per topic 88 | for tdir, _, files in os.walk(target_dir): 89 | if tdir == target_dir: 90 | continue 91 | res[tdir] = {} 92 | for segfile in sorted(files): 93 | match = fregex.match(segfile) 94 | if match: 95 | (partition, offset) = match.groups() 96 | if partition not in res[tdir]: 97 | res[tdir][partition] = [] 98 | res[tdir][partition].append(offset) 99 | return res 100 | 101 | 102 | def process_segment(tdir, partition, offset, do_delete, do_list): 103 | """ Process segment 104 | 105 | Args: 106 | tdir (str): topic directory 107 | partition (str): topic partition 108 | offset (str): starting segment offset 109 | do_delete (bool): delete segment files? 110 | do_list (boot): list segment files? 111 | """ 112 | index_file = filename_pattern() % (partition, offset, 'index') 113 | records_file = filename_pattern() % (partition, offset, 'records') 114 | 115 | if do_delete or do_list: 116 | index_path = os.path.join(tdir, index_file) 117 | records_path = os.path.join(tdir, records_file) 118 | if do_list: 119 | print(index_path) 120 | print(records_path) 121 | if do_delete: 122 | os.unlink(index_path) 123 | os.unlink(records_path) 124 | else: 125 | print("Topic %s, First offset %s - Index file: %s Records File: %s" 126 | % (os.path.basename(tdir), offset, index_file, records_file)) 127 | 128 | 129 | def main(): 130 | """ int main(int argc, char **argv) """ 131 | args = parse_args() 132 | 133 | segs = collect_segments(args.target_dir) 134 | 135 | for tdir, seg_data in segs.items(): 136 | for partition, offsets in seg_data.items(): 137 | for i, offset in enumerate(offsets): 138 | # Perform action requested (list/delete/default) on the file 139 | # Skip (keep) last `args.keep` files (1 by default) 140 | # That one skipped by default file is usually incompleted so must be kept anyway 141 | if i < len(offsets) - args.keep: 142 | process_segment(tdir, partition, offset, args.delete, args.list) 143 | return 0 144 | 145 | 146 | if __name__ == '__main__': 147 | sys.exit(main()) 148 | -------------------------------------------------------------------------------- /bin/partition-index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | java -cp "$( dirname "${BASH_SOURCE[0]}" )/kafka-backup.jar" de.azapps.kafkabackup.cli.PartitionIndexCLI "$@" -------------------------------------------------------------------------------- /bin/segment-index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | java -cp "$( dirname "${BASH_SOURCE[0]}" )/kafka-backup.jar" de.azapps.kafkabackup.cli.SegmentIndexCLI "$@" -------------------------------------------------------------------------------- /bin/segment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | java -cp "$( dirname "${BASH_SOURCE[0]}" )/kafka-backup.jar" de.azapps.kafkabackup.cli.SegmentCLI "$@" -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | jcenter() 4 | maven { 5 | url "https://plugins.gradle.org/m2/" 6 | } 7 | } 8 | 
dependencies { 9 | classpath 'com.github.jengelman.gradle.plugins:shadow:4.0.2' 10 | classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.20.0" 11 | classpath "gradle.plugin.com.github.spotbugs.snom:spotbugs-gradle-plugin:4.3.0" 12 | } 13 | } 14 | 15 | apply plugin: 'java' 16 | apply plugin: 'idea' 17 | apply plugin: "com.github.spotbugs" 18 | 19 | description = "kafka-backup" 20 | group = 'de.azapps.kafkabackup' 21 | 22 | ext { 23 | pomHumanName = 'Kafka Backup' 24 | pomDesc = 'Kafka Backup Connector' 25 | } 26 | 27 | allprojects { 28 | sourceCompatibility = 1.8 29 | targetCompatibility = 1.8 30 | } 31 | 32 | repositories { 33 | mavenCentral() 34 | } 35 | 36 | dependencies { 37 | implementation "org.slf4j:slf4j-api:1.7.26" 38 | implementation group: 'org.apache.kafka', name: 'connect-api', version: '2.4.0' 39 | implementation group: 'org.apache.kafka', name: 'kafka-clients', version: '2.4.0' 40 | implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: '2.10.1' 41 | implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.10.1' 42 | implementation group: 'net.sf.jopt-simple', name: 'jopt-simple', version: '6.0-alpha-3' 43 | implementation 'com.github.spotbugs:spotbugs-annotations:4.0.1' 44 | testImplementation('org.junit.jupiter:junit-jupiter:5.6.0') 45 | } 46 | 47 | apply plugin: "com.github.johnrengelman.shadow" 48 | 49 | shadowJar { 50 | dependencies { 51 | } 52 | } 53 | 54 | task javadocJar(type: Jar, dependsOn: javadoc) { 55 | classifier = 'javadoc' 56 | from subprojects*.tasks.javadoc.destinationDir 57 | } 58 | 59 | task sourcesJar(type: Jar) { 60 | from subprojects*.sourceSets.main.allSource 61 | classifier = 'sources' 62 | } 63 | 64 | artifacts { 65 | archives javadocJar, sourcesJar 66 | } 67 | test { 68 | useJUnitPlatform() 69 | } -------------------------------------------------------------------------------- /docs/Comparing_Kafka_Backup_Solutions.md: -------------------------------------------------------------------------------- 1 | # Comparing Kafka Backup Solutions 2 | 3 | 4 | > **Update:** I am no longer maintaining the Kafka Backup project. As an alternative, I recommend [Kannika](https://kannika.io/?utm_source=github_anatoly), a commercial backup solution developed by my friends at [Cymo](https://cymo.eu/?utm_source=github_anatoly) (and don't forget to say hello from Anatoly 😊). 5 | > [Disclosure: I am a business partner of Cymo and may receive compensation for referrals to Kannika] 6 | > 7 | > Please contact me if you want to continue maintaining this project. 8 | 9 | Basically there are three other ways to backup and restore data 10 | from/to Kafka: 11 | 12 | ## File System Snapshots 13 | 14 | This was the easiest and most reliable way to backup data and consumer 15 | offsets from Kafka. The procedure basically shuts down one broker 16 | after another and performs a file system snapshot which is stored on 17 | another (cold) disk. 18 | 19 | **Backup Procedure:** 20 | 21 | * Repeat for each Kafka broker: 22 | 1. Shut down the broker 23 | 2. Take a snapshot of the Filesystem (optional) 24 | 3. Copy the snapshot (or simply the files) to the backup storage 25 | 4. 
Turn on the broker and wait until all partitions are in sync 26 | 27 | **Restore Procedure:** 28 | 29 | * Restore the snapshot for each broker 30 | * Boot the brokers 31 | 32 | **Advantages:** 33 | 34 | * Uses native OS tools 35 | * As this procedure needs to be done very often, the fear of shutting 36 | down a broker is minimized (especially for a team and environment 37 | with little Kafka expertise) 38 | * Offsets are backed up and restored correctly 39 | * Internal topics are backed up and restored correctly 40 | * Compacted messages are deleted too 41 | * Messages older than the retention time are deleted too 42 | * Uses cold storage 43 | 44 | **Disadvantages:** 45 | 46 | * Each message is backed up `replication factor`-times. Even if it 47 | would be enough to store it without replication. 48 | * Reduced availability as every broker needs to be turned off for a 49 | backup 50 | * Incremental Backups are harder to achieve (e.g. due to partition 51 | rebalancing) 52 | * **POTENTIAL DATA LOSS**: If the backup is performed during a 53 | partition rebalance (very likely when the backup takes a loooong 54 | time) the backup could miss a whole partition due to bad timing. 55 | 56 | 57 | ## Using Mirror Maker 2 to back up data to another Cluster 58 | 59 | The traditional Mirror Maker has many issues as discussed in 60 | [KIP-382](https://cwiki.apache.org/confluence/display/KAFKA/KIP-382%3A+MirrorMaker+2.0). Mirror 61 | Maker 2 addresses many of them and can be used to back up data from 62 | one cluster to another. 63 | 64 | Mirror Maker 2 is also (as `kafka-backup`) based on Kafka Connect and 65 | copies consumer offsets too. 66 | 67 | **Backup Procedure A+B (normal setup):** 68 | 69 | * Set up the MM2 Connector that copies the data from the topic 70 | `[topic]` on the source cluster to the topic 71 | `[source-cluster-name].[topic]` on the sink cluster. 72 | * Mirror Maker 2 ensures that the messages are copied continuously; 73 | offsets are also copied to a separate topic 74 | 75 | **Backup Procedure C (for consistent Snapshots):** 76 | 77 | * Set up the sink (backup) cluster with one broker 78 | * Set up the topics on the sink cluster with a replication factor of 79 | `1` 80 | * Set up MM2 to copy data from the source cluster to the sink cluster 81 | * Use a cronjob to shut down the sink cluster (with one broker) 82 | regularly and take a snapshot of the file system and store it on 83 | cold storage. 84 | 85 | **Restore Procedure A (Use other cluster):** 86 | 87 | * Use the offset sync topic to configure the consumer groups to 88 | consume from the correct offset. 89 | * Set up the consumers to use the other cluster. Throw away the old 90 | one.
91 | * Set up the clients to produce and consume from the new topics in the 92 | new cluster 93 | * Set up a new Backup Cluster 94 | 95 | **Restore Procedure B (Mirror data back):** 96 | 97 | * Create a new Kafka Cluster 98 | * Set up Mirror Maker 2 to copy the data to the new cluster 99 | * Continue with procedure A 100 | 101 | **Restore Procedure C (Mirror + Snapshot):** 102 | 103 | * Use Procedure B or restore a new cluster from the file system 104 | snapshots 105 | * Add more nodes accordingly 106 | * Increase the replication factor to match the requirements 107 | * Rebalance the partitions if needed 108 | * Continue with procedure A 109 | 110 | **Advantages:** 111 | 112 | * Support for warm cluster fail-over (active-active, active-passive) 113 | * Support for more advanced cluster topologies 114 | 115 | **Disadvantages:** 116 | 117 | * Requires a second Kafka Cluster 118 | * Apart from `C` this is a warm backup and does not protect against 119 | bugs in Kafka or the underlying OS 120 | * Requires custom implementation of the switch-over handling to the 121 | restored cluster 122 | * Adds a lot of complexity to the setup 123 | 124 | ## `kafka-connect-s3` 125 | 126 | `kafka-connect-s3` is a popular Kafka Connect connector to mirror the 127 | data from topics to Amazon S3 (or other compatible services like 128 | Minio). Zalando describes a setup in their article [Surviving Data 129 | Loss](https://jobs.zalando.com/tech/blog/backing-up-kafka-zookeeper/). 130 | 131 | **Backup procedure:** 132 | 133 | * Set up the sink connector to use your S3 endpoint 134 | * Set up another sink connector that backs up the `__consumer_offsets` topic. 135 | 136 | **Restore procedure:** 137 | 138 | * Set up the source connector to read the data from S3 into Kafka 139 | * Manually extract the new offset for the consumers and manually 140 | identify which offset on the new Kafka cluster matches the old 141 | one. (This is not a trivial task – you would need to count the ACK'd 142 | messages from the beginning to find out the exact offset – while not 143 | forgetting about compacted and deleted messages) 144 | 145 | **Advantages:** 146 | 147 | * Cold backup (to S3) 148 | * Possible to use in downstream services that work only with S3 (e.g. Data 149 | Warehouses) 150 | 151 | **Disadvantages:** 152 | 153 | * Supports only S3 (and compatible systems) as the storage backend 154 | * No support for restoring consumer offsets (the method described 155 | above could be described as guesstimating and will not work in many 156 | edge cases) 157 | 158 | ## `kafka-backup` 159 | 160 | `kafka-backup` is inspired heavily by Mirror Maker 2 and 161 | `kafka-connect-s3`. It consists of a sink and a source connector both 162 | of which support the backup and restore of the topic data and also 163 | consumer offsets.
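As a rough illustration, a minimal sink-connector configuration sketch (the `connector.class` and `target.dir` names are taken from this repository; `name`, `tasks.max` and `topics` are standard Kafka Connect settings; topic names and paths are placeholders, see the `system_test/` properties files for working configurations):

```properties
name=backup-sink
connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector
tasks.max=1
topics=topic1,topic2
# Directory where segment and index files are written (placeholder path)
target.dir=/path/to/backup/dir
```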
164 | 165 | **Backup Procedure:** 166 | 167 | * Set up the Kafka Backup Sink connector 168 | * Copy the backed up data to a backup storage of your choice 169 | * See [GitHub](http://github.com/azapps/kafka-backup) for more 170 | information of how to back up your Kafka Cluster 171 | 172 | **Restore Procedure** 173 | 174 | * Set up the Kafka Backup Source connector 175 | * Wait until it finished (see logs for information) 176 | * Use the restored cluster 177 | 178 | **Advantages:** 179 | 180 | * Only solution which is able to restore topic data and also consumer 181 | offsets 182 | * Only solution designed to take cold backups of Kafka 183 | * Simple to do incremental backups 184 | 185 | **Disadvantages:** 186 | 187 | * See [GitHub](http://github.com/azapps/kafka-backup) for the current 188 | maturity status of the project 189 | * Currently supports only the file system as the storage backend 190 | * Requires Kafka Connect binaries of Kafka 2.3 191 | -------------------------------------------------------------------------------- /docs/FAQ.md: -------------------------------------------------------------------------------- 1 | # How to restore to a different topic 2 | 3 | > I didn't see in the documentation if it's possible to be able to restore to a different destination topic, such as mybackupedtopic-restored. It would help with testing restore procedures without disturbing the existing topic, among other things. 4 | 5 | Simply rename the topic directories in the Backup target. 6 | 7 | # Restoring a multi-partition topic does not work 8 | 9 | > When I restore topic with 24 partitions it creates topic with one partitions and restore failed. 10 | > Restore successful if I create 24 partitions topic before restore. 11 | 12 | You need to create the topic manually before restore. For a "real" backup scenario you also need to backup and restore Zookeeper 13 | 14 | # Error "Plugin class loader for connector was not found" 15 | 16 | ```sh 17 | ERROR Plugin class loader for connector: 'de.azapps.kafkabackup.sink.BackupSinkConnector' was not found. Returning: org.apache.kafka.connect.runtime.isolation.DelegatingClassLoader@5b068087 (org.apache.kafka.connect.runtime.isolation.DelegatingClassLoader:165) 18 | ``` 19 | 20 | You forgot to build the jar file. Either get an official release of Kafka Backup or run `./gradlew shadowJar` in the root directory of Kafka Backup. 
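If the jar is built but your own Kafka Connect worker still cannot load the class, check that the worker can actually see the jar. A minimal sketch, assuming the jar was copied to `/opt/kafka-backup/` as in this repository's Dockerfile (`plugin.path` is a standard Kafka Connect worker setting):

```properties
# Excerpt from a Connect worker configuration
plugin.path=/opt/kafka-backup
```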
-------------------------------------------------------------------------------- /docs/Tooling.md: -------------------------------------------------------------------------------- 1 | # Kafka Backup: Tooling 2 | 3 | Before you go, you need to add the `kafka-backup.jar` to your 4 | classpath: 5 | 6 | ```sh 7 | export CLASSPATH="./path/to/kafka-backup.jar:$CLASSPATH" 8 | ``` 9 | 10 | If you are in the root directory of Kafka Backup, you can use: 11 | 12 | ```sh 13 | export CLASSPATH="`pwd`/build/libs/kafka-backup.jar:$CLASSPATH" 14 | ``` 15 | 16 | ## SegmentCLI 17 | 18 | Basic usage: 19 | 20 | ```sh 21 | java de.azapps.kafkabackup.cli.SegmentCLI 22 | ``` 23 | 24 | ### List all records 25 | 26 | ```sh 27 | java de.azapps.kafkabackup.cli.SegmentCLI \ 28 | --list \ 29 | --segment /path/to/segment_partition_123_from_offset_0000000123_records 30 | ``` 31 | 32 | ### Show key and value of a specific offset in a segment 33 | 34 | ```sh 35 | java de.azapps.kafkabackup.cli.SegmentCLI --show --segment /path/to/segment_partition_123_from_offset_0000000123_records --offset 597 36 | ``` 37 | 38 | ### Formatting Options 39 | 40 | Using the `--formatter` option you can customize how the keys and 41 | values of the messages are formatted. The default is the 42 | `RawFormatter` which prints the bytes as they are (i.e. as characters 43 | to the console. 44 | 45 | Implemented options: 46 | 47 | * `de.azapps.kafkabackup.cli.formatters.RawFormatter` 48 | * `de.azapps.kafkabackup.cli.formatters.UTF8Formatter` 49 | * `de.azapps.kafkabackup.cli.formatters.Base64Formatter` 50 | 51 | Example: 52 | 53 | ```sh 54 | java de.azapps.kafkabackup.cli.SegmentCLI --list \ 55 | --segment /path/to/segment_partition_123_from_offset_0000000123_records \ 56 | --key-formatter de.azapps.kafkabackup.cli.formatters.Base64Formatter 57 | ``` 58 | ## SegmentIndexCLI 59 | 60 | The segment index is required for faster access to the records in the 61 | segment file. It also simplifies the implementation of the idempotent 62 | sink connector. The segment index does not need to be backed up, but 63 | must exist before performing a restore. 64 | 65 | ### List Index entries 66 | 67 | Displays information about the records referenced in the index. 68 | 69 | ```sh 70 | java de.azapps.kafkabackup.cli.SegmentIndexCLI --list \ 71 | --segment-index /path/to/segment_partition_123_from_offset_0000000123_records \ 72 | ``` 73 | 74 | ### Restore Index 75 | 76 | Given a record file, restores the segment index for that file. 77 | 78 | ```sh 79 | java de.azapps.kafkabackup.cli.SegmentIndexCLI --restore-index \ 80 | --segment /path/to/segment_partition_123_from_offset_0000000123_records 81 | ``` 82 | 83 | ### Restoring all Segment Indexes 84 | 85 | ```sh 86 | export TOPICDIR="/path/to/topicdir/" 87 | for f in "$TOPICDIR"/segment_partition_*_records ; do 88 | java de.azapps.kafkabackup.cli.SegmentIndexCLI --restore-index \ 89 | --segment $f 90 | done 91 | ``` 92 | 93 | ## PartitionIndexCLI 94 | 95 | The partition index contains the information about which offsets are 96 | located in which segment. This file too, does not need to be backed up 97 | but is required for restoration. 98 | 99 | It is totally ok to delete old segments that are not needed 100 | anymore. But it is crucial to restore the partition index afterwards. 
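For example, to prune completed segments and then rebuild the index of an affected partition (a sketch combining the commands documented in this file; the partition number and paths are placeholders):

```sh
# Delete completed segment files under the backup root (keeps the most recent segment per partition)
completed_segments.py -d /path/to/target_dir
# Rebuild the partition index for each affected topic/partition afterwards
java de.azapps.kafkabackup.cli.PartitionIndexCLI --restore \
    --partition 0 --topic-dir /path/to/target_dir/topic1
```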
101 | 102 | ### List Index entries 103 | 104 | ```sh 105 | java de.azapps.kafkabackup.cli.PartitionIndexCLI --list \ 106 | --partition-index /path/to/index_partition_123 107 | ``` 108 | 109 | ### Restore Partition Index 110 | 111 | ```sh 112 | java de.azapps.kafkabackup.cli.PartitionIndexCLI --restore \ 113 | --partition 0 \ 114 | --topic-dir /path/to/topicdir/ 115 | ``` 116 | 117 | #### Restore Indexes for all Partitions 118 | 119 | ```sh 120 | export NUM_PARTITIONS=9 121 | export TOPICDIR="/path/to/topicdir/" 122 | for i in {0..$(( $NUM_PARTITIONS - 1 ))} ; do 123 | java de.azapps.kafkabackup.cli.PartitionIndexCLI --restore --partition $i --topic-dir "$TOPICDIR" 124 | done 125 | ``` 126 | 127 | ## Completed segments processing 128 | 129 | You may want to process completed segment files. Let's say you have your 130 | `target.dir` backed up to cloud storage daily. So you don't need to keep all 131 | the files locally then. To save some space you may delete completed segment 132 | files. There is `bin/completed_segments.py` script for your convenience. 133 | 134 | To get some information on segment files just call script with path to your 135 | backup directory. 136 | 137 | ```sh 138 | completed_segments.py /path/to/target_dir 139 | ``` 140 | 141 | To delete completed segments use `-d` option. 142 | ```sh 143 | completed_segments.py -d /path/to/target_dir 144 | ``` 145 | 146 | You may keep last N completed segments by using `-k N` option. 147 | 148 | If you need more complex processing you may just list completed segment files 149 | and pass them for further processing. E.g. to keep last 2 segments and `shred` 150 | the rest run the following command. 151 | 152 | ```sh 153 | completed_segments.py -l -k 2 /path/to/target_dir | xargs shred -u 154 | ``` 155 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Sun Jun 07 12:49:55 CEST 2020 2 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-all.zip 3 | distributionBase=GRADLE_USER_HOME 4 | distributionPath=wrapper/dists 5 | zipStorePath=wrapper/dists 6 | zipStoreBase=GRADLE_USER_HOME 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 
13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS='"-Xmx64m"' 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | 
if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS="-Xmx64m" 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'kafka-backup' -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/PartitionIndexCLI.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli; 2 | 3 | import de.azapps.kafkabackup.common.partition.PartitionIndex; 4 | import de.azapps.kafkabackup.common.partition.PartitionIndexEntry; 5 | import de.azapps.kafkabackup.common.partition.PartitionIndexRestore; 6 | import joptsimple.OptionParser; 7 | import joptsimple.OptionSet; 8 | import org.apache.kafka.common.utils.Exit; 9 | 10 | import java.io.IOException; 11 | import java.nio.file.Paths; 12 | import java.util.Arrays; 13 | import java.util.stream.Stream; 14 | 15 | public class PartitionIndexCLI { 16 | private static final String CMD_LIST = "list"; 17 | private static final String CMD_RESTORE = "restore-index"; 18 | private static final String[] COMMANDS = {CMD_LIST, CMD_RESTORE}; 19 | private static final String ARG_PARTITION_INDEX = "partition-index"; 20 | private static final String ARG_TOPIC_DIR = "topic-dir"; 21 | private static final String ARG_PARTITION = "partition"; 22 | 23 | public static void main(String[] args) throws Exception { 24 | /* 25 | cli --list --partition-index [file] 26 | cli --restore-index --partition 0 --topic-dir [dir] 27 | // ideas for later 28 | cli --validate --partition-index [file] --partition 0 --topic-dir [dir] 29 | */ 30 | final OptionParser optionParser = new OptionParser(); 31 | // Commands 32 | optionParser.accepts(CMD_LIST); 33 | optionParser.accepts(CMD_RESTORE); 34 | // Arguments 35 | optionParser.accepts(ARG_PARTITION_INDEX) 36 | .requiredIf(CMD_LIST) 37 | .withRequiredArg().ofType(String.class); 38 | optionParser.accepts(ARG_TOPIC_DIR) 39 | .requiredIf(CMD_RESTORE) 40 | .withRequiredArg().ofType(String.class); 41 | optionParser.accepts(ARG_PARTITION) 42 | .requiredIf(CMD_RESTORE) 43 | .withRequiredArg().ofType(Integer.class); 44 | 45 | OptionSet options; 46 | try { 47 | options = optionParser.parse(args); 48 | if (Stream.of(COMMANDS).filter(options::has).count() != 1) { 49 | throw new Exception("Must contain exactly one of " + Arrays.toString(COMMANDS)); 50 | } 51 | } catch (Exception e) { 52 | System.err.println(e.getMessage()); 53 | optionParser.printHelpOn(System.err); 54 | Exit.exit(-1); 55 | return; 56 | } 57 | if (options.has(CMD_LIST)) { 58 | list((String) options.valueOf(ARG_PARTITION_INDEX)); 59 | } else if (options.has(CMD_RESTORE)) { 60 | restore((String) options.valueOf(ARG_TOPIC_DIR), (Integer) options.valueOf(ARG_PARTITION)); 61 | } else { 62 | optionParser.printHelpOn(System.err); 63 | } 64 | } 65 | 66 | private static void restore(String topicDir, int partition) throws PartitionIndex.IndexException, PartitionIndexRestore.RestoreException, IOException { 67 | PartitionIndexRestore restore = new PartitionIndexRestore(Paths.get(topicDir), partition); 68 | restore.restore(); 69 | } 70 | 71 | private static void list(String partitionIndexFileName) throws IOException, PartitionIndex.IndexException { 72 | System.out.println(partitionIndexFileName); 73 | PartitionIndex partitionIndex = new 
PartitionIndex(Paths.get(partitionIndexFileName)); 74 | for (PartitionIndexEntry entry : partitionIndex.index()) { 75 | System.out.println(String.format("File: %s StartOffset: %d", entry.filename(), entry.startOffset())); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/SegmentCLI.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli; 2 | 3 | import de.azapps.kafkabackup.cli.formatters.*; 4 | import de.azapps.kafkabackup.common.record.Record; 5 | import de.azapps.kafkabackup.common.segment.UnverifiedSegmentReader; 6 | import joptsimple.OptionParser; 7 | import joptsimple.OptionSet; 8 | import org.apache.kafka.common.utils.Exit; 9 | 10 | import java.io.EOFException; 11 | import java.io.IOException; 12 | import java.lang.reflect.InvocationTargetException; 13 | import java.nio.file.Paths; 14 | import java.util.Arrays; 15 | import java.util.stream.Stream; 16 | 17 | public class SegmentCLI { 18 | private static final String CMD_LIST = "list"; 19 | private static final String ARG_SEGMENT = "segment"; 20 | private static final String ARG_SEGMENT_HELP = "Segment file (of the form segment_partition_xxx_from_offset_xxxxxx_records)"; 21 | private static final String CMD_SHOW = "show"; 22 | private static final String ARG_OFFSET = "offset"; 23 | private static final String ARG_OFFSET_HELP = "The offset of the message to display"; 24 | private static final String CMD_LIST_HELP = "Lists all records in the segment. Just counting the value length – not displaying it."; 25 | private static final String CMD_SHOW_HELP = "Shows a specific record in the segment. Displays key and value"; 26 | private static final String[] COMMANDS = {CMD_LIST, CMD_SHOW}; 27 | private static final String ARG_KEY_FORMAT = "key-formatter"; 28 | private static final String ARG_KEY_FORMAT_HELP = "Which formatter to use to display the key (default: StringFormatter)"; 29 | private static final String ARG_VALUE_FORMAT = "value-formatter"; 30 | private static final String ARG_VALUE_FORMAT_HELP = "Which formatter to use to display the value (default: StringFormatter)"; 31 | 32 | public static void main(String[] args) throws IOException { 33 | /* 34 | cli --list --segment [file] 35 | cli --show --segment [file] --offset 0 36 | */ 37 | final OptionParser optionParser = new OptionParser(); 38 | optionParser.accepts(ARG_SEGMENT, ARG_SEGMENT_HELP).withRequiredArg().ofType(String.class); 39 | optionParser.accepts(CMD_LIST, CMD_LIST_HELP); 40 | optionParser.accepts(CMD_SHOW, CMD_SHOW_HELP); 41 | optionParser.accepts(ARG_OFFSET, ARG_OFFSET_HELP).requiredIf(CMD_SHOW).withRequiredArg().ofType(Long.class); 42 | optionParser.accepts(ARG_KEY_FORMAT, ARG_KEY_FORMAT_HELP).withRequiredArg().ofType(String.class) 43 | .defaultsTo(RawFormatter.class.getCanonicalName()); 44 | optionParser.accepts(ARG_VALUE_FORMAT, ARG_VALUE_FORMAT_HELP).withRequiredArg().ofType(String.class) 45 | .defaultsTo(RawFormatter.class.getCanonicalName()); 46 | 47 | 48 | OptionSet options; 49 | try { 50 | options = optionParser.parse(args); 51 | if (Stream.of(COMMANDS).filter(options::has).count() != 1) { 52 | throw new Exception("Must contain exactly one of " + Arrays.toString(COMMANDS)); 53 | } 54 | } catch (Exception e) { 55 | System.err.println(e.getMessage()); 56 | optionParser.printHelpOn(System.err); 57 | Exit.exit(-1); 58 | return; 59 | } 60 | 61 | String segmentIndexFileName = (String) 
options.valueOf(ARG_SEGMENT); 62 | if (!segmentIndexFileName.endsWith("_records")) { 63 | segmentIndexFileName += "_records"; 64 | } 65 | UnverifiedSegmentReader segmentReader = new UnverifiedSegmentReader(Paths.get(segmentIndexFileName)); 66 | 67 | ByteFormatter keyFormatter = (ByteFormatter) instanciateClass((String) options.valueOf(ARG_KEY_FORMAT)); 68 | ByteFormatter valueFormatter = (ByteFormatter) instanciateClass((String) options.valueOf(ARG_VALUE_FORMAT)); 69 | if (options.has(CMD_LIST)) { 70 | RecordFormatter formatter = new ListRecordFormatter(keyFormatter, valueFormatter); 71 | list(segmentReader, formatter); 72 | } else if (options.has(CMD_SHOW)) { 73 | RecordFormatter formatter = new DetailedRecordFormatter(keyFormatter, valueFormatter); 74 | show(segmentReader, formatter, (Long) options.valueOf(ARG_OFFSET)); 75 | } 76 | 77 | } 78 | 79 | private static Object instanciateClass(String name) { 80 | try { 81 | Class formatterClass = Class.forName(name); 82 | return formatterClass.getDeclaredConstructor().newInstance(); 83 | } catch (ClassNotFoundException | IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { 84 | System.err.println("formatter must be a valid class"); 85 | Exit.exit(1); 86 | // impossible to reach 87 | throw new RuntimeException("…"); 88 | } 89 | } 90 | 91 | private static void show(UnverifiedSegmentReader segmentReader, RecordFormatter formatter, Long offset) { 92 | long maxOffset = -1; 93 | while (true) { 94 | try { 95 | Record record = segmentReader.read(); 96 | maxOffset = record.kafkaOffset(); 97 | if (record.kafkaOffset() == offset) { 98 | formatter.writeTo(record, System.out); 99 | Exit.exit(0); 100 | } 101 | } catch (EOFException e) { 102 | System.out.println("Did not found offset " + offset + " in file. 
Max offset is " + maxOffset); 103 | Exit.exit(-2); 104 | } catch (IOException e) { 105 | e.printStackTrace(); 106 | Exit.exit(-3); 107 | } 108 | } 109 | } 110 | 111 | private static void list(UnverifiedSegmentReader segmentReader, RecordFormatter formatter) { 112 | int cnt = 0; 113 | while (true) { 114 | try { 115 | Record record = segmentReader.read(); 116 | formatter.writeTo(record, System.out); 117 | cnt++; 118 | } catch (EOFException e) { 119 | break; 120 | } catch (IOException e) { 121 | e.printStackTrace(); 122 | Exit.exit(-2); 123 | } 124 | } 125 | System.out.println(String.format("%d entries in Segment", cnt)); 126 | 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/SegmentIndexCLI.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli; 2 | 3 | import de.azapps.kafkabackup.common.segment.SegmentIndex; 4 | import de.azapps.kafkabackup.common.segment.SegmentIndexEntry; 5 | import de.azapps.kafkabackup.common.segment.SegmentIndexRestore; 6 | import joptsimple.OptionParser; 7 | import joptsimple.OptionSet; 8 | import org.apache.kafka.common.utils.Exit; 9 | 10 | import java.io.IOException; 11 | import java.nio.file.Paths; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.stream.Stream; 15 | 16 | public class SegmentIndexCLI { 17 | private static final String CMD_LIST = "list"; 18 | private static final String CMD_LIST_HELP = "List all Records in a segment"; 19 | private static final String ARG_SEGMENT_INDEX = "segment-index"; 20 | private static final String ARG_SEGMENT_INDEX_HELP = "Segment index file (of the form segment_partition_xxx_from_offset_xxxxx_index)"; 21 | private static final String CMD_RESTORE = "restore-index"; 22 | private static final String CMD_RESTORE_HELP = "Restores the segment index given the segment file"; 23 | private static final String ARG_SEGMENT = "segment"; 24 | private static final String ARG_SEGMENT_HELP = "Segment file (of the form segment_partition_xxx_from_offset_xxxxxx_records)"; 25 | private static final String[] COMMANDS = {CMD_LIST, CMD_RESTORE}; 26 | 27 | public static void main(String[] args) throws Exception { 28 | /* 29 | cli --list --segment-index [file] 30 | cli --restore-index --segment [file] 31 | // ideas for later 32 | cli --show --segment-index [file] --offset [offset] 33 | cli --validate --segment-index [file] --segment [file] 34 | */ 35 | final OptionParser optionParser = new OptionParser(); 36 | // Commands 37 | optionParser.accepts(CMD_LIST, CMD_LIST_HELP); 38 | optionParser.accepts(CMD_RESTORE, CMD_RESTORE_HELP); 39 | // Arguments 40 | optionParser.accepts(ARG_SEGMENT_INDEX, ARG_SEGMENT_INDEX_HELP) 41 | .requiredIf(CMD_LIST) 42 | .withRequiredArg().ofType(String.class); 43 | optionParser.accepts(ARG_SEGMENT, ARG_SEGMENT_HELP) 44 | .requiredIf(CMD_RESTORE) 45 | .withRequiredArg().ofType(String.class); 46 | 47 | OptionSet options; 48 | try { 49 | options = optionParser.parse(args); 50 | if (Stream.of(COMMANDS).filter(options::has).count() != 1) { 51 | throw new Exception("Must contain exactly one of " + Arrays.toString(COMMANDS)); 52 | } 53 | } catch (Exception e) { 54 | System.err.println(e.getMessage()); 55 | optionParser.printHelpOn(System.err); 56 | Exit.exit(-1); 57 | return; 58 | } 59 | 60 | if (options.has(CMD_LIST)) { 61 | list((String) options.valueOf(ARG_SEGMENT_INDEX)); 62 | } else if (options.has(CMD_RESTORE)) { 63 | restore((String) 
options.valueOf(ARG_SEGMENT)); 64 | } 65 | } 66 | 67 | private static void restore(String segmentFileName) throws SegmentIndex.IndexException, SegmentIndexRestore.RestoreException, IOException { 68 | if (!segmentFileName.endsWith("_records")) { 69 | segmentFileName += "_records"; 70 | } 71 | SegmentIndexRestore restore = new SegmentIndexRestore(Paths.get(segmentFileName)); 72 | restore.restore(); 73 | } 74 | 75 | private static void list(String segmentIndexFileName) throws IOException, SegmentIndex.IndexException { 76 | if (!segmentIndexFileName.endsWith("_index")) { 77 | segmentIndexFileName += "_index"; 78 | } 79 | SegmentIndex segmentIndex = new SegmentIndex(Paths.get(segmentIndexFileName)); 80 | List index = segmentIndex.index(); 81 | long previousOffset = index.get(0).getOffset() - 1; 82 | for (SegmentIndexEntry entry : index) { 83 | System.out.print(String.format("Offset: %d Position: %d Length: %d", entry.getOffset(), entry.recordFilePosition(), entry.recordByteLength())); 84 | if (entry.getOffset() > previousOffset + 1) { 85 | System.out.print(" <- FYI Here is a gap"); 86 | } 87 | System.out.println(); 88 | previousOffset = entry.getOffset(); 89 | } 90 | System.out.println(String.format("%d entries in Index", index.size())); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/formatters/Base64Formatter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli.formatters; 2 | 3 | import java.util.Base64; 4 | 5 | public class Base64Formatter implements ByteFormatter { 6 | @Override 7 | public String toString(byte[] in) { 8 | Base64.Encoder encoder = Base64.getEncoder(); 9 | return encoder.encodeToString(in); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/formatters/ByteFormatter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli.formatters; 2 | 3 | public interface ByteFormatter { 4 | String toString(byte[] in); 5 | } 6 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/formatters/DetailedRecordFormatter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli.formatters; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | 5 | import java.io.PrintStream; 6 | 7 | public class DetailedRecordFormatter extends RecordFormatter { 8 | 9 | public DetailedRecordFormatter(ByteFormatter keyFormatter, ByteFormatter valueFormatter) { 10 | super(keyFormatter, valueFormatter); 11 | } 12 | 13 | @Override 14 | public void writeTo(Record record, PrintStream outputStream) { 15 | outputStream.println(keyFormatter.toString(record.key()) 16 | + ", " 17 | + valueFormatter.toString(record.value())); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/formatters/ListRecordFormatter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli.formatters; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | 5 | import java.io.PrintStream; 6 | import java.text.DateFormat; 7 | import java.text.SimpleDateFormat; 8 | 9 | public class ListRecordFormatter extends RecordFormatter { 10 | 
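// Renders one line per record: offset, key (or "NULL Key"), timestamp with its type, and the formatted value (or "NULL Value").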
private final DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 11 | 12 | public ListRecordFormatter(ByteFormatter keyFormatter, ByteFormatter valueFormatter) { 13 | super(keyFormatter, valueFormatter); 14 | } 15 | 16 | @Override 17 | public void writeTo(Record record, PrintStream outputStream) { 18 | String offset = "Offset: " + record.kafkaOffset(); 19 | String key; 20 | if (record.key() == null) { 21 | key = "NULL Key"; 22 | } else { 23 | key = "Key: " + keyFormatter.toString(record.key()); 24 | } 25 | String timestamp = "Timestamp: "; 26 | System.out.println(record); 27 | 28 | switch (record.timestampType()) { 29 | case NO_TIMESTAMP_TYPE: 30 | timestamp += "No Timestamp"; 31 | break; 32 | case CREATE_TIME: 33 | timestamp += "(create)"; 34 | timestamp += timestampFormat.format(record.timestamp()); 35 | break; 36 | case LOG_APPEND_TIME: 37 | timestamp += "(log append)"; 38 | timestamp += timestampFormat.format(record.timestamp()); 39 | break; 40 | } 41 | String data_length; 42 | if (record.value() == null) { 43 | data_length = "NULL Value"; 44 | } else { 45 | data_length = "Data: " + valueFormatter.toString(record.value()); 46 | } 47 | 48 | outputStream.println(offset + " " + key + " " + timestamp + " " + data_length); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/formatters/RawFormatter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli.formatters; 2 | 3 | import java.nio.charset.StandardCharsets; 4 | 5 | public class RawFormatter implements ByteFormatter { 6 | @Override 7 | public String toString(byte[] in) { 8 | return new String(in, StandardCharsets.UTF_8); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/formatters/RecordFormatter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli.formatters; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | 5 | import java.io.PrintStream; 6 | 7 | public abstract class RecordFormatter { 8 | ByteFormatter keyFormatter; 9 | ByteFormatter valueFormatter; 10 | 11 | RecordFormatter(ByteFormatter keyFormatter, ByteFormatter valueFormatter) { 12 | this.keyFormatter = keyFormatter; 13 | this.valueFormatter = valueFormatter; 14 | } 15 | 16 | public abstract void writeTo(Record record, PrintStream outputStream); 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/cli/formatters/UTF8Formatter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.cli.formatters; 2 | 3 | import java.nio.charset.StandardCharsets; 4 | 5 | public class UTF8Formatter implements ByteFormatter { 6 | @Override 7 | public String toString(byte[] in) { 8 | return new String(in, StandardCharsets.UTF_8); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/BackupConfig.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common; 2 | 3 | import org.apache.kafka.common.config.AbstractConfig; 4 | import org.apache.kafka.common.config.ConfigDef; 5 | 6 | import java.util.Map; 7 | 8 | public abstract class BackupConfig extends 
AbstractConfig { 9 | public static final String CLUSTER_PREFIX = "cluster."; 10 | public static final String CLUSTER_BOOTSTRAP_SERVERS = CLUSTER_PREFIX + "bootstrap.servers"; 11 | public static final String KEY_CONVERTER = "key.converter"; 12 | public static final String VALUE_CONVERTER = "value.converter"; 13 | public static final String HEADER_CONVERTER = "header.converter"; 14 | public static final String MANDATORY_CONVERTER = "org.apache.kafka.connect.converters.ByteArrayConverter"; 15 | 16 | public BackupConfig(ConfigDef configDef, Map props) { 17 | super(configDef, props); 18 | if (!props.containsKey(CLUSTER_BOOTSTRAP_SERVERS)) { 19 | throw new RuntimeException("Missing Configuration Variable: " + CLUSTER_BOOTSTRAP_SERVERS); 20 | } 21 | 22 | if(!props.containsKey(KEY_CONVERTER) || ! props.get(KEY_CONVERTER).equals(MANDATORY_CONVERTER)) { 23 | throw new RuntimeException(KEY_CONVERTER + " must be set and must equal " + MANDATORY_CONVERTER); 24 | } 25 | 26 | if(!props.containsKey(VALUE_CONVERTER) || !props.get(VALUE_CONVERTER).equals(MANDATORY_CONVERTER)) { 27 | throw new RuntimeException(VALUE_CONVERTER + " must be set and must equal " + MANDATORY_CONVERTER); 28 | } 29 | 30 | if(!props.containsKey(HEADER_CONVERTER) || !props.get(HEADER_CONVERTER).equals(MANDATORY_CONVERTER)) { 31 | throw new RuntimeException(HEADER_CONVERTER + " must be set and must equal " + MANDATORY_CONVERTER); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/offset/EndOffsetReader.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.offset; 2 | 3 | import org.apache.kafka.clients.consumer.KafkaConsumer; 4 | import org.apache.kafka.common.TopicPartition; 5 | import org.apache.kafka.common.serialization.ByteArrayDeserializer; 6 | 7 | import java.util.*; 8 | 9 | public class EndOffsetReader { 10 | private final Map consumerConfig; 11 | 12 | public EndOffsetReader(Map consumerConfig) { 13 | this.consumerConfig = consumerConfig; 14 | } 15 | 16 | /** 17 | * Obtain end offsets for each given partition 18 | */ 19 | public Map getEndOffsets(Collection partitions) { 20 | Map serializerConfig = new HashMap<>(consumerConfig); 21 | serializerConfig.put("key.deserializer", ByteArrayDeserializer.class.getName()); 22 | serializerConfig.put("value.deserializer", ByteArrayDeserializer.class.getName()); 23 | try (KafkaConsumer consumer = new KafkaConsumer<>(serializerConfig)) { 24 | consumer.assign(partitions); 25 | 26 | Map offsets = consumer.endOffsets(partitions); 27 | List toRemove = new ArrayList<>(); 28 | 29 | for (Map.Entry partitionOffset : offsets.entrySet()) { 30 | if (partitionOffset.getValue() == 0L) { 31 | toRemove.add(partitionOffset.getKey()); // don't store empty offsets 32 | } 33 | } 34 | 35 | for (TopicPartition partition : toRemove) { 36 | offsets.remove(partition); 37 | } 38 | 39 | return offsets; 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/offset/OffsetSink.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.offset; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import org.apache.kafka.clients.admin.AdminClient; 5 | import org.apache.kafka.clients.admin.ConsumerGroupListing; 6 | import org.apache.kafka.clients.consumer.OffsetAndMetadata; 7 | 
import org.apache.kafka.common.TopicPartition; 8 | import org.apache.kafka.connect.errors.RetriableException; 9 | 10 | import java.io.IOException; 11 | import java.nio.file.Files; 12 | import java.nio.file.Path; 13 | import java.nio.file.Paths; 14 | import java.util.ArrayList; 15 | import java.util.HashMap; 16 | import java.util.List; 17 | import java.util.Map; 18 | import java.util.concurrent.ExecutionException; 19 | import java.util.stream.Collectors; 20 | 21 | public class OffsetSink { 22 | private final Path targetDir; 23 | private final Map topicOffsets = new HashMap<>(); 24 | private List consumerGroups = new ArrayList<>(); 25 | private final AdminClient adminClient; 26 | 27 | public OffsetSink(AdminClient adminClient, Path targetDir) { 28 | this.adminClient = adminClient; 29 | this.targetDir = targetDir; 30 | } 31 | 32 | public void syncConsumerGroups() { 33 | try { 34 | consumerGroups = adminClient.listConsumerGroups().all().get().stream().map(ConsumerGroupListing::groupId).collect(Collectors.toList()); 35 | } catch (InterruptedException | ExecutionException e) { 36 | throw new RetriableException(e); 37 | } 38 | } 39 | 40 | public void syncOffsets() throws IOException { 41 | boolean error = false; 42 | for (String consumerGroup : consumerGroups) { 43 | try { 44 | syncOffsetsForGroup(consumerGroup); 45 | } catch (IOException e) { 46 | e.printStackTrace(); 47 | error = true; 48 | } 49 | } 50 | if (error) { 51 | throw new IOException("syncOffsets() threw an IOException"); 52 | } 53 | } 54 | 55 | private void syncOffsetsForGroup(String consumerGroup) throws IOException { 56 | Map topicOffsetsAndMetadata; 57 | try { 58 | topicOffsetsAndMetadata = adminClient.listConsumerGroupOffsets(consumerGroup).partitionsToOffsetAndMetadata().get(); 59 | } catch (InterruptedException | ExecutionException e) { 60 | throw new RetriableException(e); 61 | } 62 | for (Map.Entry entry : topicOffsetsAndMetadata.entrySet()) { 63 | TopicPartition tp = entry.getKey(); 64 | OffsetAndMetadata offsetAndMetadata = entry.getValue(); 65 | 66 | if (validTopic(tp.topic())) { 67 | if (!this.topicOffsets.containsKey(tp)) { 68 | this.topicOffsets.put(tp, new OffsetStoreFile(targetDir, tp)); 69 | } 70 | OffsetStoreFile offsets = this.topicOffsets.get(tp); 71 | offsets.put(consumerGroup, offsetAndMetadata.offset()); 72 | } 73 | } 74 | } 75 | 76 | private boolean validTopic(String topic) { 77 | return Files.isDirectory(Paths.get(targetDir.toString(), topic)); 78 | } 79 | 80 | public void flush() throws IOException { 81 | boolean error = false; 82 | for (OffsetStoreFile offsetStoreFile : topicOffsets.values()) { 83 | try { 84 | offsetStoreFile.flush(); 85 | } catch (IOException e) { 86 | e.printStackTrace(); 87 | error = true; 88 | } 89 | } 90 | if (error) { 91 | throw new IOException("syncOffsets() threw an IOException"); 92 | } 93 | } 94 | 95 | public void close() throws IOException { 96 | flush(); 97 | } 98 | 99 | private static class OffsetStoreFile { 100 | private Map groupOffsets = new HashMap<>(); 101 | 102 | private final ObjectMapper mapper = new ObjectMapper(); 103 | private final Path storeFile; 104 | 105 | OffsetStoreFile(Path targetDir, TopicPartition topicPartition) throws IOException { 106 | storeFile = OffsetUtils.offsetStoreFile(targetDir, topicPartition); 107 | if (!Files.isRegularFile(storeFile)) { 108 | Files.createFile(storeFile); 109 | } 110 | if (Files.size(storeFile) > 0) { 111 | groupOffsets = mapper.readValue(storeFile.toFile(), Map.class); 112 | } 113 | } 114 | 115 | void put(String consumerGroup, 
long offset) { 116 | groupOffsets.put(consumerGroup, offset); 117 | } 118 | 119 | void flush() throws IOException { 120 | mapper.writeValue(storeFile.toFile(), groupOffsets); 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/offset/OffsetSource.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.offset; 2 | 3 | import com.fasterxml.jackson.core.type.TypeReference; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import org.apache.kafka.clients.consumer.Consumer; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.clients.consumer.OffsetAndMetadata; 8 | import org.apache.kafka.common.TopicPartition; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.io.IOException; 13 | import java.nio.file.Files; 14 | import java.nio.file.Path; 15 | import java.nio.file.Paths; 16 | import java.util.*; 17 | import java.util.stream.Collectors; 18 | 19 | public class OffsetSource { 20 | private static final Logger log = LoggerFactory.getLogger(OffsetSource.class); 21 | private final Map topicOffsets = new HashMap<>(); 22 | private final Map consumerConfig; 23 | 24 | public OffsetSource(Path backupDir, List topics, Map consumerConfig) throws IOException { 25 | this.consumerConfig = consumerConfig; 26 | for (String topic : topics) { 27 | findOffsetStores(backupDir, topic); 28 | } 29 | } 30 | 31 | private void findOffsetStores(Path backupDir, String topic) throws IOException { 32 | Path topicDir = Paths.get(backupDir.toString(), topic); 33 | for (Path f : Files.list(topicDir).collect(Collectors.toList())) { 34 | Optional partition = OffsetUtils.isOffsetStoreFile(f); 35 | if (partition.isPresent()) { 36 | TopicPartition topicPartition = new TopicPartition(topic, partition.get()); 37 | topicOffsets.put(topicPartition, new OffsetStoreFile(f)); 38 | } 39 | } 40 | } 41 | 42 | public void syncGroupForOffset(TopicPartition topicPartition, long sourceOffset, long targetOffset) { 43 | OffsetStoreFile offsetStoreFile = topicOffsets.get(topicPartition); 44 | // __consumer_offsets contains the offset of the message to read next. So we need to search for the offset + 1 45 | // if we do not do that we might miss 46 | List groups = offsetStoreFile.groupForOffset(sourceOffset + 1); 47 | if (groups != null && groups.size() > 0) { 48 | for (String group : groups) { 49 | Map groupConsumerConfig = new HashMap<>(consumerConfig); 50 | groupConsumerConfig.put("group.id", group); 51 | Consumer consumer = new KafkaConsumer<>(groupConsumerConfig); 52 | consumer.assign(Collections.singletonList(topicPartition)); 53 | // ! 
Target Offset + 1 as we commit the offset of the "next message to read" 54 | OffsetAndMetadata offsetAndMetadata = new OffsetAndMetadata(targetOffset + 1); 55 | Map offsets = Collections.singletonMap(topicPartition, offsetAndMetadata); 56 | consumer.commitSync(offsets); 57 | consumer.close(); 58 | log.debug("Committed target offset {} for group {} for topic {} partition {}", 59 | (targetOffset + 1), group, topicPartition.topic(), topicPartition.partition()); 60 | } 61 | } 62 | } 63 | 64 | private static class OffsetStoreFile { 65 | TypeReference> typeRef 66 | = new TypeReference>() { 67 | }; 68 | private final Map> offsetGroups = new HashMap<>(); 69 | 70 | OffsetStoreFile(Path storeFile) throws IOException { 71 | ObjectMapper mapper = new ObjectMapper(); 72 | Map groupOffsets = mapper.readValue(storeFile.toFile(), typeRef); 73 | for (Map.Entry entry : groupOffsets.entrySet()) { 74 | String group = entry.getKey(); 75 | Long offset = entry.getValue(); 76 | 77 | if (offsetGroups.containsKey(offset)) { 78 | List groups = offsetGroups.get(offset); 79 | groups.add(group); 80 | } else { 81 | List groups = new ArrayList<>(1); 82 | groups.add(group); 83 | offsetGroups.put(offset, groups); 84 | } 85 | } 86 | } 87 | 88 | List groupForOffset(Long offset) { 89 | return offsetGroups.get(offset); 90 | } 91 | } 92 | 93 | } 94 | 95 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/offset/OffsetUtils.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.offset; 2 | 3 | import org.apache.kafka.common.TopicPartition; 4 | 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.Optional; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | class OffsetUtils { 12 | 13 | private static final String OFFSET_STORE_FILE_PREFIX = "consumer_offsets_partition"; 14 | private static final Pattern FILE_PATTERN = Pattern.compile("consumer_offsets_partition_([0-9]+)"); 15 | 16 | static String offsetStoreFileName(int partition) { 17 | return String.format(OFFSET_STORE_FILE_PREFIX + "_%03d", partition); 18 | } 19 | 20 | static Path offsetStoreFile(Path backupDir, TopicPartition topicPartition) { 21 | return Paths.get(backupDir.toString(), topicPartition.topic(), OffsetUtils.offsetStoreFileName(topicPartition.partition())); 22 | } 23 | 24 | static Optional isOffsetStoreFile(Path f) { 25 | Path fpath = f.getFileName(); 26 | if (fpath == null) { 27 | return Optional.empty(); 28 | } 29 | String fname = fpath.toString(); 30 | Matcher m = FILE_PATTERN.matcher(fname); 31 | if (m.find()) { 32 | String partitionStr = m.group(1); 33 | return Optional.of(Integer.valueOf(partitionStr)); 34 | } else { 35 | return Optional.empty(); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/partition/PartitionIndex.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 | 3 | import java.io.EOFException; 4 | import java.io.FileInputStream; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.Optional; 12 | 13 | public class PartitionIndex { 14 | private static final byte V1_MAGIC_BYTE = 0x01; 15 | private 
Path indexFile; 16 | private List index = new ArrayList<>(); 17 | private FileOutputStream fileOutputStream; 18 | private FileInputStream fileInputStream; 19 | private int position = 0; 20 | private long latestStartOffset = -1; 21 | 22 | public PartitionIndex(Path indexFile) throws IOException, IndexException { 23 | this.indexFile = indexFile; 24 | initFile(); 25 | while (true) { 26 | try { 27 | PartitionIndexEntry partitionIndexEntry = PartitionIndexEntry.fromStream(fileInputStream); 28 | if (partitionIndexEntry.startOffset() <= latestStartOffset) { 29 | throw new IndexException("Offsets must be always increasing! There is something terribly wrong in your index " + indexFile + "! Got " + partitionIndexEntry.startOffset() + " expected an offset larger than " + latestStartOffset); 30 | } 31 | index.add(partitionIndexEntry); 32 | latestStartOffset = partitionIndexEntry.startOffset(); 33 | } catch (EOFException e) { 34 | // reached End of File 35 | break; 36 | } 37 | } 38 | } 39 | 40 | private void initFile() throws IOException, IndexException { 41 | if (!Files.isRegularFile(indexFile)) { 42 | Files.createFile(indexFile); 43 | fileOutputStream = new FileOutputStream(indexFile.toFile()); 44 | fileOutputStream.write(V1_MAGIC_BYTE); 45 | } else { 46 | fileOutputStream = new FileOutputStream(indexFile.toFile(), true); 47 | } 48 | this.fileInputStream = new FileInputStream(indexFile.toFile()); 49 | fileInputStream.getChannel().position(0); 50 | byte[] v1Validation = new byte[1]; 51 | if (fileInputStream.read(v1Validation) != 1 || v1Validation[0] != V1_MAGIC_BYTE) { 52 | throw new IndexException("Cannot validate Magic Byte in the beginning of the index " + indexFile); 53 | } 54 | } 55 | 56 | void appendSegment(String segmentFile, long startOffset) throws IOException, IndexException { 57 | if (startOffset <= latestStartOffset) { 58 | throw new IndexException("Offsets must be always increasing! There is something terribly wrong in your index " + indexFile + "! Got " + startOffset + " expected an offset larger than " + latestStartOffset); 59 | } 60 | PartitionIndexEntry indexEntry = new PartitionIndexEntry(fileOutputStream, segmentFile, startOffset); 61 | index.add(indexEntry); 62 | latestStartOffset = startOffset; 63 | } 64 | 65 | Optional latestSegmentFile() { 66 | if (index.isEmpty()) { 67 | return Optional.empty(); 68 | } else { 69 | return Optional.of(index.get(index.size() - 1)); 70 | } 71 | } 72 | 73 | long latestStartOffset() { 74 | return latestStartOffset; 75 | } 76 | 77 | void close() throws IOException { 78 | fileInputStream.close(); 79 | fileOutputStream.close(); 80 | } 81 | 82 | void flush() throws IOException { 83 | fileOutputStream.flush(); 84 | } 85 | 86 | long firstOffset() throws IndexException { 87 | if (index.size() == 0) { 88 | throw new PartitionIndex.IndexException("Partition Index is empty. Something is wrong with your partition index. 
Try to rebuild the index " + indexFile); 89 | } 90 | return index.get(0).startOffset(); 91 | } 92 | 93 | void seek(long offset) throws PartitionIndex.IndexException { 94 | int previousPosition = -1; 95 | // Iterate the index after the last element 96 | // Such that we can seek to an offset in the last index entry 97 | for (int i = 0; i <= index.size(); i++) { 98 | if (i == index.size()) { 99 | // Offset must be in the last index entry 100 | position = previousPosition; 101 | } else { 102 | PartitionIndexEntry current = index.get(i); 103 | if (current.startOffset() > offset) { 104 | if (previousPosition >= 0) { 105 | position = previousPosition; 106 | // 107 | return; 108 | } else { 109 | throw new PartitionIndex.IndexException("No Index file found matching the target offset in partition index " + indexFile + ". Search for offset " + offset + ", smallest offset in index: " + current.startOffset()); 110 | } 111 | } else { 112 | previousPosition = i; 113 | } 114 | } 115 | } 116 | } 117 | 118 | boolean hasMoreData() { 119 | return position < index.size(); 120 | } 121 | 122 | String readFileName() { 123 | String fileName = index.get(position).filename(); 124 | position++; 125 | // allow the cursor to be one after the index size. 126 | // This way we can detect easier when we reached the end of the index 127 | if (position > index.size()) { 128 | throw new IndexOutOfBoundsException("Index " + indexFile + " out of bound"); 129 | } 130 | return fileName; 131 | } 132 | 133 | public List index() { 134 | return index; 135 | } 136 | 137 | public static class IndexException extends Exception { 138 | IndexException(String message) { 139 | super(message); 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/partition/PartitionIndexEntry.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 | 3 | import java.io.*; 4 | import java.nio.charset.StandardCharsets; 5 | import java.util.Objects; 6 | 7 | /** 8 | * Format: 9 | * fileNameLength: int32 10 | * fileName: UTF8-String[fileNameLength] 11 | * startOffset: int64 12 | * [endOffset: int64] 13 | */ 14 | public class PartitionIndexEntry { 15 | private final String filename; 16 | private final long startOffset; 17 | 18 | PartitionIndexEntry(OutputStream byteStream, String filename, long startOffset) throws IOException { 19 | this.filename = filename; 20 | this.startOffset = startOffset; 21 | DataOutputStream stream = new DataOutputStream(byteStream); 22 | byte[] filenameBytes = filename.getBytes(StandardCharsets.UTF_8); 23 | stream.writeInt(filenameBytes.length); 24 | stream.write(filenameBytes); 25 | stream.writeLong(startOffset); 26 | } 27 | 28 | PartitionIndexEntry(String filename, long startOffset) { 29 | this.filename = filename; 30 | this.startOffset = startOffset; 31 | } 32 | 33 | static PartitionIndexEntry fromStream(InputStream byteStream) throws IOException { 34 | DataInputStream stream = new DataInputStream(byteStream); 35 | int filenameLength = stream.readInt(); 36 | byte[] filenameBytes = new byte[filenameLength]; 37 | int readBytes = stream.read(filenameBytes); 38 | if (readBytes != filenameLength) { 39 | throw new IOException(String.format("Expected to read %d bytes, got %d", filenameLength, readBytes)); 40 | } 41 | String filename = new String(filenameBytes, StandardCharsets.UTF_8); 42 | long startOffset = stream.readLong(); 43 | return new 
PartitionIndexEntry(filename, startOffset); 44 | } 45 | 46 | public long startOffset() { 47 | return startOffset; 48 | } 49 | 50 | public String filename() { 51 | return filename; 52 | } 53 | 54 | @Override 55 | public int hashCode() { 56 | return Objects.hash(filename, startOffset); 57 | } 58 | 59 | @Override 60 | public boolean equals(Object o) { 61 | if (this == o) 62 | return true; 63 | if (o == null || getClass() != o.getClass()) 64 | return false; 65 | 66 | PartitionIndexEntry that = (PartitionIndexEntry) o; 67 | 68 | return Objects.equals(filename(), that.filename()) 69 | && Objects.equals(startOffset(), that.startOffset()); 70 | } 71 | 72 | @Override 73 | public String toString() { 74 | return String.format("PartitionIndexEntry{filename: %s, startOffset: %d}", 75 | filename, startOffset); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/partition/PartitionIndexRestore.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 | 3 | import de.azapps.kafkabackup.common.segment.SegmentUtils; 4 | 5 | import java.io.IOException; 6 | import java.nio.file.Files; 7 | import java.nio.file.Path; 8 | 9 | public class PartitionIndexRestore { 10 | private final Path indexFile; 11 | private final int partition; 12 | PartitionIndex index; 13 | Path topicDir; 14 | 15 | public PartitionIndexRestore(Path topicDir, int partition) throws RestoreException, IOException, PartitionIndex.IndexException { 16 | this.topicDir = topicDir; 17 | this.indexFile = PartitionUtils.indexFile(topicDir, partition); 18 | this.partition = partition; 19 | 20 | if (Files.isRegularFile(indexFile)) { 21 | throw new RestoreException("Index file " + indexFile + " must not exist"); 22 | } 23 | index = new PartitionIndex(indexFile); 24 | if (!Files.isDirectory(topicDir)) { 25 | throw new RuntimeException("Topic directory " + topicDir + " does not exist"); 26 | } 27 | } 28 | 29 | public void restore() throws IOException { 30 | Files.list(topicDir) 31 | .filter(x -> SegmentUtils.isSegment(x) 32 | && SegmentUtils.getPartitionFromSegment(x) == partition) 33 | .sorted() 34 | .forEach((Path f) -> { 35 | 36 | long offset = SegmentUtils.getStartOffsetFromSegment(f); 37 | try { 38 | index.appendSegment(SegmentUtils.filePrefix(partition, offset), offset); 39 | } catch (IOException | PartitionIndex.IndexException e) { 40 | throw new RuntimeException(e); 41 | } 42 | }); 43 | index.flush(); 44 | index.close(); 45 | } 46 | 47 | 48 | public static class RestoreException extends Exception { 49 | RestoreException(String message) { 50 | super(message); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/partition/PartitionReader.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | import de.azapps.kafkabackup.common.segment.SegmentIndex; 5 | import de.azapps.kafkabackup.common.segment.SegmentReader; 6 | 7 | import java.io.IOException; 8 | import java.nio.file.Files; 9 | import java.nio.file.Path; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | public class PartitionReader { 14 | private final String topic; 15 | private final int partition; 16 | private final Path topicDir; 17 | private SegmentReader 
currentSegment; 18 | private final PartitionIndex partitionIndex; 19 | 20 | public PartitionReader(String topic, int partition, Path topicDir) throws IOException, PartitionIndex.IndexException, PartitionException, SegmentIndex.IndexException { 21 | this.topic = topic; 22 | this.partition = partition; 23 | this.topicDir = topicDir; 24 | Path indexFile = PartitionUtils.indexFile(topicDir, partition); 25 | if (!Files.isDirectory(this.topicDir)) { 26 | throw new PartitionException("Cannot find topic directory for topic " + topic); 27 | } 28 | if (!Files.isRegularFile(indexFile)) { 29 | throw new PartitionException("Cannot find index file for partition " + partition); 30 | } 31 | partitionIndex = new PartitionIndex(indexFile); 32 | if (partitionIndex.hasMoreData()) { 33 | seek(partitionIndex.firstOffset()); 34 | } 35 | } 36 | 37 | public void close() throws IOException { 38 | partitionIndex.close(); 39 | if (currentSegment != null) { 40 | currentSegment.close(); 41 | } 42 | } 43 | 44 | public void seek(long offset) throws PartitionIndex.IndexException, IOException, SegmentIndex.IndexException, IndexOutOfBoundsException { 45 | partitionIndex.seek(offset); 46 | String segmentFilePrefix = partitionIndex.readFileName(); 47 | currentSegment = new SegmentReader(topic, partition, topicDir, segmentFilePrefix); 48 | currentSegment.seek(offset); 49 | } 50 | 51 | public boolean hasMoreData() throws IOException { 52 | if (currentSegment != null) { 53 | return currentSegment.hasMoreData() || partitionIndex.hasMoreData(); 54 | } else { 55 | return false; 56 | } 57 | } 58 | 59 | public Record read() throws IOException, SegmentIndex.IndexException { 60 | if (currentSegment.hasMoreData()) { 61 | return currentSegment.read(); 62 | } else if (partitionIndex.hasMoreData()) { 63 | currentSegment.close(); 64 | String segmentFilePrefix = partitionIndex.readFileName(); 65 | currentSegment = new SegmentReader(topic, partition, topicDir, segmentFilePrefix); 66 | return currentSegment.read(); 67 | } else { 68 | throw new IndexOutOfBoundsException("No more data available"); 69 | } 70 | } 71 | 72 | public List readN(int n) throws IOException, SegmentIndex.IndexException { 73 | List records = new ArrayList<>(); 74 | while (hasMoreData() && records.size() < n) { 75 | Record record = read(); 76 | records.add(record); 77 | } 78 | return records; 79 | } 80 | 81 | public List readBytesBatch(long batchsize) throws IOException, SegmentIndex.IndexException { 82 | List records = new ArrayList<>(); 83 | long currentSize = 0; 84 | while (hasMoreData() && currentSize < batchsize) { 85 | Record record = read(); 86 | records.add(record); 87 | if (record.value() != null) { 88 | currentSize += record.value().length; 89 | } 90 | if (record.key() != null) { 91 | currentSize += record.key().length; 92 | } 93 | } 94 | return records; 95 | } 96 | 97 | 98 | public List readFully() throws IOException, SegmentIndex.IndexException { 99 | List records = new ArrayList<>(); 100 | while (hasMoreData()) { 101 | Record record = read(); 102 | records.add(record); 103 | } 104 | return records; 105 | } 106 | 107 | 108 | public static class PartitionException extends Exception { 109 | PartitionException(String message) { 110 | super(message); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/partition/PartitionUtils.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 
| 3 | import java.nio.file.Path; 4 | import java.nio.file.Paths; 5 | import java.util.Optional; 6 | import java.util.regex.Matcher; 7 | import java.util.regex.Pattern; 8 | 9 | public class PartitionUtils { 10 | private static final Pattern PARTITION_INDEX_PATTERN = Pattern.compile("^index_partition_([0-9]+)$"); 11 | 12 | static Path indexFile(Path topicDir, int partition) { 13 | return Paths.get(topicDir.toString(), String.format("index_partition_%03d", partition)); 14 | } 15 | 16 | public static Optional isPartitionIndex(Path f) { 17 | Path fpath = f.getFileName(); 18 | if (fpath == null) { 19 | return Optional.empty(); 20 | } 21 | String fname = fpath.toString(); 22 | Matcher m = PARTITION_INDEX_PATTERN.matcher(fname); 23 | if (m.find()) { 24 | String partitionStr = m.group(1); 25 | return Optional.of(Integer.valueOf(partitionStr)); 26 | } else { 27 | return Optional.empty(); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/partition/PartitionWriter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | import de.azapps.kafkabackup.common.segment.SegmentIndex; 5 | import de.azapps.kafkabackup.common.segment.SegmentWriter; 6 | 7 | import java.io.IOException; 8 | import java.nio.file.Files; 9 | import java.nio.file.Path; 10 | import java.util.Optional; 11 | 12 | public class PartitionWriter { 13 | private String topic; 14 | private int partition; 15 | private Path topicDir; 16 | private SegmentWriter currentSegment; 17 | private PartitionIndex partitionIndex; 18 | private long maxSegmentSizeBytes; 19 | 20 | public PartitionWriter(String topic, int partition, Path topicDir, long maxSegmentSizeBytes) throws IOException, PartitionIndex.IndexException, SegmentIndex.IndexException { 21 | this.topic = topic; 22 | this.partition = partition; 23 | this.topicDir = topicDir; 24 | this.maxSegmentSizeBytes = maxSegmentSizeBytes; 25 | Path indexFile = PartitionUtils.indexFile(topicDir, partition); 26 | if (!Files.isDirectory(this.topicDir)) { 27 | Files.createDirectories(this.topicDir); 28 | } 29 | partitionIndex = new PartitionIndex(indexFile); 30 | Optional optionalPartitionIndexEntry = partitionIndex.latestSegmentFile(); 31 | if (optionalPartitionIndexEntry.isPresent()) { 32 | currentSegment = new SegmentWriter(topic, partition, optionalPartitionIndexEntry.get().startOffset(), topicDir); 33 | } else { 34 | currentSegment = new SegmentWriter(topic, partition, 0, topicDir); 35 | // do not forget to add the current segment to the partition index. 
Even if it is empty 36 | partitionIndex.appendSegment(currentSegment.filePrefix(), 0); 37 | } 38 | } 39 | 40 | private void nextSegment(long startOffset) throws IOException, SegmentIndex.IndexException, PartitionIndex.IndexException { 41 | currentSegment.close(); 42 | SegmentWriter segment = new SegmentWriter(topic, partition, startOffset, topicDir); 43 | if (startOffset > partitionIndex.latestStartOffset()) { 44 | partitionIndex.appendSegment(segment.filePrefix(), startOffset); 45 | } 46 | currentSegment = segment; 47 | } 48 | 49 | public long lastWrittenOffset() { 50 | return currentSegment.lastWrittenOffset(); 51 | } 52 | 53 | public void append(Record record) throws IOException, SegmentIndex.IndexException, PartitionIndex.IndexException, SegmentWriter.SegmentException { 54 | if (currentSegment.size() > maxSegmentSizeBytes) { 55 | nextSegment(record.kafkaOffset()); 56 | } 57 | currentSegment.append(record); 58 | } 59 | 60 | public void close() throws IOException { 61 | partitionIndex.close(); 62 | currentSegment.close(); 63 | } 64 | 65 | public void flush() throws IOException { 66 | partitionIndex.flush(); 67 | currentSegment.flush(); 68 | } 69 | 70 | public String topic() { 71 | return topic; 72 | } 73 | 74 | public int partition() { 75 | return partition; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/record/Record.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.record; 2 | 3 | import org.apache.kafka.common.header.Header; 4 | import org.apache.kafka.common.header.Headers; 5 | import org.apache.kafka.common.header.internals.RecordHeaders; 6 | import org.apache.kafka.common.record.TimestampType; 7 | import org.apache.kafka.connect.data.Schema; 8 | import org.apache.kafka.connect.errors.DataException; 9 | import org.apache.kafka.connect.header.ConnectHeaders; 10 | import org.apache.kafka.connect.sink.SinkRecord; 11 | 12 | import java.util.Arrays; 13 | import java.util.Iterator; 14 | import java.util.Objects; 15 | 16 | public class Record { 17 | private final String topic; 18 | private final Integer kafkaPartition; 19 | private final byte[] key; 20 | private final byte[] value; 21 | private final Long timestamp; 22 | private final Headers headers; 23 | private final long kafkaOffset; 24 | private final TimestampType timestampType; 25 | 26 | public Record(String topic, int partition, byte[] key, byte[] value, long kafkaOffset) { 27 | this(topic, partition, key, value, kafkaOffset, null, TimestampType.NO_TIMESTAMP_TYPE); 28 | } 29 | 30 | public Record(String topic, int partition, byte[] key, byte[] value, long kafkaOffset, Long timestamp, TimestampType timestampType) { 31 | this(topic, partition, key, value, kafkaOffset, timestamp, timestampType, new RecordHeaders()); 32 | } 33 | 34 | // We do not want to copy the data and assume that Kafka Connect is not malicious 35 | @edu.umd.cs.findbugs.annotations.SuppressFBWarnings("EI_EXPOSE_REP") 36 | public Record(String topic, int partition, byte[] key, byte[] value, long kafkaOffset, Long timestamp, TimestampType timestampType, Headers headers) { 37 | this.topic = topic; 38 | this.kafkaPartition = partition; 39 | this.key = key; 40 | this.value = value; 41 | this.timestamp = timestamp; 42 | this.headers = headers; 43 | this.kafkaOffset = kafkaOffset; 44 | this.timestampType = timestampType; 45 | } 46 | 47 | public static Record fromSinkRecord(SinkRecord sinkRecord) { 48 | 
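// Key, value and header values are expected to already be raw byte arrays (BackupConfig enforces the ByteArrayConverter); connectDataToBytes() rejects anything else with a DataException.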
byte[] key = connectDataToBytes(sinkRecord.keySchema(), sinkRecord.key()); 49 | byte[] value = connectDataToBytes(sinkRecord.valueSchema(), sinkRecord.value()); 50 | RecordHeaders recordHeaders = new RecordHeaders(); 51 | for (org.apache.kafka.connect.header.Header connectHeader : sinkRecord.headers()) { 52 | byte[] headerValue = connectDataToBytes(connectHeader.schema(), connectHeader.value()); 53 | recordHeaders.add(connectHeader.key(), headerValue); 54 | } 55 | return new Record(sinkRecord.topic(), sinkRecord.kafkaPartition(), key, value, sinkRecord.kafkaOffset(), sinkRecord.timestamp(), sinkRecord.timestampType(), recordHeaders); 56 | } 57 | 58 | private static byte[] connectDataToBytes(Schema schema, Object value) { 59 | if (schema != null && schema.type() != Schema.Type.BYTES) 60 | throw new DataException("Invalid schema type for ByteArrayConverter: " + schema.type().toString()); 61 | 62 | if (value != null && !(value instanceof byte[])) 63 | throw new DataException("ByteArrayConverter is not compatible with objects of type " + value.getClass()); 64 | 65 | return (byte[]) value; 66 | } 67 | 68 | public SinkRecord toSinkRecord() { 69 | ConnectHeaders connectHeaders = new ConnectHeaders(); 70 | for (Header header : headers) { 71 | connectHeaders.addBytes(header.key(), header.value()); 72 | } 73 | return new SinkRecord(topic, kafkaPartition, Schema.OPTIONAL_BYTES_SCHEMA, key, Schema.OPTIONAL_BYTES_SCHEMA, value, kafkaOffset, 74 | timestamp, timestampType, connectHeaders); 75 | } 76 | 77 | public String topic() { 78 | return topic; 79 | } 80 | 81 | public Integer kafkaPartition() { 82 | return kafkaPartition; 83 | } 84 | 85 | // We do not want to copy the data and assume that Kafka Connect is not malicious 86 | @edu.umd.cs.findbugs.annotations.SuppressFBWarnings("EI_EXPOSE_REP") 87 | public byte[] key() { 88 | return key; 89 | } 90 | 91 | // We do not want to copy the data and assume that Kafka Connect is not malicious 92 | @edu.umd.cs.findbugs.annotations.SuppressFBWarnings("EI_EXPOSE_REP") 93 | public byte[] value() { 94 | return value; 95 | } 96 | 97 | public Long timestamp() { 98 | return timestamp; 99 | } 100 | 101 | public Headers headers() { 102 | return headers; 103 | } 104 | 105 | public long kafkaOffset() { 106 | return kafkaOffset; 107 | } 108 | 109 | public TimestampType timestampType() { 110 | return timestampType; 111 | } 112 | 113 | @Override 114 | public int hashCode() { 115 | int result = Objects.hash(topic, kafkaPartition, timestamp, headers, kafkaOffset, timestampType); 116 | result = 31 * result + Arrays.hashCode(key); 117 | result = 31 * result + Arrays.hashCode(value); 118 | return result; 119 | } 120 | 121 | @Override 122 | public boolean equals(Object o) { 123 | if (this == o) 124 | return true; 125 | if (o == null || getClass() != o.getClass()) 126 | return false; 127 | 128 | Record that = (Record) o; 129 | 130 | // alternative implementation of ConnectRecord.equals that use Headers equality by value 131 | return Objects.equals(kafkaPartition(), that.kafkaPartition()) 132 | && Objects.equals(topic(), that.topic()) 133 | && Arrays.equals(key(), that.key()) 134 | && Arrays.equals(value(), that.value()) 135 | && Objects.equals(timestamp(), that.timestamp()) 136 | && headersEqualityByValue(headers(), that.headers()) 137 | && Objects.equals(kafkaOffset(), that.kafkaOffset()) 138 | && Objects.equals(timestampType(), that.timestampType()); 139 | } 140 | 141 | @Override 142 | public String toString() { 143 | String keyLength = (key == null) ? 
"null" : String.valueOf(key.length); 144 | String valueLength = (value == null) ? "null" : String.valueOf(value.length); 145 | String timestampTypeStr = timestampType.toString(); 146 | String timestampStr = (timestamp == null) ? "null" : String.valueOf(timestamp); 147 | return String.format("Record{topic: %s, partition: %d, offset: %d, key: byte[%s], value: byte[%s], timestampType: %s, timestamp: %s, headers: %s}", 148 | topic, kafkaPartition, kafkaOffset, keyLength, valueLength, timestampTypeStr, timestampStr, headers); 149 | } 150 | 151 | private boolean headersEqualityByValue(Headers a, Headers b) { 152 | // This is an alternative implementation of ConnectHeaders::equals that use proper Header equality by value 153 | if (a == b) { 154 | return true; 155 | } 156 | // Note, similar to ConnectHeaders::equals, it requires headers to have the same order 157 | // (although, that is probably not what we want in most cases) 158 | Iterator
<Header> aIter = a.iterator(); 159 | Iterator<Header>
bIter = b.iterator(); 160 | while (aIter.hasNext() && bIter.hasNext()) { 161 | if (!headerEqualityByValue(aIter.next(), bIter.next())) 162 | return false; 163 | } 164 | return !aIter.hasNext() && !bIter.hasNext(); 165 | } 166 | 167 | private boolean headerEqualityByValue(Header a, Header b) { 168 | // This is an alternative implementation of ConnectHeader::equals that use proper Value equality by value 169 | // (even if they are byte arrays) 170 | if (a == b) { 171 | return true; 172 | } 173 | if (!Objects.equals(a.key(), b.key())) { 174 | return false; 175 | } 176 | try { 177 | // This particular case is not handled by ConnectHeader::equals 178 | byte[] aBytes = a.value(); 179 | byte[] bBytes = b.value(); 180 | return Arrays.equals(aBytes, bBytes); 181 | } catch (ClassCastException e) { 182 | return a.value() == b.value(); 183 | } 184 | } 185 | 186 | 187 | } 188 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/record/RecordSerde.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.record; 2 | 3 | import org.apache.kafka.common.header.Header; 4 | import org.apache.kafka.common.header.internals.RecordHeaders; 5 | import org.apache.kafka.common.record.TimestampType; 6 | 7 | import java.io.*; 8 | import java.nio.charset.StandardCharsets; 9 | 10 | /** 11 | * Record Format: 12 | * offset: int64 13 | * timestampType: int32 -2 if timestamp is null 14 | * [timestamp: int64] if timestampType != NO_TIMESTAMP_TYPE && timestamp != null 15 | * keyLength: int32 16 | * [key: byte[keyLength]] if keyLength >= 0 17 | * valueLength: int32 18 | * [value: byte[valueLength]] if valueLength >= 0 19 | * headerCount: int32 20 | * headers: Header[headerCount] 21 | *
<p>
22 | * Header Format: 23 | * headerKeyLength: int32 24 | * headerKey: byte[headerKeyLength] 25 | * headerValueLength: int32 26 | * [headerValue: byte[headerValueLength]] if headerValueLength >= 0 27 | */ 28 | public class RecordSerde { 29 | public static Record read(String topic, int partition, InputStream inputStream) throws IOException { 30 | DataInputStream dataStream = new DataInputStream(inputStream); 31 | long offset = dataStream.readLong(); 32 | int timestampTypeInt = dataStream.readInt(); 33 | TimestampType timestampType; 34 | Long timestamp; 35 | // See comment in `write()` 36 | if (timestampTypeInt == -2) { 37 | timestampType = TimestampType.CREATE_TIME; 38 | timestamp=null; 39 | } else { 40 | switch (timestampTypeInt) { 41 | case -1: 42 | timestampType = TimestampType.NO_TIMESTAMP_TYPE; 43 | break; 44 | case 0: 45 | timestampType = TimestampType.CREATE_TIME; 46 | break; 47 | case 1: 48 | timestampType = TimestampType.LOG_APPEND_TIME; 49 | break; 50 | default: 51 | throw new RuntimeException("Unexpected TimestampType. Expected -1,0 or 1. Got " + timestampTypeInt); 52 | } 53 | if (timestampType != TimestampType.NO_TIMESTAMP_TYPE) { 54 | timestamp = dataStream.readLong(); 55 | } else { 56 | timestamp = null; 57 | } 58 | } 59 | int keyLength = dataStream.readInt(); 60 | byte[] key = null; 61 | if (keyLength >= 0) { 62 | key = new byte[keyLength]; 63 | int readBytes = dataStream.read(key); 64 | if (readBytes != keyLength) { 65 | throw new IOException(String.format("Expected to read %d bytes, got %d", keyLength, readBytes)); 66 | } 67 | } 68 | 69 | int valueLength = dataStream.readInt(); 70 | byte[] value = null; 71 | if (valueLength >= 0) { 72 | value = new byte[valueLength]; 73 | int readBytes = dataStream.read(value); 74 | if (readBytes != valueLength) { 75 | throw new IOException(String.format("Expected to read %d bytes, got %d", valueLength, readBytes)); 76 | } 77 | } 78 | int headerCount = dataStream.readInt(); 79 | RecordHeaders headers = new RecordHeaders(); 80 | for (int i = 0; i < headerCount; i++) { 81 | // Key 82 | int headerKeyLength = dataStream.readInt(); 83 | if (headerKeyLength < 0) { 84 | throw new RuntimeException("Invalid negative header key size " + headerKeyLength); 85 | } 86 | byte[] headerKeyBytes = new byte[headerKeyLength]; 87 | int readBytes = dataStream.read(headerKeyBytes); 88 | if (readBytes != headerKeyLength) { 89 | throw new IOException(String.format("Expected to read %d bytes, got %d", headerKeyLength, readBytes)); 90 | } 91 | String headerKey = new String(headerKeyBytes, StandardCharsets.UTF_8); 92 | // Value 93 | int headerValueLength = dataStream.readInt(); 94 | byte[] headerValue = null; 95 | if (headerValueLength >= 0) { 96 | headerValue = new byte[headerValueLength]; 97 | int hvReadBytes = dataStream.read(headerValue); 98 | if (hvReadBytes != headerValueLength) { 99 | throw new IOException(String.format("Expected to read %d bytes, got %d", headerValueLength, hvReadBytes)); 100 | } 101 | } 102 | headers.add(headerKey, headerValue); 103 | } 104 | 105 | return new Record(topic, partition, key, value, offset, timestamp, timestampType, headers); 106 | } 107 | 108 | public static void write(OutputStream outputStream, Record record) throws IOException { 109 | DataOutputStream dataStream = new DataOutputStream(outputStream); 110 | dataStream.writeLong(record.kafkaOffset()); 111 | // There is a special case where the timestamp type eqauls `CREATE_TIME` but is actually `null`. 
112 | // This should not happen normally and I see it as a bug in the Client implementation of pykafka 113 | // But as Kafka accepts that value, so should Kafka Backup. Thus, this dirty workaround: we write the 114 | // timestamp type `-2` if the type is CREATE_TIME but the timestamp itself is null. Otherwise we would have 115 | // needed to change the byte format and for now I think this is the better solution. 116 | if (record.timestampType() == TimestampType.CREATE_TIME && record.timestamp() == null) { 117 | dataStream.writeInt(-2); 118 | } else { 119 | dataStream.writeInt(record.timestampType().id); 120 | if (record.timestampType() != TimestampType.NO_TIMESTAMP_TYPE) { 121 | dataStream.writeLong(record.timestamp()); 122 | } 123 | } 124 | if (record.key() != null) { 125 | dataStream.writeInt(record.key().length); 126 | dataStream.write(record.key()); 127 | } else { 128 | dataStream.writeInt(-1); 129 | } 130 | if (record.value() != null) { 131 | dataStream.writeInt(record.value().length); 132 | dataStream.write(record.value()); 133 | } else { 134 | dataStream.writeInt(-1); 135 | } 136 | Header[] headers = record.headers().toArray(); 137 | dataStream.writeInt(headers.length); 138 | for (Header header : record.headers()) { 139 | byte[] headerKeyBytes = header.key().getBytes(StandardCharsets.UTF_8); 140 | dataStream.writeInt(headerKeyBytes.length); 141 | dataStream.write(headerKeyBytes); 142 | if (header.value() != null) { 143 | dataStream.writeInt(header.value().length); 144 | dataStream.write(header.value()); 145 | } else { 146 | dataStream.writeInt(-1); 147 | } 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/segment/SegmentIndex.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import java.io.EOFException; 4 | import java.io.FileInputStream; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.Optional; 12 | 13 | public class SegmentIndex { 14 | private static final byte V1_MAGIC_BYTE = 0x01; 15 | private Path indexFile; 16 | private List index = new ArrayList<>(); 17 | private long lastValidRecordOffset = -1; 18 | private long lastValidIndexPosition = 1; // mind the magic byte! 19 | private FileOutputStream fileOutputStream; 20 | private FileInputStream fileInputStream; 21 | 22 | public SegmentIndex(Path indexFile) throws IOException, IndexException { 23 | this.indexFile = indexFile; 24 | initFile(); 25 | while (true) { 26 | try { 27 | SegmentIndexEntry segmentIndexEntry = SegmentIndexEntry.fromStream(fileInputStream); 28 | if (segmentIndexEntry.getOffset() <= lastValidRecordOffset) { 29 | throw new IndexException("Offsets must be always increasing! 
There is something terribly wrong in your index!"); 30 | } 31 | index.add(segmentIndexEntry); 32 | lastValidRecordOffset = segmentIndexEntry.getOffset(); 33 | lastValidIndexPosition = fileInputStream.getChannel().position(); 34 | } catch (EOFException e) { 35 | // reached End of File 36 | break; 37 | } 38 | } 39 | } 40 | 41 | private void initFile() throws IOException, IndexException { 42 | if (!Files.isRegularFile(indexFile)) { 43 | Files.createFile(indexFile); 44 | fileOutputStream = new FileOutputStream(indexFile.toFile()); 45 | fileOutputStream.write(V1_MAGIC_BYTE); 46 | } else { 47 | fileOutputStream = new FileOutputStream(indexFile.toFile(), true); 48 | } 49 | this.fileInputStream = new FileInputStream(indexFile.toFile()); 50 | byte[] v1Validation = new byte[1]; 51 | if (fileInputStream.read(v1Validation) != 1 || v1Validation[0] != V1_MAGIC_BYTE) { 52 | throw new IndexException("Cannot validate Magic Byte in the beginning of the index " + indexFile); 53 | } 54 | } 55 | 56 | void addEntry(SegmentIndexEntry segmentIndexEntry) throws IOException, IndexException { 57 | if (segmentIndexEntry.getOffset() <= lastValidRecordOffset) { 58 | throw new IndexException("Offsets must be always increasing! There is something terribly wrong in your index!"); 59 | } 60 | fileOutputStream.getChannel().position(lastValidIndexPosition); 61 | segmentIndexEntry.writeToStream(fileOutputStream); 62 | lastValidIndexPosition = fileOutputStream.getChannel().position(); 63 | lastValidRecordOffset = segmentIndexEntry.getOffset(); 64 | index.add(segmentIndexEntry); 65 | } 66 | 67 | Optional lastIndexEntry() { 68 | if (!index.isEmpty()) { 69 | return Optional.of(index.get(index.size() - 1)); 70 | } else { 71 | return Optional.empty(); 72 | } 73 | } 74 | 75 | long lastValidStartPosition() { 76 | if (!index.isEmpty()) { 77 | return index.get(index.size() - 1).recordFilePosition(); 78 | } else { 79 | return 0L; 80 | } 81 | 82 | } 83 | 84 | Optional getByPosition(int position) { 85 | if (position >= index.size()) { 86 | return Optional.empty(); 87 | } else { 88 | return Optional.of(index.get(position)); 89 | } 90 | } 91 | 92 | Optional findByOffset(long offset) { 93 | for (SegmentIndexEntry current : index) { 94 | if (current.getOffset() == offset) { 95 | return Optional.of(current.recordFilePosition()); 96 | } 97 | } 98 | return Optional.empty(); 99 | } 100 | 101 | Optional findEarliestWithHigherOrEqualOffset(long offset) { 102 | for (SegmentIndexEntry current : index) { 103 | if (current.getOffset() >= offset) { 104 | return Optional.of(current.recordFilePosition()); 105 | } 106 | } 107 | return Optional.empty(); 108 | } 109 | 110 | int size() { 111 | return index.size(); 112 | } 113 | 114 | public List index() { 115 | return index; 116 | } 117 | 118 | void flush() throws IOException { 119 | fileOutputStream.flush(); 120 | } 121 | 122 | void close() throws IOException { 123 | fileInputStream.close(); 124 | fileOutputStream.close(); 125 | } 126 | 127 | public static class IndexException extends Exception { 128 | IndexException(String message) { 129 | super(message); 130 | } 131 | } 132 | 133 | } 134 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/segment/SegmentIndexEntry.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import java.io.*; 4 | import java.util.Objects; 5 | 6 | /** 7 | * Format: 8 | * offset: int64 9 | * recordFilePosition: int64 10 
| * recordLength: int64 11 | */ 12 | public class SegmentIndexEntry { 13 | private final long offset; 14 | private final long recordFilePosition; 15 | private final long recordByteLength; 16 | 17 | SegmentIndexEntry(long offset, long recordFilePosition, long recordByteLength) { 18 | this.offset = offset; 19 | this.recordFilePosition = recordFilePosition; 20 | this.recordByteLength = recordByteLength; 21 | } 22 | 23 | static SegmentIndexEntry fromStream(InputStream byteStream) throws IOException { 24 | DataInputStream stream = new DataInputStream(byteStream); 25 | long offset = stream.readLong(); 26 | long recordFileOffset = stream.readLong(); 27 | long recordByteLength = stream.readLong(); 28 | return new SegmentIndexEntry(offset, recordFileOffset, recordByteLength); 29 | } 30 | 31 | public long getOffset() { 32 | return offset; 33 | } 34 | 35 | public long recordFilePosition() { 36 | return recordFilePosition; 37 | } 38 | 39 | public long recordByteLength() { 40 | return recordByteLength; 41 | } 42 | 43 | void writeToStream(OutputStream byteStream) throws IOException { 44 | DataOutputStream stream = new DataOutputStream(byteStream); 45 | stream.writeLong(offset); 46 | stream.writeLong(recordFilePosition); 47 | stream.writeLong(recordByteLength); 48 | } 49 | 50 | @Override 51 | public int hashCode() { 52 | return Objects.hash(offset, recordFilePosition, recordByteLength); 53 | } 54 | 55 | @Override 56 | public boolean equals(Object o) { 57 | if (this == o) 58 | return true; 59 | if (o == null || getClass() != o.getClass()) 60 | return false; 61 | 62 | SegmentIndexEntry that = (SegmentIndexEntry) o; 63 | 64 | return Objects.equals(getOffset(), that.getOffset()) 65 | && Objects.equals(recordFilePosition(), that.recordFilePosition()) 66 | && Objects.equals(recordByteLength(), that.recordByteLength()); 67 | } 68 | 69 | @Override 70 | public String toString() { 71 | return String.format("SegmentIndexEntry{offset: %d, recordFilePosition: %d, recordByteLength: %d}", 72 | offset, recordFilePosition, recordByteLength); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/segment/SegmentIndexRestore.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | 5 | import java.io.EOFException; 6 | import java.io.IOException; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | 10 | public class SegmentIndexRestore { 11 | private final SegmentIndex segmentIndex; 12 | private final UnverifiedSegmentReader reader; 13 | 14 | public SegmentIndexRestore(Path segmentFile) throws IOException, RestoreException, SegmentIndex.IndexException { 15 | int partition = SegmentUtils.getPartitionFromSegment(segmentFile); 16 | long startOffset = SegmentUtils.getStartOffsetFromSegment(segmentFile); 17 | Path parent = segmentFile.toAbsolutePath().getParent(); 18 | if (parent == null) { 19 | throw new RestoreException("Segment file " + segmentFile + " does not exist"); 20 | } 21 | Path indexFile = SegmentUtils.indexFile(parent, partition, startOffset); 22 | 23 | if (!Files.isRegularFile(segmentFile)) { 24 | throw new RestoreException("Segment file " + segmentFile + " does not exist"); 25 | } 26 | if (Files.isRegularFile(indexFile)) { 27 | throw new RestoreException("Index file " + indexFile + " must not exist"); 28 | } 29 | segmentIndex = new SegmentIndex(indexFile); 30 | reader = new 
UnverifiedSegmentReader(segmentFile); 31 | } 32 | 33 | public void restore() throws IOException, SegmentIndex.IndexException { 34 | long lastPosition = 1; // mind the magic byte! 35 | while (true) { 36 | try { 37 | Record record = reader.read(); 38 | long currentPosition = reader.position(); 39 | SegmentIndexEntry indexEntry = new SegmentIndexEntry(record.kafkaOffset(), lastPosition, currentPosition - lastPosition); 40 | segmentIndex.addEntry(indexEntry); 41 | lastPosition = currentPosition; 42 | } catch (EOFException e) { 43 | break; 44 | } 45 | } 46 | segmentIndex.flush(); 47 | segmentIndex.close(); 48 | } 49 | 50 | public static class RestoreException extends Exception { 51 | RestoreException(String message) { 52 | super(message); 53 | } 54 | } 55 | 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/segment/SegmentReader.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | import de.azapps.kafkabackup.common.record.RecordSerde; 5 | 6 | import java.io.EOFException; 7 | import java.io.FileInputStream; 8 | import java.io.IOException; 9 | import java.nio.channels.FileChannel; 10 | import java.nio.file.Files; 11 | import java.nio.file.Path; 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | import java.util.Optional; 15 | 16 | public class SegmentReader { 17 | private final String topic; 18 | private final int partition; 19 | private final String filePrefix; 20 | private final SegmentIndex segmentIndex; 21 | private final FileInputStream recordInputStream; 22 | private final long lastValidStartPosition; 23 | 24 | public SegmentReader(String topic, int partition, Path topicDir, long startOffset) throws IOException, SegmentIndex.IndexException { 25 | this(topic, partition, topicDir, SegmentUtils.filePrefix(partition, startOffset)); 26 | } 27 | 28 | public SegmentReader(String topic, int partition, Path topicDir, String filePrefix) throws IOException, SegmentIndex.IndexException { 29 | this.topic = topic; 30 | this.partition = partition; 31 | this.filePrefix = filePrefix; 32 | 33 | Path indexFile = SegmentUtils.indexFile(topicDir, filePrefix); 34 | Path recordFile = SegmentUtils.recordsFile(topicDir, filePrefix); 35 | if (!Files.isRegularFile(indexFile)) { 36 | throw new RuntimeException("Index for Segment not found: " + indexFile.toString()); 37 | } 38 | if (!Files.isRegularFile(recordFile)) { 39 | throw new RuntimeException("Segment not found: " + recordFile.toString()); 40 | } 41 | segmentIndex = new SegmentIndex(indexFile); 42 | recordInputStream = new FileInputStream(recordFile.toFile()); 43 | SegmentUtils.ensureValidSegment(recordInputStream); 44 | lastValidStartPosition = segmentIndex.lastValidStartPosition(); 45 | } 46 | 47 | public void seek(long offset) throws IOException { 48 | Optional optionalPosition = segmentIndex.findEarliestWithHigherOrEqualOffset(offset); 49 | if (optionalPosition.isPresent()) { 50 | recordInputStream.getChannel().position(optionalPosition.get()); 51 | } else { 52 | // If we couldn't find such a record, skip to EOF. This will make sure that hasMoreData() returns false. 
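            // Positioning at the end of the channel makes position() exceed lastValidStartPosition,
            // so hasMoreData() below returns false and read() throws EOFException instead of
            // reading past the last valid record.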
53 | FileChannel fileChannel = recordInputStream.getChannel(); 54 | fileChannel.position(fileChannel.size()); 55 | } 56 | } 57 | 58 | public boolean hasMoreData() throws IOException { 59 | return recordInputStream.getChannel().position() <= lastValidStartPosition; 60 | } 61 | 62 | public Record read() throws IOException { 63 | if (!hasMoreData()) { 64 | throw new EOFException("Already read the last valid record in topic " + topic + ", segment " + filePrefix); 65 | } 66 | return RecordSerde.read(topic, partition, recordInputStream); 67 | } 68 | 69 | public List readN(int n) throws IOException { 70 | List records = new ArrayList<>(n); 71 | while (hasMoreData() && records.size() < n) { 72 | Record record = read(); 73 | records.add(record); 74 | } 75 | return records; 76 | } 77 | 78 | public List readFully() throws IOException { 79 | List records = new ArrayList<>(segmentIndex.size()); 80 | while (hasMoreData()) { 81 | Record record = read(); 82 | records.add(record); 83 | } 84 | return records; 85 | } 86 | 87 | public void close() throws IOException { 88 | recordInputStream.close(); 89 | segmentIndex.close(); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/segment/SegmentUtils.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.IOException; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.regex.Matcher; 8 | import java.util.regex.Pattern; 9 | 10 | public class SegmentUtils { 11 | 12 | static final byte V1_MAGIC_BYTE = 0x01; 13 | private static final Pattern SEGMENT_PATTERN = Pattern.compile("^segment_partition_([0-9]+)_from_offset_([0-9]+)_records$"); 14 | 15 | public static String filePrefix(int partition, long startOffset) { 16 | return String.format("segment_partition_%03d_from_offset_%010d", partition, startOffset); 17 | } 18 | 19 | static void ensureValidSegment(FileInputStream inputStream) throws IOException { 20 | inputStream.getChannel().position(0); 21 | byte[] v1Validation = new byte[1]; 22 | if (inputStream.read(v1Validation) != 1 || v1Validation[0] != SegmentUtils.V1_MAGIC_BYTE) { 23 | throw new IOException("Cannot validate Magic Byte in the beginning of the Segment"); 24 | } 25 | } 26 | 27 | public static Path indexFile(Path topicDir, int partition, long startOffset) { 28 | return indexFile(topicDir, filePrefix(partition, startOffset)); 29 | } 30 | 31 | static Path indexFile(Path topicDir, String filePrefix) { 32 | return Paths.get(topicDir.toString(), filePrefix + "_index"); 33 | } 34 | 35 | public static Path recordsFile(Path topicDir, int partition, long startOffset) { 36 | return recordsFile(topicDir, filePrefix(partition, startOffset)); 37 | } 38 | 39 | static Path recordsFile(Path topicDir, String filePrefix) { 40 | return Paths.get(topicDir.toString(), filePrefix + "_records"); 41 | } 42 | 43 | public static boolean isSegment(Path file) { 44 | Path fpath = file.getFileName(); 45 | if (fpath == null) { 46 | return false; 47 | } 48 | Matcher m = SEGMENT_PATTERN.matcher(fpath.toString()); 49 | return m.find(); 50 | } 51 | 52 | public static int getPartitionFromSegment(Path file) { 53 | Path fpath = file.getFileName(); 54 | if (fpath == null) { 55 | throw new RuntimeException("File " + file + " is not a Segment"); 56 | } 57 | Matcher m = SEGMENT_PATTERN.matcher(fpath.toString()); 58 | if (m.find()) { 59 | 
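            // group(1) is the zero-padded partition number captured from the file name,
            // e.g. "000" in "segment_partition_000_from_offset_0000000000_records".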
String partitionStr = m.group(1); 60 | return Integer.parseInt(partitionStr); 61 | } else { 62 | throw new RuntimeException("File " + file + " is not a Segment"); 63 | } 64 | } 65 | 66 | public static long getStartOffsetFromSegment(Path file) { 67 | Path fpath = file.getFileName(); 68 | if (fpath == null) { 69 | throw new RuntimeException("File " + file + " is not a Segment"); 70 | } 71 | Matcher m = SEGMENT_PATTERN.matcher(fpath.toString()); 72 | if (m.find()) { 73 | String offsetStr = m.group(2); 74 | return Long.parseLong(offsetStr); 75 | } else { 76 | throw new RuntimeException("File " + file + " is not a Segment"); 77 | } 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/segment/SegmentWriter.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | import de.azapps.kafkabackup.common.record.RecordSerde; 5 | 6 | import java.io.FileInputStream; 7 | import java.io.FileOutputStream; 8 | import java.io.IOException; 9 | import java.nio.file.Files; 10 | import java.nio.file.Path; 11 | import java.util.Optional; 12 | 13 | public class SegmentWriter { 14 | private final String topic; 15 | private final int partition; 16 | private final long startOffset; 17 | private final SegmentIndex segmentIndex; 18 | private final FileOutputStream recordOutputStream; 19 | 20 | public SegmentWriter(String topic, int partition, long startOffset, Path topicDir) throws IOException, SegmentIndex.IndexException { 21 | this.topic = topic; 22 | this.partition = partition; 23 | this.startOffset = startOffset; 24 | 25 | Path indexFile = SegmentUtils.indexFile(topicDir, partition, startOffset); 26 | segmentIndex = new SegmentIndex(indexFile); 27 | 28 | Path recordFile = SegmentUtils.recordsFile(topicDir, partition, startOffset); 29 | if (!Files.isRegularFile(recordFile)) { 30 | Files.createFile(recordFile); 31 | recordOutputStream = new FileOutputStream(recordFile.toFile()); 32 | recordOutputStream.write(SegmentUtils.V1_MAGIC_BYTE); 33 | } else { 34 | // Validate Magic Byte 35 | FileInputStream inputStream = new FileInputStream(recordFile.toFile()); 36 | SegmentUtils.ensureValidSegment(inputStream); 37 | inputStream.close(); 38 | 39 | // move to last committed position of the file 40 | recordOutputStream = new FileOutputStream(recordFile.toFile(), true); 41 | Optional optionalPreviousIndexEntry = segmentIndex.lastIndexEntry(); 42 | if (optionalPreviousIndexEntry.isPresent()) { 43 | SegmentIndexEntry previousSegmentIndexEntry = optionalPreviousIndexEntry.get(); 44 | long position = previousSegmentIndexEntry.recordFilePosition() + previousSegmentIndexEntry.recordByteLength(); 45 | recordOutputStream.getChannel().position(position); 46 | } else { 47 | recordOutputStream.getChannel().position(1); 48 | } 49 | } 50 | } 51 | 52 | public long lastWrittenOffset() { 53 | return segmentIndex.lastIndexEntry().map(SegmentIndexEntry::getOffset).orElse(-1L); 54 | } 55 | 56 | public void append(Record record) throws IOException, SegmentIndex.IndexException, SegmentException { 57 | if (!record.topic().equals(topic)) { 58 | throw new SegmentException("Trying to append to wrong topic!\n" + 59 | "Expected topic: " + topic + " given topic: " + record.topic()); 60 | } 61 | if (record.kafkaPartition() != partition) { 62 | throw new SegmentException("Trying to append to wrong partition!\n" + 63 | 
"Expected partition: " + partition + " given partition: " + partition); 64 | } 65 | if (record.kafkaOffset() < startOffset) { 66 | throw new SegmentException("Try to append a record with an offset smaller than the start offset. Something is very wrong. \n" + 67 | "Topic: " + record.topic() + "Partition: " + record.kafkaPartition() + " StartOffset: " + startOffset + " RecordOffset: " + record.kafkaOffset() + "\n" + 68 | "You probably forgot to delete a previous Backup\n"); 69 | } 70 | if (record.kafkaOffset() <= lastWrittenOffset()) { 71 | // We are handling the offsets ourselves. This should never happen! 72 | throw new SegmentException("Trying to override a written record. There is something terribly wrong in your setup! Please check whether you are trying to override an existing backup" + 73 | "Topic: " + record.topic() + "Partition: " + record.kafkaPartition() + " lastWrittenOffset: " + lastWrittenOffset() + " RecordOffset: " + record.kafkaOffset()); 74 | } 75 | long startPosition = recordOutputStream.getChannel().position(); 76 | RecordSerde.write(recordOutputStream, record); 77 | long recordByteLength = recordOutputStream.getChannel().position() - startPosition; 78 | SegmentIndexEntry segmentIndexEntry = new SegmentIndexEntry(record.kafkaOffset(), startPosition, recordByteLength); 79 | segmentIndex.addEntry(segmentIndexEntry); 80 | } 81 | 82 | public String filePrefix() { 83 | return SegmentUtils.filePrefix(partition, startOffset); 84 | } 85 | 86 | public long size() throws IOException { 87 | return recordOutputStream.getChannel().size(); 88 | } 89 | 90 | public void flush() throws IOException { 91 | recordOutputStream.flush(); 92 | segmentIndex.flush(); 93 | } 94 | 95 | public void close() throws IOException { 96 | recordOutputStream.close(); 97 | segmentIndex.close(); 98 | } 99 | 100 | public static class SegmentException extends Exception { 101 | SegmentException(String message) { 102 | super(message); 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/common/segment/UnverifiedSegmentReader.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import de.azapps.kafkabackup.common.record.Record; 4 | import de.azapps.kafkabackup.common.record.RecordSerde; 5 | 6 | import java.io.EOFException; 7 | import java.io.FileInputStream; 8 | import java.io.IOException; 9 | import java.nio.file.Path; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | public class UnverifiedSegmentReader { 14 | private String topic; 15 | private int partition; 16 | private FileInputStream recordInputStream; 17 | 18 | public UnverifiedSegmentReader(Path recordFile) throws IOException { 19 | this(recordFile, "topic", 0); 20 | } 21 | 22 | public UnverifiedSegmentReader(Path recordFile, String topic, int partition) throws IOException { 23 | recordInputStream = new FileInputStream(recordFile.toFile()); 24 | this.topic = topic; 25 | this.partition = partition; 26 | SegmentUtils.ensureValidSegment(recordInputStream); 27 | } 28 | 29 | public Record read() throws IOException { 30 | return RecordSerde.read(topic, partition, recordInputStream); 31 | } 32 | 33 | public List readN(int n) throws IOException { 34 | List records = new ArrayList<>(n); 35 | while (records.size() <= n) { 36 | try { 37 | Record record = read(); 38 | records.add(record); 39 | } catch (EOFException e) { 40 | break; 41 | } 42 | } 43 | return 
records; 44 | } 45 | 46 | public List readFully() throws IOException { 47 | List records = new ArrayList<>(); 48 | while (true) { 49 | try { 50 | Record record = read(); 51 | records.add(record); 52 | } catch (EOFException e) { 53 | break; 54 | } 55 | } 56 | return records; 57 | } 58 | 59 | public long position() throws IOException { 60 | return recordInputStream.getChannel().position(); 61 | } 62 | 63 | public void close() throws IOException { 64 | recordInputStream.close(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/sink/BackupSinkConfig.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.sink; 2 | 3 | import org.apache.kafka.common.config.AbstractConfig; 4 | import org.apache.kafka.common.config.ConfigDef; 5 | 6 | import java.util.HashMap; 7 | import java.util.Map; 8 | 9 | class BackupSinkConfig extends AbstractConfig { 10 | static final String CLUSTER_PREFIX = "cluster."; 11 | static final String CLUSTER_BOOTSTRAP_SERVERS = CLUSTER_PREFIX + "bootstrap.servers"; 12 | static final String ADMIN_CLIENT_PREFIX = "admin."; 13 | static final String TARGET_DIR_CONFIG = "target.dir"; 14 | static final String MAX_SEGMENT_SIZE = "max.segment.size.bytes"; 15 | static final String SNAPSHOT = "snapshot"; 16 | 17 | static final ConfigDef CONFIG_DEF = new ConfigDef() 18 | .define(TARGET_DIR_CONFIG, ConfigDef.Type.STRING, 19 | ConfigDef.Importance.HIGH, "TargetDir") 20 | .define(MAX_SEGMENT_SIZE, ConfigDef.Type.INT, 1024 ^ 3, // 1 GiB 21 | ConfigDef.Importance.LOW, "Maximum segment size") 22 | .define(SNAPSHOT, ConfigDef.Type.BOOLEAN, false, 23 | ConfigDef.Importance.LOW, "Creates a snapshot. Terminates connector when end of all partitions has been reached."); 24 | 25 | BackupSinkConfig(Map props) { 26 | super(CONFIG_DEF, props, true); 27 | if (!props.containsKey(TARGET_DIR_CONFIG)) { 28 | throw new RuntimeException("Missing Configuration Variable: " + TARGET_DIR_CONFIG); 29 | } 30 | if (!props.containsKey(MAX_SEGMENT_SIZE)) { 31 | throw new RuntimeException("Missing Configuration Variable: " + MAX_SEGMENT_SIZE); 32 | } 33 | } 34 | 35 | Map adminConfig() { 36 | Map props = new HashMap<>(); 37 | props.putAll(originalsWithPrefix(CLUSTER_PREFIX)); 38 | props.putAll(originalsWithPrefix(ADMIN_CLIENT_PREFIX)); 39 | return props; 40 | } 41 | 42 | String targetDir() { 43 | return getString(TARGET_DIR_CONFIG); 44 | } 45 | 46 | Integer maxSegmentSizeBytes() { 47 | return getInt(MAX_SEGMENT_SIZE); 48 | } 49 | 50 | Boolean snapShotMode() { return getBoolean(SNAPSHOT); } 51 | 52 | Map consumerConfig() { 53 | return new HashMap<>(originalsWithPrefix(CLUSTER_PREFIX)); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/sink/BackupSinkConnector.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.sink; 2 | 3 | import org.apache.kafka.common.config.ConfigDef; 4 | import org.apache.kafka.connect.connector.Task; 5 | import org.apache.kafka.connect.errors.ConnectException; 6 | import org.apache.kafka.connect.sink.SinkConnector; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | public class BackupSinkConnector extends SinkConnector { 13 | private Map config; 14 | 15 | @Override 16 | public void start(Map props) { 17 | config = props; 18 | } 19 | 20 | 
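    // A sketch (not shipped in this repository) of a minimal connect-standalone properties file for
    // this connector. target.dir, max.segment.size.bytes and the cluster.* prefix come from
    // BackupSinkConfig above; everything else is standard Kafka Connect configuration and the
    // values are placeholders:
    //
    //   name=backup-sink
    //   connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector
    //   key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
    //   value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
    //   topics.regex=backup-test-.*
    //   cluster.bootstrap.servers=localhost:9092
    //   target.dir=/tmp/kafka-backup
    //   max.segment.size.bytes=1073741824
    //
    // Note that BackupSinkConfig requires target.dir and max.segment.size.bytes to be set explicitly,
    // and that its declared default for the segment size, 1024 ^ 3, is an XOR expression (1027 bytes),
    // not the 1 GiB the adjacent comment suggests, so the size should always be configured.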
@Override 21 | public Class taskClass() { 22 | return BackupSinkTask.class; 23 | } 24 | 25 | @Override 26 | public List> taskConfigs(int maxTasks) { 27 | if (maxTasks > 1) { 28 | throw new ConnectException("kafka-backup can currently handle only one task."); 29 | } 30 | List> configs = new ArrayList<>(); 31 | configs.add(config); 32 | return configs; 33 | } 34 | 35 | @Override 36 | public void stop() { 37 | 38 | } 39 | 40 | @Override 41 | public ConfigDef config() { 42 | return BackupSinkConfig.CONFIG_DEF; 43 | } 44 | 45 | @Override 46 | public String version() { 47 | return "0.1"; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/source/BackupSourceConfig.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.source; 2 | 3 | import de.azapps.kafkabackup.common.BackupConfig; 4 | import org.apache.kafka.common.config.AbstractConfig; 5 | import org.apache.kafka.common.config.ConfigDef; 6 | 7 | import java.util.Arrays; 8 | import java.util.HashMap; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | class BackupSourceConfig extends BackupConfig { 13 | private static final String CLUSTER_KEY_DESERIALIZER = CLUSTER_PREFIX + "key.deserializer"; 14 | private static final String CLUSTER_VALUE_DESERIALIZER = CLUSTER_PREFIX + "value.deserializer"; 15 | private static final String BATCH_SIZE_CONFIG = "batch.size"; 16 | private static final String SOURCE_DIR_CONFIG = "source.dir"; 17 | private static final String TOPICS_CONFIG = "topics"; 18 | static final String ALLOW_OLD_KAFKA_CONNECT_VERSION = "allow.old.kafka.connect.version.unsafe"; 19 | 20 | 21 | private static final ConfigDef CONFIG_DEF = new ConfigDef() 22 | .define(SOURCE_DIR_CONFIG, ConfigDef.Type.STRING, 23 | ConfigDef.Importance.HIGH, "TargetDir") 24 | .define(BATCH_SIZE_CONFIG, ConfigDef.Type.INT, 100, 25 | ConfigDef.Importance.LOW, "Batch size per partition") 26 | .define(TOPICS_CONFIG, ConfigDef.Type.STRING, 27 | ConfigDef.Importance.HIGH, "Topics to restore"); 28 | 29 | BackupSourceConfig(Map props) { 30 | super(CONFIG_DEF, props); 31 | if (!props.containsKey(SOURCE_DIR_CONFIG)) { 32 | throw new RuntimeException("Missing Configuration Variable: " + SOURCE_DIR_CONFIG); 33 | } 34 | if (!props.containsKey(TOPICS_CONFIG)) { 35 | throw new RuntimeException("Missing Configuration Variable: " + TOPICS_CONFIG); 36 | } 37 | if (!props.containsKey(CLUSTER_KEY_DESERIALIZER)) { 38 | throw new RuntimeException("Missing Configuration Variable: " + CLUSTER_KEY_DESERIALIZER); 39 | } 40 | if (!props.containsKey(CLUSTER_VALUE_DESERIALIZER)) { 41 | throw new RuntimeException("Missing Configuration Variable: " + CLUSTER_VALUE_DESERIALIZER); 42 | } 43 | } 44 | 45 | Map consumerConfig() { 46 | return new HashMap<>(originalsWithPrefix(CLUSTER_PREFIX)); 47 | } 48 | 49 | String sourceDir() { 50 | return getString(SOURCE_DIR_CONFIG); 51 | } 52 | 53 | Integer batchSize() { 54 | return getInt(BATCH_SIZE_CONFIG); 55 | } 56 | 57 | List topics() { 58 | return Arrays.asList(getString(TOPICS_CONFIG).split("\\s*,\\s*")); 59 | } 60 | 61 | 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/main/java/de/azapps/kafkabackup/source/BackupSourceConnector.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.source; 2 | 3 | import org.apache.kafka.clients.producer.RecordMetadata; 4 | import 
org.apache.kafka.common.config.ConfigDef; 5 | import org.apache.kafka.connect.connector.Task; 6 | import org.apache.kafka.connect.errors.ConnectException; 7 | import org.apache.kafka.connect.source.SourceConnector; 8 | import org.apache.kafka.connect.source.SourceRecord; 9 | import org.apache.kafka.connect.source.SourceTask; 10 | 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import java.util.Map; 14 | 15 | public class BackupSourceConnector extends SourceConnector { 16 | private Map config; 17 | 18 | 19 | @Override 20 | public void start(Map props) { 21 | config = props; 22 | if (!config.getOrDefault(BackupSourceConfig.ALLOW_OLD_KAFKA_CONNECT_VERSION, "false").equals("true")) { 23 | try { 24 | SourceTask.class.getMethod("commitRecord", SourceRecord.class, RecordMetadata.class); 25 | } catch (NoSuchMethodException e) { 26 | throw new RuntimeException("Kafka Backup requires at least Kafka Connect 2.4. Otherwise Offsets cannot be committed. If you are sure what you are doing, please set " + BackupSourceConfig.ALLOW_OLD_KAFKA_CONNECT_VERSION + " to true"); 27 | } 28 | } 29 | } 30 | 31 | @Override 32 | public Class taskClass() { 33 | return BackupSourceTask.class; 34 | } 35 | 36 | @Override 37 | public List> taskConfigs(int maxTasks) { 38 | if (maxTasks > 1) { 39 | throw new ConnectException("kafka-backup can currently handle only one task."); 40 | } 41 | List> configs = new ArrayList<>(); 42 | configs.add(config); 43 | return configs; 44 | } 45 | 46 | @Override 47 | public void stop() { 48 | 49 | } 50 | 51 | @Override 52 | public ConfigDef config() { 53 | return new ConfigDef(); 54 | } 55 | 56 | @Override 57 | public String version() { 58 | return "0.1"; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/test/assets/v1/partitionindex/testIndex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/partitionindex/testIndex -------------------------------------------------------------------------------- /src/test/assets/v1/records/empty_record: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/empty_record -------------------------------------------------------------------------------- /src/test/assets/v1/records/header_record: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/header_record -------------------------------------------------------------------------------- /src/test/assets/v1/records/null_record: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/null_record -------------------------------------------------------------------------------- /src/test/assets/v1/records/simple_record: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/simple_record -------------------------------------------------------------------------------- 
/src/test/assets/v1/segmentindex/testIndex: -------------------------------------------------------------------------------- 1 |  2 |  3 | $ 4 | -------------------------------------------------------------------------------- /src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_index -------------------------------------------------------------------------------- /src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_records: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_records -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/common/TestUtils.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Files; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | 8 | public class TestUtils { 9 | private static Path TEMP_DIR; 10 | 11 | static { 12 | try { 13 | TEMP_DIR = Files.createTempDirectory("kafka_backup_tests"); 14 | } catch (IOException e) { 15 | e.printStackTrace(); 16 | } 17 | } 18 | 19 | public static Path getTestDir(String tests) { 20 | Path ret = Paths.get(TEMP_DIR.toString(), tests); 21 | try { 22 | Files.createDirectories(ret); 23 | } catch (Exception e) { 24 | throw new RuntimeException(e); 25 | } 26 | return ret; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/common/partition/PartitionIndexTest.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 | 3 | import de.azapps.kafkabackup.common.TestUtils; 4 | import de.azapps.kafkabackup.common.segment.SegmentIndex; 5 | import de.azapps.kafkabackup.common.segment.SegmentIndexEntry; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.nio.file.Files; 9 | import java.nio.file.Path; 10 | import java.nio.file.Paths; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import java.util.Optional; 14 | 15 | import static org.junit.jupiter.api.Assertions.*; 16 | 17 | public class PartitionIndexTest { 18 | private static Path TEMP_DIR = TestUtils.getTestDir("PartitionIndexTest"); 19 | 20 | @Test 21 | public void simpleRoundtripTest() throws Exception { 22 | String indexFile = "simpleRoundtripTestIndex"; 23 | List entries = new ArrayList<>(); 24 | entries.add(new PartitionIndexEntry("s0", 0)); 25 | entries.add(new PartitionIndexEntry("s100", 100)); 26 | entries.add(new PartitionIndexEntry("s200", 200)); 27 | entries.add(new PartitionIndexEntry("s300", 300)); 28 | PartitionIndex index = new PartitionIndex(Paths.get(TEMP_DIR.toString(), indexFile)); 29 | index.appendSegment(entries.get(0).filename(), entries.get(0).startOffset()); 30 | index.appendSegment(entries.get(1).filename(), entries.get(1).startOffset()); 31 | index.appendSegment(entries.get(2).filename(), entries.get(2).startOffset()); 32 | 
index.appendSegment(entries.get(3).filename(), entries.get(3).startOffset()); 33 | 34 | assertEquals(entries, index.index()); 35 | index.close(); 36 | 37 | 38 | PartitionIndex b = new PartitionIndex(Paths.get(TEMP_DIR.toString(), indexFile)); 39 | assertEquals(entries, b.index()); 40 | b.seek(10); 41 | assertEquals(entries.get(0).filename(), b.readFileName()); 42 | assertTrue(b.hasMoreData()); 43 | b.seek(200); 44 | assertEquals(entries.get(2).filename(), b.readFileName()); 45 | assertTrue(b.hasMoreData()); 46 | b.seek(310); 47 | assertEquals(entries.get(3).filename(), b.readFileName()); 48 | assertFalse(b.hasMoreData()); 49 | b.close(); 50 | } 51 | 52 | 53 | @Test 54 | public void testReadV1Index() throws Exception { 55 | String indexFile = "testIndex"; 56 | Path directory = Paths.get("src/test/assets/v1/partitionindex"); 57 | List entries = new ArrayList<>(); 58 | entries.add(new PartitionIndexEntry("s0", 0)); 59 | entries.add(new PartitionIndexEntry("s100", 100)); 60 | entries.add(new PartitionIndexEntry("s200", 200)); 61 | entries.add(new PartitionIndexEntry("s300", 300)); 62 | 63 | PartitionIndex b = new PartitionIndex(Paths.get(directory.toString(), indexFile)); 64 | assertEquals(entries, b.index()); 65 | } 66 | 67 | 68 | /** 69 | * Utility function to be run once when the format on disk changes to be able to stay backwards-compatible 70 | *
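     * It is private and not referenced by any test: the file it generates under
     * src/test/assets/v1/partitionindex is committed to the repository and read back by testReadV1Index above.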
71 | * Call it manually once when the format changes 72 | */ 73 | private static void writeTestIndexToFile() throws Exception { 74 | String indexFile = "testIndex"; 75 | Path directory = Paths.get("src/test/assets/v1/partitionindex"); // CHANGEME WHEN CHANGING DATA FORMAT! 76 | Files.createDirectories(directory); 77 | 78 | List entries = new ArrayList<>(); 79 | entries.add(new PartitionIndexEntry("s0", 0)); 80 | entries.add(new PartitionIndexEntry("s100", 100)); 81 | entries.add(new PartitionIndexEntry("s200", 200)); 82 | entries.add(new PartitionIndexEntry("s300", 300)); 83 | 84 | PartitionIndex index = new PartitionIndex(Paths.get(directory.toString(), indexFile)); 85 | index.appendSegment(entries.get(0).filename(), entries.get(0).startOffset()); 86 | index.appendSegment(entries.get(1).filename(), entries.get(1).startOffset()); 87 | index.appendSegment(entries.get(2).filename(), entries.get(2).startOffset()); 88 | index.appendSegment(entries.get(3).filename(), entries.get(3).startOffset()); 89 | index.close(); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/common/partition/PartitionSerdeTest.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.partition; 2 | 3 | import de.azapps.kafkabackup.common.TestUtils; 4 | import de.azapps.kafkabackup.common.record.Record; 5 | import de.azapps.kafkabackup.common.segment.SegmentReader; 6 | import de.azapps.kafkabackup.common.segment.SegmentUtils; 7 | import org.apache.kafka.common.header.internals.RecordHeaders; 8 | import org.apache.kafka.common.record.TimestampType; 9 | import org.apache.kafka.connect.header.ConnectHeaders; 10 | import org.apache.kafka.connect.header.Headers; 11 | import org.junit.jupiter.api.Test; 12 | 13 | import java.nio.charset.StandardCharsets; 14 | import java.nio.file.Files; 15 | import java.nio.file.Path; 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | import static org.junit.jupiter.api.Assertions.assertFalse; 21 | 22 | public class PartitionSerdeTest { 23 | private static final String TOPIC = "test-topic"; 24 | private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8); 25 | private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8); 26 | private static final Path TEMP_DIR = TestUtils.getTestDir("PartitionSerdeTest"); 27 | 28 | private static final RecordHeaders HEADERS = new RecordHeaders(); 29 | private static final byte[] HEADER_0_VALUE_BYTES = "header0-value".getBytes(StandardCharsets.UTF_8); 30 | private static final byte[] HEADER_1_VALUE_BYTES = "header1-value".getBytes(StandardCharsets.UTF_8); 31 | static { 32 | HEADERS.add("", new byte[0]); 33 | HEADERS.add("null", null); 34 | HEADERS.add("value0", HEADER_0_VALUE_BYTES); 35 | HEADERS.add("value1", HEADER_1_VALUE_BYTES); 36 | } 37 | @Test 38 | public void simpleRoundtripTest() throws Exception { 39 | int partition = 0; 40 | 41 | List records = new ArrayList<>(); 42 | records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0)); 43 | records.add(new Record(TOPIC, partition, null, null, 1)); 44 | records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2)); 45 | records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE, HEADERS)); 46 | 47 | PartitionWriter partitionWriter = new 
PartitionWriter(TOPIC, partition, TEMP_DIR, 50); 48 | partitionWriter.append(records.get(0)); 49 | partitionWriter.append(records.get(1)); 50 | partitionWriter.append(records.get(2)); 51 | partitionWriter.append(records.get(3)); 52 | partitionWriter.close(); 53 | 54 | PartitionReader partitionReader = new PartitionReader(TOPIC, partition, TEMP_DIR); 55 | assertEquals(records, partitionReader.readFully()); 56 | assertFalse(partitionReader.hasMoreData()); 57 | partitionReader.seek(1); 58 | assertEquals(records.get(1), partitionReader.read()); 59 | partitionReader.seek(3); 60 | assertEquals(records.get(3), partitionReader.read()); 61 | assertFalse(partitionReader.hasMoreData()); 62 | } 63 | 64 | @Test 65 | public void smallSegmentSizeTest() throws Exception { 66 | int partition = 1; 67 | 68 | List records = new ArrayList<>(); 69 | records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0)); 70 | records.add(new Record(TOPIC, partition, null, null, 1)); 71 | records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2)); 72 | 73 | PartitionWriter partitionWriter = new PartitionWriter(TOPIC, partition, TEMP_DIR, 1); 74 | partitionWriter.append(records.get(0)); 75 | partitionWriter.append(records.get(1)); 76 | partitionWriter.append(records.get(2)); 77 | partitionWriter.close(); 78 | 79 | SegmentReader a = new SegmentReader(TOPIC, partition, TEMP_DIR, 0); 80 | assertEquals(records.get(0), a.read()); 81 | assertFalse(a.hasMoreData()); 82 | SegmentReader b = new SegmentReader(TOPIC, partition, TEMP_DIR, 1); 83 | assertEquals(records.get(1), b.read()); 84 | assertFalse(b.hasMoreData()); 85 | SegmentReader c = new SegmentReader(TOPIC, partition, TEMP_DIR, 2); 86 | assertEquals(records.get(2), c.read()); 87 | assertFalse(c.hasMoreData()); 88 | } 89 | 90 | @Test 91 | public void deleteSomeSegmentsTest() throws Exception { 92 | int partition = 2; 93 | 94 | List records = new ArrayList<>(); 95 | records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0)); 96 | records.add(new Record(TOPIC, partition, null, null, 1)); 97 | records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2)); 98 | records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE, HEADERS)); 99 | 100 | PartitionWriter partitionWriter = new PartitionWriter(TOPIC, partition, TEMP_DIR, 1); 101 | partitionWriter.append(records.get(0)); 102 | partitionWriter.append(records.get(1)); 103 | partitionWriter.append(records.get(2)); 104 | partitionWriter.append(records.get(3)); 105 | partitionWriter.close(); 106 | 107 | // Delete segments 0 and 2 108 | Files.delete(SegmentUtils.recordsFile(TEMP_DIR, partition, 0)); 109 | Files.delete(SegmentUtils.indexFile(TEMP_DIR, partition, 0)); 110 | Files.delete(SegmentUtils.recordsFile(TEMP_DIR, partition, 2)); 111 | Files.delete(SegmentUtils.indexFile(TEMP_DIR, partition, 2)); 112 | Files.delete(PartitionUtils.indexFile(TEMP_DIR, partition)); 113 | 114 | // Restore indices 115 | PartitionIndexRestore restore = new PartitionIndexRestore(TEMP_DIR, partition); 116 | restore.restore(); 117 | 118 | // Expected 119 | List expected = new ArrayList<>(); 120 | expected.add(records.get(1)); 121 | expected.add(records.get(3)); 122 | 123 | PartitionReader reader = new PartitionReader(TOPIC, partition, TEMP_DIR); 124 | assertEquals(expected, reader.readFully()); 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/common/record/RecordSerdeTest.java: 
-------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.record; 2 | 3 | import org.apache.kafka.common.header.internals.RecordHeaders; 4 | import org.apache.kafka.common.record.TimestampType; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import java.io.*; 8 | import java.nio.charset.StandardCharsets; 9 | 10 | import static org.junit.jupiter.api.Assertions.assertEquals; 11 | import static org.junit.jupiter.api.Assertions.assertNotEquals; 12 | 13 | public class RecordSerdeTest { 14 | 15 | private static final String TOPIC = "test-topic"; 16 | private static final int PARTITION = 42; 17 | private static final long OFFSET = 123; 18 | private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8); 19 | private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8); 20 | private static final byte[] NULL_TIMESTAMP_BYTES = "null-timestamp".getBytes(StandardCharsets.UTF_8); 21 | 22 | private static final String SIMPLE_RECORD_FILE = "simple_record"; 23 | private static final String NULL_RECORD_FILE = "null_record"; 24 | private static final String EMPTY_RECORD_FILE = "empty_record"; 25 | private static final String HEADER_RECORD_FILE = "header_record"; 26 | 27 | // Example records 28 | private static final Record SIMPLE_RECORD, NULL_RECORD, EMPTY_RECORD, HEADER_RECORD, NULL_TIMESTAMP_RECORD; 29 | 30 | static { 31 | SIMPLE_RECORD = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET); 32 | NULL_RECORD = new Record(TOPIC, PARTITION, null, null, OFFSET); 33 | EMPTY_RECORD = new Record(TOPIC, PARTITION, new byte[0], new byte[0], OFFSET); 34 | NULL_TIMESTAMP_RECORD = new Record(TOPIC, PARTITION, NULL_TIMESTAMP_BYTES, null, OFFSET, null, TimestampType.CREATE_TIME); 35 | // Build multiple headers that might cause problems 36 | RecordHeaders headers = new RecordHeaders(); 37 | headers.add("", new byte[0]); 38 | headers.add("null", null); 39 | headers.add("value", VALUE_BYTES); 40 | HEADER_RECORD = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, null, TimestampType.NO_TIMESTAMP_TYPE, headers); 41 | } 42 | 43 | @Test 44 | public void roundtripTest() throws Exception { 45 | Record simpleRoundtrip = writeAndReadRecord(SIMPLE_RECORD); 46 | assertEquals(SIMPLE_RECORD, simpleRoundtrip); 47 | } 48 | 49 | @Test 50 | public void roundtripWithNull() throws Exception { 51 | Record nullRoundtrip = writeAndReadRecord(NULL_RECORD); 52 | assertEquals(NULL_RECORD, nullRoundtrip); 53 | 54 | Record emptyRoundtrip = writeAndReadRecord(EMPTY_RECORD); 55 | assertEquals(EMPTY_RECORD, emptyRoundtrip); 56 | 57 | // Must be different 58 | assertNotEquals(nullRoundtrip, emptyRoundtrip); 59 | } 60 | 61 | @Test 62 | public void roundtripNullTimestamp() throws Exception { 63 | Record nullTimestampRoundtrip =writeAndReadRecord(NULL_TIMESTAMP_RECORD); 64 | assertEquals(NULL_TIMESTAMP_RECORD, nullTimestampRoundtrip); 65 | } 66 | 67 | @Test 68 | public void roundtripHeaders() throws Exception { 69 | Record headerRoundtrip = writeAndReadRecord(HEADER_RECORD); 70 | assertEquals(HEADER_RECORD, headerRoundtrip); 71 | } 72 | 73 | /** 74 | * DO NOT CHANGE THIS TEST! 
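     * It pins the v1 on-disk record format by reading fixture files committed under
     * src/test/assets/v1/records; if it starts failing, previously written backups could no longer be read back.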
75 | */ 76 | @Test 77 | public void readV1() throws Exception { 78 | File v1Directory = new File("src/test/assets/v1/records"); 79 | Record simpleRecord = readFromFile(new File(v1Directory, SIMPLE_RECORD_FILE)); 80 | assertEquals(SIMPLE_RECORD, simpleRecord); 81 | Record nullRecord = readFromFile(new File(v1Directory, NULL_RECORD_FILE)); 82 | assertEquals(NULL_RECORD, nullRecord); 83 | assertNotEquals(SIMPLE_RECORD, nullRecord); // just to make sure! 84 | Record emptyRecord = readFromFile(new File(v1Directory, EMPTY_RECORD_FILE)); 85 | assertEquals(EMPTY_RECORD, emptyRecord); 86 | assertNotEquals(NULL_RECORD, emptyRecord); // just to make sure! 87 | Record headerRecord = readFromFile(new File(v1Directory, HEADER_RECORD_FILE)); 88 | assertEquals(HEADER_RECORD, headerRecord); 89 | assertNotEquals(EMPTY_RECORD, headerRecord); // just to make sure! 90 | } 91 | 92 | // UTILS 93 | 94 | private Record writeAndReadRecord(Record record) throws IOException { 95 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 96 | RecordSerde.write(outputStream, record); 97 | byte[] data = outputStream.toByteArray(); 98 | return RecordSerde.read(TOPIC, PARTITION, new ByteArrayInputStream(data)); 99 | } 100 | 101 | private static Record readFromFile(File file) throws IOException { 102 | FileInputStream inputStream = new FileInputStream(file); 103 | return RecordSerde.read(TOPIC, PARTITION, inputStream); 104 | } 105 | 106 | /** 107 | * Utility function to be run once when the format on disk changes to be able to stay backwards-compatible 108 | *
109 | * Call it manually once when the format changes 110 | */ 111 | private static void writeTestRecordsToFile() throws IOException { 112 | File directory = new File("src/test/assets/v1/records"); // CHANGEME WHEN CHANGING DATA FORMAT! 113 | writeCurrentVersionRecordToFile(SIMPLE_RECORD, new File(directory, SIMPLE_RECORD_FILE)); 114 | writeCurrentVersionRecordToFile(NULL_RECORD, new File(directory, NULL_RECORD_FILE)); 115 | writeCurrentVersionRecordToFile(EMPTY_RECORD, new File(directory, EMPTY_RECORD_FILE)); 116 | writeCurrentVersionRecordToFile(HEADER_RECORD, new File(directory, HEADER_RECORD_FILE)); 117 | } 118 | 119 | private static void writeCurrentVersionRecordToFile(Record record, File file) throws IOException { 120 | FileOutputStream outputStream = new FileOutputStream(file); 121 | RecordSerde.write(outputStream, record); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/common/record/RecordTest.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.record; 2 | 3 | import org.apache.kafka.common.header.internals.RecordHeaders; 4 | import org.apache.kafka.common.record.TimestampType; 5 | import org.apache.kafka.connect.sink.SinkRecord; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.nio.charset.StandardCharsets; 9 | import java.util.Arrays; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertEquals; 12 | import static org.junit.jupiter.api.Assertions.assertNotEquals; 13 | 14 | public class RecordTest { 15 | private static final String TOPIC = "test-topic"; 16 | private static final int PARTITION = 42; 17 | private static final long OFFSET = 123; 18 | private static final TimestampType TIMESTAMP_TYPE = TimestampType.LOG_APPEND_TIME; 19 | private static final Long TIMESTAMP = 573831430000L; 20 | // encoding here is not really important, we just want some bytes 21 | private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8); 22 | private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8); 23 | // Header fixtures: 24 | private static final byte[] HEADER_0_VALUE_BYTES = "header0-value".getBytes(StandardCharsets.UTF_8); 25 | private static final byte[] HEADER_1_VALUE_BYTES = "header1-value".getBytes(StandardCharsets.UTF_8); 26 | private static final RecordHeaders HEADERS = new RecordHeaders(); 27 | 28 | static { 29 | HEADERS.add("", new byte[0]); 30 | HEADERS.add("null", null); 31 | HEADERS.add("value0", HEADER_0_VALUE_BYTES); 32 | HEADERS.add("value1", HEADER_1_VALUE_BYTES); 33 | } 34 | 35 | @Test 36 | public void equalsIdentityTrueTest() { 37 | // GIVEN 38 | Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS); 39 | 40 | // THEN 41 | assertEquals(a, a); 42 | } 43 | 44 | @Test 45 | public void equalsValueTrueTest() { 46 | // GIVEN 47 | Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS); 48 | 49 | RecordHeaders bHeaders = new RecordHeaders(); 50 | bHeaders.add("", new byte[0]); 51 | bHeaders.add("null", null); 52 | bHeaders.add("value0", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length)); 53 | bHeaders.add("value1", Arrays.copyOf(HEADER_1_VALUE_BYTES, HEADER_1_VALUE_BYTES.length)); 54 | Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, bHeaders); 55 | 56 | // THEN 57 | 
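        // equals() must compare header values byte by byte rather than by array identity,
        // since bHeaders was built from defensive copies of the same bytes.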
assertEquals(a, b); 58 | assertEquals(b, a); 59 | } 60 | 61 | @Test 62 | public void equalsFalseBecauseStrictSubsetTest() { 63 | // GIVEN 64 | Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS); 65 | Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET); 66 | 67 | // THEN 68 | assertNotEquals(a, b); 69 | assertNotEquals(b, a); 70 | } 71 | 72 | @Test 73 | public void equalsFalseBecauseHeadersStrictSubsetTest() { 74 | // GIVEN 75 | RecordHeaders aHeaders = new RecordHeaders(); 76 | aHeaders.add("header0-key", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length)); 77 | aHeaders.add("header1-key", Arrays.copyOf(HEADER_1_VALUE_BYTES, HEADER_1_VALUE_BYTES.length)); 78 | Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, aHeaders); 79 | 80 | RecordHeaders bHeaders = new RecordHeaders(); 81 | bHeaders.add("header0-key", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length)); 82 | Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, bHeaders); 83 | 84 | 85 | RecordHeaders cHeaders = new RecordHeaders(); 86 | cHeaders.add("header1-key", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length)); 87 | cHeaders.add("header1-key", Arrays.copyOf(HEADER_1_VALUE_BYTES, HEADER_1_VALUE_BYTES.length)); 88 | Record c = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, cHeaders); 89 | 90 | // THEN 91 | assertNotEquals(a, b); 92 | assertNotEquals(b, a); 93 | assertNotEquals(a, c); 94 | assertNotEquals(b, c); 95 | } 96 | 97 | /** 98 | * This is not used during normal operations, but we need to verify that this works 99 | * correctly as we use the functions for our end to end tests! 
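     * The Record -> SinkRecord -> Record round trip therefore has to be loss-free, including for
     * null and empty keys, values and headers, which is what the fixtures below exercise.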
100 | */ 101 | @Test 102 | public void roundtripSinkRecordTest() { 103 | 104 | // given 105 | Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS); 106 | Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE, HEADERS); 107 | Record c = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, 0); 108 | Record d = new Record(TOPIC, PARTITION, null, null, 1); 109 | Record e = new Record(TOPIC, PARTITION, new byte[0], new byte[0], 2); 110 | Record f = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET); 111 | 112 | // transform 113 | SinkRecord srA = a.toSinkRecord(); 114 | SinkRecord srB = b.toSinkRecord(); 115 | SinkRecord srC = c.toSinkRecord(); 116 | SinkRecord srD = d.toSinkRecord(); 117 | SinkRecord srE = e.toSinkRecord(); 118 | SinkRecord srF = f.toSinkRecord(); 119 | 120 | // expect 121 | assertEquals(a, Record.fromSinkRecord(srA)); 122 | assertEquals(b, Record.fromSinkRecord(srB)); 123 | assertEquals(c, Record.fromSinkRecord(srC)); 124 | assertEquals(d, Record.fromSinkRecord(srD)); 125 | assertEquals(e, Record.fromSinkRecord(srE)); 126 | assertEquals(f, Record.fromSinkRecord(srF)); 127 | 128 | 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/common/segment/SegmentIndexTest.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.common.segment; 2 | 3 | import de.azapps.kafkabackup.common.TestUtils; 4 | import de.azapps.kafkabackup.common.record.Record; 5 | import org.apache.kafka.common.record.TimestampType; 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.nio.charset.StandardCharsets; 9 | import java.nio.file.Files; 10 | import java.nio.file.Path; 11 | import java.nio.file.Paths; 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | import java.util.Optional; 15 | 16 | import static org.junit.jupiter.api.Assertions.*; 17 | 18 | public class SegmentIndexTest { 19 | private static final String TOPIC = "test-topic"; 20 | private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8); 21 | private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8); 22 | private static final SegmentIndexEntry ENTRY1 = new SegmentIndexEntry(0, 1, 10); 23 | private static final SegmentIndexEntry ENTRY2 = new SegmentIndexEntry(1, 11, 10); 24 | private static final SegmentIndexEntry ENTRY3 = new SegmentIndexEntry(5, 21, 15); 25 | private static final SegmentIndexEntry ENTRY4 = new SegmentIndexEntry(6, 36, 10); 26 | private static final Path TEMP_DIR = TestUtils.getTestDir("SegmentIndexTest"); 27 | 28 | @Test 29 | public void simpleRoundtripTest() throws Exception { 30 | String indexFile = "simpleRoundtripTestIndex"; 31 | SegmentIndex index = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile)); 32 | assertEquals(0L, index.lastValidStartPosition()); 33 | List entries = new ArrayList<>(); 34 | entries.add(ENTRY1); 35 | index.addEntry(ENTRY1); 36 | entries.add(ENTRY2); 37 | index.addEntry(ENTRY2); 38 | entries.add(ENTRY3); 39 | index.addEntry(ENTRY3); 40 | entries.add(ENTRY4); 41 | index.addEntry(ENTRY4); 42 | 43 | index.close(); 44 | 45 | 46 | SegmentIndex b = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile)); 47 | assertEquals(entries, b.index()); 48 | assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findByOffset(5)); 49 | 
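        // Offsets 2-4 are not present in the index (ENTRY2 has offset 1, ENTRY3 has offset 5),
        // so the earliest entry with an offset >= 2 is ENTRY3 at file position 21.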
assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findEarliestWithHigherOrEqualOffset(2)); 50 | assertEquals(Optional.empty(), b.findEarliestWithHigherOrEqualOffset(11)); 51 | assertEquals(36, b.lastValidStartPosition()); 52 | } 53 | 54 | @Test 55 | public void writeRecordThenCheckIndex() throws Exception { 56 | int partition = 0; 57 | SegmentWriter segmentWriter = new SegmentWriter(TOPIC, partition, 0, TEMP_DIR); 58 | segmentWriter.append(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0)); 59 | 60 | SegmentIndex i1 = new SegmentIndex(SegmentUtils.indexFile(TEMP_DIR, partition, 0)); 61 | assertEquals(1, i1.size()); 62 | 63 | 64 | segmentWriter.append(new Record(TOPIC, partition, null, null, 1)); 65 | segmentWriter.append(new Record(TOPIC, partition, new byte[0], new byte[0], 2)); 66 | segmentWriter.close(); 67 | 68 | SegmentIndex i2 = new SegmentIndex(SegmentUtils.indexFile(TEMP_DIR, partition, 0)); 69 | assertEquals(3, i2.size()); 70 | long fileLength = SegmentUtils.recordsFile(TEMP_DIR, partition, 0).toFile().length(); 71 | //noinspection OptionalGetWithoutIsPresent 72 | SegmentIndexEntry entry = i2.lastIndexEntry().get(); 73 | assertEquals(fileLength, entry.recordFilePosition() + entry.recordByteLength()); 74 | } 75 | 76 | @Test 77 | public void restoreTest() throws Exception { 78 | int partition = 1; 79 | List records = new ArrayList<>(); 80 | records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0)); 81 | records.add(new Record(TOPIC, partition, null, null, 1)); 82 | records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2)); 83 | records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE)); 84 | 85 | SegmentWriter segmentWriter = new SegmentWriter(TOPIC, partition, 0, TEMP_DIR); 86 | for (Record record : records) { 87 | segmentWriter.append(record); 88 | } 89 | segmentWriter.close(); 90 | Path indexFile = SegmentUtils.indexFile(TEMP_DIR, partition, 0); 91 | SegmentIndex a = new SegmentIndex(indexFile); 92 | Files.delete(indexFile); 93 | SegmentIndexRestore restore = new SegmentIndexRestore(SegmentUtils.recordsFile(TEMP_DIR, partition, 0)); 94 | restore.restore(); 95 | SegmentIndex b = new SegmentIndex(indexFile); 96 | assertEquals(a.index(), b.index()); 97 | } 98 | 99 | @Test 100 | public void incrementingIndex() throws Exception { 101 | String indexFile = "incrementingIndex"; 102 | SegmentIndex index = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile)); 103 | index.addEntry(new SegmentIndexEntry(5, 22, 15)); 104 | // Wrong offset 105 | assertThrows(SegmentIndex.IndexException.class, 106 | () -> index.addEntry(new SegmentIndexEntry(0, 37, 10))); 107 | // Should be ok 108 | assertDoesNotThrow(() -> index.addEntry(new SegmentIndexEntry(10, 37, 10))); 109 | index.close(); 110 | } 111 | 112 | @Test 113 | public void emptyIndexTest() throws Exception { 114 | String indexFile = "emptyIndexTest"; 115 | SegmentIndex index = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile)); 116 | assertEquals(0L, index.lastValidStartPosition()); 117 | index.close(); 118 | 119 | 120 | SegmentIndex b = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile)); 121 | assertEquals(0L, b.lastValidStartPosition()); 122 | assertEquals(Optional.empty(), b.findEarliestWithHigherOrEqualOffset(0)); 123 | assertEquals(Optional.empty(), b.findEarliestWithHigherOrEqualOffset(11)); 124 | assertTrue(b.index().isEmpty()); 125 | } 126 | 127 | 128 | @Test 129 | public void testReadV1Index() throws Exception { 130 | String 
indexFile = "testIndex"; 131 | Path directory = Paths.get("src/test/assets/v1/segmentindex"); 132 | List entries = new ArrayList<>(); 133 | entries.add(ENTRY1); 134 | entries.add(ENTRY2); 135 | entries.add(ENTRY3); 136 | entries.add(ENTRY4); 137 | 138 | SegmentIndex b = new SegmentIndex(Paths.get(directory.toString(), indexFile)); 139 | assertEquals(entries, b.index()); 140 | assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findByOffset(5)); 141 | assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findEarliestWithHigherOrEqualOffset(2)); 142 | assertEquals(36, b.lastValidStartPosition()); 143 | } 144 | 145 | 146 | /** 147 | * Utility function to be run once when the format on disk changes to be able to stay backwards-compatible 148 | *
149 | * Call it manually once when the format changes 150 | */ 151 | private static void writeTestIndexToFile() throws Exception { 152 | String indexFile = "testIndex"; 153 | Path directory = Paths.get("src/test/assets/v1/segmentindex"); // CHANGEME WHEN CHANGING DATA FORMAT! 154 | Files.createDirectories(directory); 155 | 156 | SegmentIndex index = new SegmentIndex(Paths.get(directory.toString(), indexFile)); 157 | index.addEntry(ENTRY1); 158 | index.addEntry(ENTRY2); 159 | index.addEntry(ENTRY3); 160 | index.addEntry(ENTRY4); 161 | index.close(); 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/sink/MockEndOffsetReader.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.sink; 2 | 3 | import de.azapps.kafkabackup.common.offset.EndOffsetReader; 4 | import org.apache.kafka.common.TopicPartition; 5 | 6 | import java.util.Collection; 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | 10 | public class MockEndOffsetReader extends EndOffsetReader { 11 | private Map offsets; 12 | public MockEndOffsetReader(Map offsets) { 13 | super(new HashMap<>()); 14 | this.offsets = offsets; 15 | } 16 | @Override 17 | public Map getEndOffsets(Collection partitions) { 18 | return offsets; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/sink/MockOffsetSink.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.sink; 2 | 3 | import de.azapps.kafkabackup.common.offset.OffsetSink; 4 | import org.apache.kafka.clients.admin.AdminClient; 5 | 6 | import java.io.IOException; 7 | import java.nio.file.Path; 8 | 9 | public class MockOffsetSink extends OffsetSink { 10 | public MockOffsetSink(AdminClient adminClient, Path targetDir) { 11 | super(adminClient, targetDir); 12 | } 13 | 14 | @Override 15 | public void syncConsumerGroups() { 16 | 17 | } 18 | 19 | @Override 20 | public void syncOffsets() throws IOException { 21 | } 22 | 23 | @Override 24 | public void flush() throws IOException { 25 | } 26 | 27 | @Override 28 | public void close() throws IOException { 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /src/test/java/de/azapps/kafkabackup/sink/MockSinkTaskContext.java: -------------------------------------------------------------------------------- 1 | package de.azapps.kafkabackup.sink; 2 | 3 | import org.apache.kafka.common.TopicPartition; 4 | import org.apache.kafka.connect.sink.SinkTaskContext; 5 | 6 | import java.util.HashMap; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | public class MockSinkTaskContext implements SinkTaskContext { 11 | 12 | @Override 13 | public Map configs() { 14 | return new HashMap<>(); 15 | } 16 | 17 | @Override 18 | public void offset(Map offsets) { 19 | 20 | } 21 | 22 | @Override 23 | public void offset(TopicPartition tp, long offset) { 24 | 25 | } 26 | 27 | @Override 28 | public void timeout(long timeoutMs) { 29 | 30 | } 31 | 32 | @Override 33 | public Set assignment() { 34 | return null; 35 | } 36 | 37 | @Override 38 | public void pause(TopicPartition... partitions) { 39 | 40 | } 41 | 42 | @Override 43 | public void resume(TopicPartition... 
partitions) { 44 | 45 | } 46 | 47 | @Override 48 | public void requestCommit() { 49 | 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /system_test/.gitignore: -------------------------------------------------------------------------------- 1 | out -------------------------------------------------------------------------------- /system_test/01_simple_roundtrip_test.yaml: -------------------------------------------------------------------------------- 1 | # Test description 2 | # 3 | # Create three topics: 4 | # * backup-test-1partition: 1 partition 5 | # * backup-test-weird-msgs: 1 partition; backup-test-empty-topic: 3 partitions 6 | # 7 | # * Fill the 1partition topic with 300 messages of ~10KB each 8 | # * Consume the messages with three consumer groups: cg-100 consumes 100 messages, cg-200 consumes 200 messages, cg-300 consumes 300 messages 9 | # * Fill one of the 3 partitions of empty-topic with 300 messages. Leave the other partitions empty 10 | # 11 | # * Take a backup 12 | # * Stop the Cluster 13 | # 14 | # * Restore the backup 15 | # * Verify that all messages have been written correctly 16 | # * Check the consumer offsets 17 | # 18 | # * Stop everything 19 | - name: coyote 20 | title: kafka-backup 21 | 22 | - name: Setup Cluster to Backup 23 | entries: 24 | - name: Docker Compose Up 25 | command: docker-compose up -d 26 | - name: Clean previous data 27 | command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest rm -rf "/kafka-backup/001_simple_1partition_test/" 28 | - name: Wait for Kafka to get up 29 | command: docker logs to-backup-kafka 2>&1 | grep -q '\[KafkaServer id=1\] started' 30 | timeout: 30s 31 | 32 | - name: Create Topic for tests 33 | entries: 34 | - command: docker-compose exec -T to-backup-kafka bash -c ' 35 | utils.py create_topic --topic backup-test-1partition --partitions 1 && 36 | utils.py create_topic --topic backup-test-weird-msgs --partitions 1 && 37 | utils.py create_topic --topic backup-test-empty-topic --partitions 3' 38 | - name: Produce Messages 39 | entries: 40 | - name: Produce 300 messages 41 | command: docker-compose exec -T to-backup-kafka bash -c ' 42 | utils.py produce_messages --topic backup-test-1partition --partition 0 --start_num 0 --count 300 && 43 | utils.py produce_messages --topic backup-test-empty-topic --partition 0 --start_num 0 --count 300' 44 | - name: Produce 'weird' messages 45 | command: docker-compose exec -T to-backup-kafka bash -c ' 46 | utils.py produce_weird_messages --partition 0 --topic backup-test-weird-msgs' 47 | - name: Consume messages 48 | entries: 49 | - name: Consume 100 messages with cg-100 50 | command: docker-compose exec -T to-backup-kafka 51 | utils.py consume_messages --topic backup-test-1partition --consumer_group cg-100 --count 100 52 | - name: Consume 200 messages with cg-200 53 | command: docker-compose exec -T to-backup-kafka 54 | utils.py consume_messages --topic backup-test-1partition --consumer_group cg-200 --count 200 55 | - name: Consume 300 messages with cg-300 56 | command: docker-compose exec -T to-backup-kafka 57 | utils.py consume_messages --topic backup-test-1partition --consumer_group cg-300 --count 300 58 | - name: Check Consumer Group Offsets 59 | entries: 60 | - name: Count Messages 61 | command: docker-compose exec -T to-backup-kafka 62 | utils.py count_messages 63 | stdout_has: 64 | - "backup-test-1partition 0: 300" 65 | - "backup-test-empty-topic 0: 300" 66 | - "backup-test-empty-topic 1: 0" 67 | - "backup-test-empty-topic 2: 0" 68 | - name: 
Check Consumer Group cg-100 69 | command: docker-compose exec -T to-backup-kafka 70 | kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-100 71 | stdout_has: [ 'backup-test-1partition 0 100 300 200' ] 72 | - name: Check Consumer Group cg-200 73 | command: docker-compose exec -T to-backup-kafka 74 | kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-200 75 | stdout_has: [ 'backup-test-1partition 0 200 300 100' ] 76 | - name: Check Consumer Group cg-300 77 | command: docker-compose exec -T to-backup-kafka 78 | kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-300 79 | stdout_has: [ 'backup-test-1partition 0 300 300 0' ] 80 | 81 | - name: Start Kafka Backup 82 | entries: 83 | - name: Start Kafka Backup 84 | command: > 85 | docker run -d -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_to-backup -p 18083:8083 --name to-backup --rm 86 | kafka-backup-dev:latest backup-standalone.sh --bootstrap-server to-backup-kafka:9092 87 | --target-dir /kafka-backup/001_simple_1partition_test/ --topics-regex 'backup-test.*' 88 | - command: sleep 30 89 | nolog: true 90 | - name: Check For errors 91 | timeout: 300s 92 | command: docker exec to-backup curl -vs "http://localhost:8083/connectors/backup-sink/status" 93 | stderr_has: ["200 OK"] 94 | stdout_has: ["RUNNING"] 95 | stdout_not_has: ["FAILED"] 96 | 97 | - name: Stop Cluster that was backed up 98 | entries: 99 | - name: Stop Kafka Backup 100 | command: docker kill to-backup 101 | - name: Docker Compose Down 102 | command: docker-compose stop to-backup-kafka 103 | 104 | - name: Restore 105 | entries: 106 | - name: Create Topic 107 | command: docker-compose exec -T restore-to-kafka bash -c ' 108 | utils.py create_topic --topic backup-test-1partition --partitions 1 && 109 | utils.py create_topic --topic backup-test-weird-msgs --partitions 1 && 110 | utils.py create_topic --topic backup-test-empty-topic --partitions 3' 111 | - name: Run Kafka Restore 112 | command: > 113 | docker run -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_restore-to --name restore-to --rm 114 | kafka-backup-dev:latest restore-standalone.sh --bootstrap-server restore-to-kafka:9092 115 | --source-dir /kafka-backup/001_simple_1partition_test/ --topics 'backup-test-1partition,backup-test-empty-topic,backup-test-weird-msgs' 116 | timeout: 60s 117 | stdout_has: ['All records read.'] 118 | 119 | - name: Verify Backup 120 | entries: 121 | - name: Verify Records 122 | timeout: 30s 123 | command: docker-compose exec -T restore-to-kafka bash -c ' 124 | utils.py consume_verify_messages --topic backup-test-1partition --partition 0 --count 300 && 125 | utils.py consume_verify_messages --topic backup-test-empty-topic --partition 0 --count 300' 126 | - name: Verify Weird Records 127 | timeout: 15s 128 | command: docker-compose exec -T restore-to-kafka bash -c ' 129 | utils.py consume_verify_weird_messages --partition 0 --topic backup-test-weird-msgs' 130 | - name: Count Messages 131 | timeout: 30s 132 | command: docker-compose exec -T restore-to-kafka 133 | utils.py count_messages 134 | stdout_has: 135 | - "backup-test-1partition 0: 300" 136 | - "backup-test-empty-topic 0: 300" 137 | - "backup-test-empty-topic 1: 0" 138 | - "backup-test-empty-topic 2: 0" 139 | - name: Check Consumer Group cg-100 140 | timeout: 30s 141 | command: docker-compose exec -T restore-to-kafka 142 | kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-100 143 | stdout_has: [ 
'backup-test-1partition 0 100' ] 144 | - name: Check Consumer Group cg-200 145 | timeout: 30s 146 | command: docker-compose exec -T restore-to-kafka 147 | kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-200 148 | stdout_has: [ 'backup-test-1partition 0 200' ] 149 | - name: Check Consumer Group cg-300 150 | timeout: 30s 151 | command: docker-compose exec -T restore-to-kafka 152 | kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-300 153 | stdout_has: [ 'backup-test-1partition 0 300' ] 154 | 155 | - name: Clean-up Containers 156 | entries: 157 | - name: Docker Compose Down 158 | command: docker-compose down 159 | timeout: 15s 160 | -------------------------------------------------------------------------------- /system_test/04_delete_old_segments.yaml: -------------------------------------------------------------------------------- 1 | # * Create a backup with multiple segments 2 | # * Delete some old segments 3 | # * Delete all indexes 4 | # * Recreate all indexes 5 | # * Do a restore 6 | - name: coyote 7 | title: kafka-backup 8 | 9 | - name: Setup Cluster to Backup 10 | entries: 11 | - name: Docker Compose Up 12 | command: docker-compose up -d 13 | - name: Clean previous data 14 | command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest rm -rf "/kafka-backup/04_delete_old_segment/" 15 | - name: Wait for Kafka to get up 16 | command: docker logs to-backup-kafka 2>&1 | grep -q '\[KafkaServer id=1\] started' 17 | timeout: 30s 18 | 19 | - name: Create Topic for tests 20 | entries: 21 | - command: docker-compose exec -T to-backup-kafka 22 | utils.py create_topic --topic backup-test-1partition --partitions 1 23 | - name: Produce Messages 24 | entries: 25 | - name: Produce 3000 messages 26 | command: docker-compose exec -T to-backup-kafka 27 | utils.py produce_messages --topic backup-test-1partition --partition 0 --start_num 0 --count 3000 28 | - name: Count Messages 29 | command: docker-compose exec -T to-backup-kafka 30 | utils.py count_messages 31 | stdout_has: 32 | - "backup-test-1partition 0: 3000" 33 | 34 | - name: Start Kafka Backup 35 | entries: 36 | - name: Start Kafka Backup 37 | command: > 38 | docker run -d -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_to-backup --name to-backup --rm 39 | kafka-backup-dev:latest backup-standalone.sh --bootstrap-server to-backup-kafka:9092 40 | --target-dir /kafka-backup/04_delete_old_segment/ --topics-regex 'backup-test.*' --max-segment-size 10485760 41 | - command: sleep 30 42 | nolog: true 43 | 44 | - name: Stop Cluster that was backed up 45 | entries: 46 | - name: Stop Kafka Backup 47 | command: docker kill to-backup 48 | - name: Docker Compose Down 49 | command: docker-compose stop to-backup-kafka 50 | 51 | - name: Delete old segment and restore the index 52 | entries: 53 | - name: Delete all indexes 54 | command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest bash -c \ 55 | 'rm /kafka-backup/04_delete_old_segment/backup-test-1partition/*index*' 56 | - name: Delete old segment 57 | command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest bash -c \ 58 | 'rm /kafka-backup/04_delete_old_segment/backup-test-1partition/segment_partition_000_from_offset_0000000000_records' 59 | - name: Restore segment and partition indexes 60 | command: > 61 | docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest bash -c ' 62 | export TOPICDIR="/kafka-backup/04_delete_old_segment/backup-test-1partition/" && 
63 | export CLASSPATH="/connect-plugins/kafka-backup.jar" && 64 | for f in "$TOPICDIR"/segment_partition_*_records ; do 65 | segment-index.sh --restore-index \ 66 | --segment $f 67 | done && 68 | partition-index.sh --restore --partition 0 --topic-dir "$TOPICDIR"' 69 | 70 | - name: Restore 71 | entries: 72 | - name: Create Topic 73 | command: docker-compose exec -T restore-to-kafka 74 | utils.py create_topic --topic backup-test-1partition --partitions 1 75 | - name: Run Kafka Restore 76 | command: > 77 | docker run -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_restore-to --name restore-to --rm 78 | kafka-backup-dev:latest restore-standalone.sh --bootstrap-server restore-to-kafka:9092 79 | --source-dir /kafka-backup/04_delete_old_segment/ 80 | --topics 'backup-test-1partition' 81 | timeout: 60s 82 | stdout_has: ['All records read.'] 83 | 84 | - name: Verify Backup 85 | entries: 86 | - name: Count Messages 87 | command: docker-compose exec -T restore-to-kafka 88 | utils.py count_messages 89 | stdout_has: 90 | - "backup-test-1partition 0: 1959" 91 | - name: Verify Records 92 | command: docker-compose exec -T restore-to-kafka 93 | utils.py consume_verify_messages --topic backup-test-1partition --partition 0 --start_num 1041 --count 1959 94 | 95 | - name: Clean-up Containers 96 | entries: 97 | - name: Docker Compose Down 98 | command: docker-compose down 99 | -------------------------------------------------------------------------------- /system_test/README.md: -------------------------------------------------------------------------------- 1 | ## Usage 2 | 3 | Install coyote 4 | 5 | ```sh 6 | go get github.com/landoop/coyote 7 | ``` 8 | 9 | Build Kafka Backup (from the root directory): 10 | 11 | ```sh 12 | ./gradlew shadowJar 13 | ``` 14 | 15 | Then, just run coyote inside this directory. 16 | 17 | ``` 18 | coyote 19 | ``` 20 | 21 | When finished, open `coyote.html`. 22 | 23 | ## Software 24 | 25 | You need these programs to run the test: 26 | - [Coyote](https://github.com/Landoop/coyote/releases) 27 | - [Docker](https://docs.docker.com/engine/installation/) 28 | - [Docker Compose](https://docs.docker.com/engine/installation/) 29 | 30 | Everything else is set up automatically inside containers. 
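31 | 
32 | ## Running a single scenario
33 | 
34 | Each `*.yaml` file in this directory is a self-contained coyote scenario. The following is a minimal sketch for running one scenario on its own and keeping its HTML report; it assumes your coyote build accepts `-c` for the test file and `--out` for the report path (older builds may only write the default `coyote.html`):
35 | 
36 | ```sh
37 | # The scenarios mount /tmp/kafka-backup/ into the containers and expect a local
38 | # kafka-backup-dev:latest image (the utils image builds FROM it), so prepare both first.
39 | mkdir -p out /tmp/kafka-backup
40 | docker-compose build
41 | coyote -c 02_full_test.yaml --out out/02_full_test.html
42 | ```
43 | 
44 | The `out/` directory is ignored by git, so the generated reports stay out of version control.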
-------------------------------------------------------------------------------- /system_test/archive/001_simple_1partition_test/connect-backup-sink.properties: -------------------------------------------------------------------------------- 1 | name=backup-sink 2 | connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector 3 | tasks.max=1 4 | topics.regex=backup-test.* 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter 8 | target.dir=/tmp/kafka-backup/001_simple_1partition_test/ 9 | # 10MiB 10 | max.segment.size.bytes=10485760 11 | cluster.bootstrap.servers=localhost:9092 -------------------------------------------------------------------------------- /system_test/archive/001_simple_1partition_test/connect-backup-source.properties: -------------------------------------------------------------------------------- 1 | name=backup-source 2 | connector.class=de.azapps.kafkabackup.source.BackupSourceConnector 3 | tasks.max=1 4 | topics=backup-test-1partition,backup-test-empty 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter 8 | source.dir=/tmp/kafka-backup/001_simple_1partition_test/ 9 | batch.size=1000 10 | cluster.bootstrap.servers=localhost:9092 11 | cluster.key.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer 12 | cluster.value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer -------------------------------------------------------------------------------- /system_test/archive/001_simple_1partition_test/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPT_DIR=$(dirname $0) 3 | DATADIR=/tmp/kafka-backup/001_simple_1partition_test/ 4 | source $SCRIPT_DIR/../utils.sh 5 | 6 | # Uses confluent cli 7 | # 8 | # * Stop kafka if its running 9 | kafka_stop 10 | # * Delete all data 11 | kafka_delete_data 12 | rm -rf $DATADIR 13 | mkdir -p $DATADIR 14 | 15 | 16 | ########################## Generate Data 17 | # * Start Kafka 18 | kafka_start 19 | # * Configure following topics: 20 | # * backup-test-1partition 21 | create_topic backup-test-1partition 1 22 | produce_messages backup-test-1partition 0 0 300 23 | consume_messages backup-test-1partition cg-100 100 24 | consume_messages backup-test-1partition cg-200 200 25 | consume_messages backup-test-1partition cg-300 300 26 | 27 | # 1 empty partition, one full 28 | create_topic backup-test-empty 3 29 | produce_messages backup-test-empty 0 0 300 30 | 31 | 32 | 33 | ########################## Backup 34 | # * Start Kafka Connect distributed 35 | kafka_connect_start 36 | # * Configure backup-sink: 37 | # * segment size: 10MiB 38 | # * topics.regex: backup-test-1partition 39 | kafka_connect_load_connector 001_simple_1partition_test_sink "$SCRIPT_DIR/connect-backup-sink.properties" 40 | # * Wait a few minutes 41 | sleep $((60*5)) 42 | 43 | 44 | ########################## Destroy & Restore Cluster 45 | # * Stop Kafka 46 | kafka_stop 47 | # * Delete all data 48 | kafka_delete_data 49 | # * Start Kafka 50 | kafka_start 51 | # * Create all 3 topics as above (we are not testing zookeeper backup!) 
52 | create_topic backup-test-1partition 1 53 | create_topic backup-test-empty 3 54 | 55 | 56 | ########################## Restore topic 57 | # * Start Kafka Connect distributed 58 | kafka_connect_start 59 | # * Configure backup-source 60 | kafka_connect_load_connector 001_simple_1partition_test_source $SCRIPT_DIR/connect-backup-source.properties 61 | kafka_connect_unload_connector 001_simple_1partition_test_source 62 | 63 | consume_verify_messages backup-test-1partition 0 300 64 | -------------------------------------------------------------------------------- /system_test/archive/backup_with_burry/connect-backup-sink.properties: -------------------------------------------------------------------------------- 1 | name=backup-sink 2 | connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector 3 | tasks.max=1 4 | topics.regex=backup-test.* 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter 8 | target.dir=/tmp/kafka-backup/backup_with_burry/topics 9 | # 10MiB 10 | max.segment.size.bytes=10485760 11 | cluster.bootstrap.servers=localhost:9092 -------------------------------------------------------------------------------- /system_test/archive/backup_with_burry/connect-backup-source.properties: -------------------------------------------------------------------------------- 1 | name=backup-source 2 | connector.class=de.azapps.kafkabackup.source.BackupSourceConnector 3 | tasks.max=1 4 | topics=backup-test-1partition 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter 8 | source.dir=/tmp/kafka-backup/backup_with_burry/topics 9 | batch.size=1000 10 | cluster.bootstrap.servers=localhost:9092 11 | cluster.key.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer 12 | cluster.value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer -------------------------------------------------------------------------------- /system_test/archive/backup_with_burry/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPT_DIR=$(dirname $0) 3 | DATADIR=/tmp/kafka-backup/backup_with_burry 4 | source $SCRIPT_DIR/../utils.sh 5 | 6 | # Uses confluent cli 7 | # 8 | # * Stop kafka if its running 9 | kafka_stop 10 | # * Delete all data 11 | kafka_delete_data 12 | rm -rf $DATADIR 13 | mkdir -p $DATADIR/{burry,topics} 14 | 15 | 16 | ########################## Generate Data 17 | # * Start Kafka 18 | kafka_start 19 | # * Configure following topics: 20 | # * backup-test-1partition 21 | create_topic backup-test-1partition 1 22 | produce_messages backup-test-1partition 0 0 300 23 | consume_messages backup-test-1partition cg-100 100 24 | consume_messages backup-test-1partition cg-200 200 25 | consume_messages backup-test-1partition cg-300 300 26 | 27 | 28 | ########################## Backup 29 | # * Start Kafka Connect distributed 30 | kafka_connect_start 31 | # * Configure backup-sink: 32 | # * segment size: 10MiB 33 | # * topics.regex: backup-test-1partition 34 | kafka_connect_load_connector 001_simple_1partition_test_sink "$SCRIPT_DIR/connect-backup-sink.properties" 35 | # * Wait a few minutes 36 | sleep $((60*5)) 37 | 38 | ########################## Backup Zookeeper 39 | burry_backup 
$DATADIR/burry 40 | 41 | ########################## Destroy & Restore Cluster 42 | # * Stop Kafka 43 | kafka_stop 44 | # * Delete all data 45 | kafka_delete_data 46 | # * Start Kafka 47 | kafka_start 48 | 49 | ########################## Restore Zookeeper 50 | burry_restore $DATADIR/burry 51 | # Restart Kafka 52 | kafka_stop 53 | kafka_start 54 | 55 | 56 | ########################## Restore topic 57 | # * Start Kafka Connect distributed 58 | kafka_connect_start 59 | # * Configure backup-source 60 | kafka_connect_load_connector 001_simple_1partition_test_source $SCRIPT_DIR/connect-backup-source.properties 61 | sleep $((60*5)) 62 | kafka_connect_unload_connector 001_simple_1partition_test_source 63 | 64 | consume_verify_messages backup-test-1partition 0 300 65 | -------------------------------------------------------------------------------- /system_test/archive/full_test/connect-backup-sink.properties: -------------------------------------------------------------------------------- 1 | name=backup-sink 2 | connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector 3 | tasks.max=1 4 | topics.regex=backup-test.* 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter 8 | target.dir=/tmp/kafka-backup/full_test 9 | # 10MiB 10 | max.segment.size.bytes=10485760 11 | cluster.bootstrap.servers=localhost:9092 -------------------------------------------------------------------------------- /system_test/archive/full_test/connect-backup-source.properties: -------------------------------------------------------------------------------- 1 | name=backup-source 2 | connector.class=de.azapps.kafkabackup.source.BackupSourceConnector 3 | tasks.max=1 4 | topics=backup-test-1partition,backup-test-3partitions,backup-test-10partitions 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter 8 | source.dir=/tmp/kafka-backup/full_test/ 9 | batch.size=1000 10 | cluster.bootstrap.servers=localhost:9092 11 | cluster.key.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer 12 | cluster.value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer -------------------------------------------------------------------------------- /system_test/archive/full_test/round_trip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPT_DIR=$(dirname $0) 3 | DATADIR=/tmp/kafka-backup/full_test/ 4 | source $SCRIPT_DIR/../utils.sh 5 | NUM_MSG=100 6 | 7 | # Uses confluent cli 8 | # 9 | # * Stop kafka if its running 10 | kafka_stop 11 | # * Delete all data 12 | kafka_delete_data 13 | rm -rf $DATADIR 14 | mkdir -p $DATADIR 15 | # * Start Kafka 16 | kafka_start 17 | # * Configure following topics: 18 | # * backup-test-1partition 19 | # * backup-test-3partitions 20 | create_topic backup-test-1partition 1 21 | create_topic backup-test-3partitions 3 22 | # * Produce 3,00 messages, 10KiB each to each partition 23 | # 24 | # We need to chunk the production of messages as otherwise we cannot 25 | # guarantee that the group consumer will evenly consume the partitions. 
26 | produce_messages backup-test-1partition 0 0 $((3 * NUM_MSG)) 27 | # backup-test-3partition 28 | produce_messages backup-test-3partitions 0 0 $((3 * NUM_MSG)) 29 | produce_messages backup-test-3partitions 1 0 $((3 * NUM_MSG)) 30 | produce_messages backup-test-3partitions 2 0 $((3 * NUM_MSG)) 31 | # * Consume all messages with consumer-group `cg-3k` 32 | consume_messages backup-test-1partition cg-3k $((3 * NUM_MSG)) 33 | consume_messages backup-test-3partitions cg-3k $((9 * NUM_MSG)) 34 | kafka_group_describe cg-3k 35 | # * Produce 2 * NUM_MSG messages 36 | produce_messages backup-test-1partition 0 $((3 * NUM_MSG)) $((2 * NUM_MSG)) 37 | # backup-test-3partition 38 | produce_messages backup-test-3partitions 0 $((3 * NUM_MSG)) $((2 * NUM_MSG)) 39 | produce_messages backup-test-3partitions 1 $((3 * NUM_MSG)) $((2 * NUM_MSG)) 40 | produce_messages backup-test-3partitions 2 $((3 * NUM_MSG)) $((2 * NUM_MSG)) 41 | # * Consume all messages with consumer-group `cg-5k` 42 | consume_messages backup-test-1partition cg-5k $((5 * NUM_MSG)) 43 | consume_messages backup-test-3partitions cg-5k $((15 * NUM_MSG)) 44 | # * Produce 100 more messages 45 | produce_messages backup-test-1partition 0 $((5 * NUM_MSG)) $((1 * NUM_MSG)) 46 | # backup-test-3partition 47 | produce_messages backup-test-3partitions 0 $((5 * NUM_MSG)) $((1 * NUM_MSG)) 48 | produce_messages backup-test-3partitions 1 $((5 * NUM_MSG)) $((1 * NUM_MSG)) 49 | produce_messages backup-test-3partitions 2 $((5 * NUM_MSG)) $((1 * NUM_MSG)) 50 | # * Start Kafka Connect distributed 51 | kafka_connect_start 52 | # * Configure backup-sink: 53 | # * segment size: 10MiB 54 | # * topics.regex: backup-test-* 55 | sleep 10 56 | kafka_connect_load_connector backup-sink "$SCRIPT_DIR/connect-backup-sink.properties" 57 | sleep 10 58 | # * Create another topic: 59 | # * backup-test-10partitions 60 | 61 | create_topic backup-test-10partitions 10 62 | # * Produce 1,00 messages as above and consume 500 messages as above 63 | for i in {0..9} ; do 64 | produce_messages backup-test-10partitions $i 0 $((5 * NUM_MSG)) 65 | done 66 | # To force segmentation rolling 67 | produce_messages backup-test-1partition 0 $((6 * NUM_MSG)) $((15 * NUM_MSG)) 68 | # Consume some messages 69 | # * Wait a few minutes 70 | sleep $((60*5)) 71 | # * Stop Kafka 72 | kafka_stop 73 | # * Delete all data 74 | kafka_delete_data 75 | # * Start Kafka 76 | kafka_start 77 | # * Create all 3 topics as above (we are not testing zookeeper backup!) 78 | create_topic backup-test-1partition 1 79 | create_topic backup-test-3partitions 3 80 | create_topic backup-test-10partitions 10 81 | # * Start Kafka Connect distributed 82 | kafka_connect_start 83 | # * Configure backup-source 84 | kafka_connect_load_connector backup-source $SCRIPT_DIR/connect-backup-source.properties 85 | # * Wait for restore to finish 86 | sleep $((60*15)) 87 | kafka_connect_unload_connector backup-source 88 | # * Read all messages and check that they are the same as the ones that were written. 
89 | # * Subscribe to Kafka using the consumer groups as above (`cg-5k` and `cg-3k`) and check whether they are at the correct position 90 | 91 | consume_verify_messages backup-test-1partition 0 $((21 * NUM_MSG)) 92 | for i in {0..2} ; do 93 | consume_verify_messages backup-test-3partitions $i $((6 * NUM_MSG)) 94 | done 95 | for i in {0..9} ; do 96 | consume_verify_messages backup-test-10partitions $i $((5 * NUM_MSG)) 97 | done 98 | -------------------------------------------------------------------------------- /system_test/archive/utils.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | kafka_start() { 4 | confluent local start kafka 5 | } 6 | 7 | kafka_stop() { 8 | confluent local stop 9 | } 10 | 11 | kafka_delete_data() { 12 | rm -r /tmp/confluent.* 13 | } 14 | 15 | kafka-topics() { 16 | "$CONFLUENT_HOME/bin/kafka-topics" $@ 17 | } 18 | kafka-console-consumer() { 19 | "$CONFLUENT_HOME/bin/kafka-console-consumer" $@ 20 | } 21 | kafka-consumer-groups() { 22 | "$CONFLUENT_HOME/bin/kafka-consumer-groups" $@ 23 | } 24 | 25 | create_topic() { 26 | TOPIC=$1 27 | PARTITIONS=$2 28 | if [ -z "$PARTITIONS" ]; then 29 | echo "USAGE: $0 [TOPIC] [PARTITIONS]" 30 | return 255 31 | fi 32 | 33 | kafka-topics --create --bootstrap-server localhost:9092 --topic "$TOPIC" --partitions "$PARTITIONS" --replication-factor 1 34 | } 35 | 36 | gen_message() { 37 | PARTITION=$1 38 | NUM=$2 39 | if [ -z "$NUM" ]; then 40 | echo "USAGE: $0 [PARTITION] [NUM] (SIZE)" 41 | return 255 42 | fi 43 | SIZE=$3 44 | if [ -z "$SIZE" ]; then 45 | SIZE=7500 # 10k Bytes base64 46 | fi 47 | VALUE=$(dd if=/dev/urandom bs=$SIZE count=1 2>/dev/null | base64 -w0) 48 | CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1) 49 | KEY="part_${PARTITION}_num_${NUM}_${CHECKSUM}" 50 | echo "${KEY},${VALUE}" 51 | } 52 | 53 | gen_messages() { 54 | PARTITION=$1 55 | START_NUM=$2 56 | COUNT=$3 57 | if [ -z "$COUNT" ]; then 58 | echo "USAGE: $0 [PARTITION] [START_NUM] [COUNT] (SIZE)" 59 | return 255 60 | fi 61 | SIZE=$4 62 | for NUM in {$START_NUM..$((START_NUM + COUNT - 1))}; do 63 | if [ "0" -eq "$(((NUM - START_NUM) % 100))" ]; then 64 | echo -e -n "\rProduced $((NUM - START_NUM))/$COUNT messages" >/dev/stderr 65 | fi 66 | gen_message "$PARTITION" $NUM "$SIZE" 67 | done 68 | echo "" 69 | } 70 | 71 | produce_messages() { 72 | TOPIC=$1 73 | PARTITION=$2 74 | START_NUM=$3 75 | COUNT=$4 76 | if [ -z "$COUNT" ]; then 77 | echo "USAGE: $0 [TOPIC] [PARTITION] [START_NUM] [COUNT] (SIZE)" 78 | return 255 79 | fi 80 | SIZE=$5 81 | 82 | gen_messages "$PARTITION" "$START_NUM" "$COUNT" "$SIZE" | kafkacat -P -b localhost:9092 -t "$TOPIC" -p "$PARTITION" -K "," 83 | } 84 | 85 | verify_messages() { 86 | PREVIOUS_NUM="-1" 87 | while read -r MESSAGE; do 88 | if [ "0" -eq "$(((PREVIOUS_NUM + 1) % 10))" ]; then 89 | echo -e -n "\rVerified $((PREVIOUS_NUM + 1)) messages" >/dev/stderr 90 | fi 91 | KEY=$(echo "$MESSAGE" | awk '{print $1}') 92 | KEY_MATCH=$(echo "$KEY" | sed 's/part_\([0-9]*\)_num_\([0-9]*\)_\(.*\)$/\1\t\2\t\3/') 93 | KEY_PARTITION=$(echo "$KEY_MATCH" | awk '{print $1}') 94 | KEY_NUM=$(echo "$KEY_MATCH" | awk '{print $2}') 95 | KEY_CHECKSUM=$(echo "$KEY_MATCH" | awk '{print $3}') 96 | 97 | VALUE=$(echo "$MESSAGE" | awk '{print $2}') 98 | VALUE_CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1) 99 | 100 | if [ ! "$KEY_NUM" -eq "$((PREVIOUS_NUM + 1))" ]; then 101 | echo "Missing message. Previous message has num $PREVIOUS_NUM. 
This message has num $KEY_NUM" 102 | return 255 103 | fi 104 | PREVIOUS_NUM=$KEY_NUM 105 | 106 | if [ "$KEY_CHECKSUM" != "$VALUE_CHECKSUM" ]; then 107 | echo "Partition $KEY_PARTITION, Key $KEY_NUM, KChk $KEY_CHECKSUM, vlength ${#VALUE}, vchk: $VALUE_CHECKSUM" 108 | 109 | echo "Checksum mismatch: Checksum in key ($KEY_CHECKSUM) does not match Checksum of value ($VALUE_CHECKSUM)" 110 | return 255 111 | fi 112 | done 113 | echo -e "\rVerified $((PREVIOUS_NUM + 1)) messages" 114 | } 115 | 116 | consume_verify_messages() { 117 | TOPIC=$1 118 | PARTITION=$2 119 | COUNT=$3 120 | if [ -z "$COUNT" ]; then 121 | echo "USAGE: $0 [TOPIC] [PARTITION] [COUNT]" 122 | return 255 123 | fi 124 | 125 | kafka-console-consumer \ 126 | --bootstrap-server localhost:9092 \ 127 | --from-beginning --property print.key=true \ 128 | --topic "$TOPIC" \ 129 | --max-messages="$COUNT" \ 130 | --partition="$PARTITION" 2>&/dev/null | 131 | verify_messages 132 | } 133 | 134 | consume_messages() { 135 | TOPIC=$1 136 | CONSUMER_GROUP=$2 137 | COUNT=$3 138 | if [ -z "$COUNT" ]; then 139 | echo "USAGE: $0 [TOPIC] [CONSUMER GROUP] [COUNT]" 140 | return 255 141 | fi 142 | 143 | MESSAGES=$(kafka-console-consumer \ 144 | --bootstrap-server localhost:9092 \ 145 | --from-beginning --property print.key=true \ 146 | --topic "$TOPIC" \ 147 | --max-messages "$COUNT" \ 148 | --group "$CONSUMER_GROUP") # 2>/dev/null) 149 | echo "Consumed $(echo "$MESSAGES" | wc -l) messages" 150 | } 151 | 152 | kafka_connect_start() { 153 | confluent local start connect 154 | } 155 | 156 | kafka_connect_load_connector() { 157 | NAME=$1 158 | PROPS="$2" 159 | if [ -z "$PROPS" ]; then 160 | echo "USAGE: $0 [NAME] [PROPS FILE]" 161 | return 255 162 | fi 163 | cp "$PROPS" /tmp/connect.properties 164 | confluent local load "$NAME" -- -d /tmp/connect.properties 165 | rm /tmp/connect.properties 166 | } 167 | 168 | kafka_connect_unload_connector() { 169 | NAME=$1 170 | if [ -z "$NAME" ]; then 171 | echo "USAGE: $0 [NAME]" 172 | return 255 173 | fi 174 | confluent local unload "$NAME" 175 | } 176 | 177 | kafka_group_describe() { 178 | GROUP=$1 179 | if [ -z "$GROUP" ]; then 180 | echo "USAGE: $0 [GROUP]" 181 | return 255 182 | fi 183 | kafka-consumer-groups --bootstrap-server localhost:9092 --describe --group "$GROUP" 184 | } 185 | 186 | burry_backup() { 187 | TARGET_DIR=$1 188 | if [ -z "$TARGET_DIR" ]; then 189 | echo "USAGE: $0 [TARGET_DIR]" 190 | return 255 191 | fi 192 | docker run --network=host -v "$TARGET_DIR":/data azapps/burry -e localhost:2181 -t local 193 | } 194 | 195 | burry_restore() { 196 | SOURCE_DIR=$1 197 | if [ -z "$SOURCE_DIR" ]; then 198 | echo "USAGE: $0 [SOURCE_DIR]" 199 | return 255 200 | fi 201 | SNAPSHOT=$(ls "$DATADIR"/burry | tail -n 1 | sed 's/.zip//') 202 | docker run --network=host -v "$SOURCE_DIR":/data azapps/burry --operation=restore --snapshot="$SNAPSHOT" -e localhost:2181 -t local 203 | } 204 | -------------------------------------------------------------------------------- /system_test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | services: 3 | # To Backup 4 | to-backup-zk: 5 | build: 6 | context: ./utils 7 | dockerfile: Dockerfile 8 | container_name: to-backup-zk 9 | hostname: to-backup-zk 10 | ports: 11 | - 12181:2181 12 | volumes: 13 | - "./utils/kafka-configs/zookeeper.properties:/etc/zookeeper.properties" 14 | command: "zookeeper-server-start.sh /etc/zookeeper.properties" 15 | networks: 16 | - to-backup 17 | 18 | to-backup-kafka: 19 | build: 20 
| context: ./utils 21 | dockerfile: Dockerfile 22 | container_name: to-backup-kafka 23 | hostname: to-backup-kafka 24 | volumes: 25 | - "./utils/kafka-configs/to-backup-kafka.properties:/etc/kafka.properties" 26 | - "./utils/utils.py:/usr/bin/utils.py" 27 | ports: 28 | - 19092:19092 29 | networks: 30 | - to-backup 31 | depends_on: 32 | - to-backup-zk 33 | command: "kafka-server-start.sh /etc/kafka.properties" 34 | 35 | ################################################################################################ 36 | # Restore to 37 | 38 | restore-to-zk: 39 | build: 40 | context: ./utils 41 | dockerfile: Dockerfile 42 | container_name: restore-to-zk-1 43 | hostname: restore-to-zk-1 44 | ports: 45 | - 22181:2181 46 | volumes: 47 | - "./utils/kafka-configs/zookeeper.properties:/etc/zookeeper.properties" 48 | command: "zookeeper-server-start.sh /etc/zookeeper.properties" 49 | networks: 50 | - restore-to 51 | 52 | restore-to-kafka: 53 | build: 54 | context: ./utils 55 | dockerfile: Dockerfile 56 | container_name: restore-to-kafka 57 | hostname: restore-to-kafka 58 | volumes: 59 | - "./utils/kafka-configs/restore-to-kafka.properties:/etc/kafka.properties" 60 | - "./utils/utils.py:/usr/bin/utils.py" 61 | ports: 62 | - 29092:29092 63 | networks: 64 | - restore-to 65 | depends_on: 66 | - restore-to-zk 67 | command: "kafka-server-start.sh /etc/kafka.properties" 68 | 69 | networks: 70 | to-backup: 71 | restore-to: -------------------------------------------------------------------------------- /system_test/utils/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM kafka-backup-dev:latest 2 | 3 | RUN apk add --no-cache make gcc g++ cmake curl pkgconfig perl bsd-compat-headers zlib-dev lz4-dev openssl-dev \ 4 | curl-dev libcurl lz4-libs ca-certificates python3 bash python3-dev 5 | 6 | # Build librdkafka 7 | RUN mkdir /usr/src && cd /usr/src/ && \ 8 | curl https://codeload.github.com/edenhill/librdkafka/tar.gz/master | tar xzf - && \ 9 | cd librdkafka-master && \ 10 | ./configure && \ 11 | make && make install && \ 12 | cd / && rm -rf /usr/src/ 13 | 14 | # Install confluent-kafka python 15 | 16 | RUN pip3 install confluent-kafka==1.3.0 pykafka==2.8.0dev1 17 | COPY utils.py /usr/bin/utils.py -------------------------------------------------------------------------------- /system_test/utils/kafka-configs/restore-to-kafka.properties: -------------------------------------------------------------------------------- 1 | broker.id=0 2 | log.dirs=/tmp/kafka-logs 3 | zookeeper.connect=restore-to-zk:2181 4 | advertised.listeners=INDOCKER://restore-to-kafka:9092,OUTDOCKER://localhost:29092 5 | listeners=INDOCKER://:9092,OUTDOCKER://:29092 6 | listener.security.protocol.map=INDOCKER:PLAINTEXT,OUTDOCKER:PLAINTEXT 7 | inter.broker.listener.name=INDOCKER 8 | offsets.topic.replication.factor=1 9 | offsets.topic.num.partitions=1 10 | auto.create.topics.enable=false -------------------------------------------------------------------------------- /system_test/utils/kafka-configs/to-backup-kafka.properties: -------------------------------------------------------------------------------- 1 | broker.id=0 2 | log.dirs=/tmp/kafka-logs 3 | zookeeper.connect=to-backup-zk:2181 4 | advertised.listeners=INDOCKER://to-backup-kafka:9092,OUTDOCKER://localhost:19092 5 | listeners=INDOCKER://:9092,OUTDOCKER://:19092 6 | listener.security.protocol.map=INDOCKER:PLAINTEXT,OUTDOCKER:PLAINTEXT 7 | inter.broker.listener.name=INDOCKER 8 | offsets.topic.replication.factor=1 9 | 
offsets.topic.num.partitions=1 10 | auto.create.topics.enable=false -------------------------------------------------------------------------------- /system_test/utils/kafka-configs/zookeeper.properties: -------------------------------------------------------------------------------- 1 | dataDir=/tmp/zookeeper 2 | clientPort=2181 3 | maxClientCnxns=0 -------------------------------------------------------------------------------- /system_test/utils/runutil: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source /root/utils.sh 3 | "$@" -------------------------------------------------------------------------------- /system_test/utils/utils.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | create_topic() { 4 | TOPIC=$1 5 | PARTITIONS=$2 6 | if [ -z "$PARTITIONS" ] || [ -n "$3" ]; then 7 | echo "USAGE: $0 [TOPIC] [PARTITIONS]" 8 | return 255 9 | fi 10 | 11 | kafka-topics --create --bootstrap-server localhost:9092 --topic "$TOPIC" --partitions "$PARTITIONS" --replication-factor 1 12 | } 13 | export -f create_topic 14 | 15 | gen_message() { 16 | PARTITION=$1 17 | NUM=$2 18 | SIZE=$3 19 | if [ -z "$NUM" ] || [ -n "$4" ]; then 20 | echo "USAGE: $0 [PARTITION] [NUM] (SIZE)" 21 | return 255 22 | fi 23 | if [ -z "$SIZE" ]; then 24 | SIZE=7500 # 10k Bytes base64 25 | fi 26 | VALUE=$(dd if=/dev/urandom bs=$SIZE count=1 2>/dev/null | base64 -w0) 27 | CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1) 28 | KEY="part_${PARTITION}_num_${NUM}_${CHECKSUM}" 29 | echo "${KEY},${VALUE}" 30 | } 31 | export -f gen_message 32 | 33 | gen_messages() { 34 | PARTITION=$1 35 | START_NUM=$2 36 | COUNT=$3 37 | if [ -z "$COUNT" ] || [ -n "$4" ]; then 38 | echo "USAGE: $0 [PARTITION] [START_NUM] [COUNT] (SIZE)" 39 | return 255 40 | fi 41 | SIZE=$4 42 | for NUM in $(seq $START_NUM $((START_NUM + COUNT - 1))); do 43 | if [ "0" -eq "$(((NUM - START_NUM) % 100))" ]; then 44 | echo -e -n "\rProduced $((NUM - START_NUM))/$COUNT messages" >/dev/stderr 45 | fi 46 | gen_message "$PARTITION" $NUM "$SIZE" 47 | done 48 | echo "" 49 | } 50 | export -f gen_messages 51 | 52 | produce_messages() { 53 | TOPIC=$1 54 | PARTITION=$2 55 | START_NUM=$3 56 | COUNT=$4 57 | SIZE=$5 58 | if [ -z "$COUNT" ] || [ -n "$6" ]; then 59 | echo "USAGE: $0 [TOPIC] [PARTITION] [START_NUM] [COUNT] (SIZE)" 60 | return 255 61 | fi 62 | 63 | gen_messages "$PARTITION" "$START_NUM" "$COUNT" "$SIZE" | kafkacat -P -b localhost:9092 -t "$TOPIC" -p "$PARTITION" -K "," 64 | } 65 | export -f produce_messages 66 | 67 | verify_messages() { 68 | PREVIOUS_NUM="$1" 69 | if [ -z "$PREVIOUS_NUM" ]; then 70 | PREVIOUS_NUM="-1" 71 | fi 72 | while read -r MESSAGE; do 73 | if [ "0" -eq "$(((PREVIOUS_NUM + 1) % 100))" ]; then 74 | echo -e -n "\rVerified $((PREVIOUS_NUM + 1)) messages" >/dev/stderr 75 | fi 76 | KEY=$(echo "$MESSAGE" | awk '{print $1}') 77 | KEY_MATCH=$(echo "$KEY" | sed 's/part_\([0-9]*\)_num_\([0-9]*\)_\(.*\)$/\1\t\2\t\3/') 78 | KEY_PARTITION=$(echo "$KEY_MATCH" | awk '{print $1}') 79 | KEY_NUM=$(echo "$KEY_MATCH" | awk '{print $2}') 80 | KEY_CHECKSUM=$(echo "$KEY_MATCH" | awk '{print $3}') 81 | 82 | VALUE=$(echo "$MESSAGE" | awk '{print $2}') 83 | VALUE_CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1) 84 | 85 | if [ ! "$KEY_NUM" -eq "$((PREVIOUS_NUM + 1))" ]; then 86 | echo "Missing message. Previous message has num $PREVIOUS_NUM. 
This message has num $KEY_NUM" 87 | return 255 88 | fi 89 | PREVIOUS_NUM=$KEY_NUM 90 | 91 | if [ "$KEY_CHECKSUM" != "$VALUE_CHECKSUM" ]; then 92 | echo "Partition $KEY_PARTITION, Key $KEY_NUM, KChk $KEY_CHECKSUM, vlength ${#VALUE}, vchk: $VALUE_CHECKSUM" 93 | 94 | echo "Checksum mismatch: Checksum in key ($KEY_CHECKSUM) does not match Checksum of value ($VALUE_CHECKSUM)" 95 | return 255 96 | fi 97 | done 98 | echo -e "\rVerified $((PREVIOUS_NUM + 1)) messages" 99 | } 100 | export -f verify_messages 101 | 102 | consume_verify_messages() { 103 | TOPIC=$1 104 | PARTITION=$2 105 | START_NUM=$3 106 | COUNT=$4 107 | if [ -z "$COUNT" ]; then 108 | COUNT="$START_NUM" 109 | START_NUM="0" 110 | fi 111 | 112 | if [ -z "$COUNT" ] || [ -n "$5" ]; then 113 | echo "USAGE: $0 [TOPIC] [PARTITION] ([START_NUM]) [COUNT]" 114 | return 255 115 | fi 116 | 117 | kafka-console-consumer \ 118 | --bootstrap-server localhost:9092 \ 119 | --from-beginning --property print.key=true \ 120 | --topic "$TOPIC" \ 121 | --max-messages="$COUNT" \ 122 | --partition="$PARTITION" 2>/dev/null | 123 | verify_messages $((START_NUM - 1)) 124 | } 125 | export -f consume_verify_messages 126 | 127 | consume_messages() { 128 | TOPIC=$1 129 | CONSUMER_GROUP=$2 130 | COUNT=$3 131 | if [ -z "$COUNT" ] || [ -n "$4" ]; then 132 | echo "USAGE: $0 [TOPIC] [CONSUMER GROUP] [COUNT]" 133 | return 255 134 | fi 135 | 136 | MESSAGES=$(kafka-console-consumer \ 137 | --bootstrap-server localhost:9092 \ 138 | --from-beginning --property print.key=true \ 139 | --topic "$TOPIC" \ 140 | --max-messages "$COUNT" \ 141 | --group "$CONSUMER_GROUP") # 2>/dev/null) 142 | echo "Consumed $(echo "$MESSAGES" | wc -l) messages" 143 | } 144 | export -f consume_messages 145 | 146 | kafka_group_describe() { 147 | GROUP=$1 148 | if [ -z "$GROUP" ] || [ -n "$2" ]; then 149 | echo "USAGE: $0 [GROUP]" 150 | return 255 151 | fi 152 | kafka-consumer-groups --bootstrap-server localhost:9092 --describe --group "$GROUP" 153 | } 154 | export -f kafka_group_describe 155 | 156 | burry_backup() { 157 | TARGET_DIR=$1 158 | if [ -z "$TARGET_DIR" ] || [ -n "$2" ]; then 159 | echo "USAGE: $0 [TARGET_DIR]" 160 | return 255 161 | fi 162 | docker run --network=host -v "$TARGET_DIR":/data azapps/burry -e localhost:2181 -t local 163 | } 164 | export -f burry_backup 165 | 166 | burry_restore() { 167 | SOURCE_DIR=$1 168 | if [ -z "$SOURCE_DIR" ] || [ -n "$2" ]; then 169 | echo "USAGE: $0 [SOURCE_DIR]" 170 | return 255 171 | fi 172 | SNAPSHOT=$(ls "$DATADIR"/burry | tail -n 1 | sed 's/.zip//') 173 | docker run --network=host -v "$SOURCE_DIR":/data azapps/burry --operation=restore --snapshot="$SNAPSHOT" -e localhost:2181 -t local 174 | } 175 | export -f burry_restore 176 | --------------------------------------------------------------------------------