├── .github
    └── workflows
    │   └── workflow.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── bin
    ├── backup-standalone.sh
    ├── completed_segments.py
    ├── partition-index.sh
    ├── restore-standalone.sh
    ├── segment-index.sh
    └── segment.sh
├── build.gradle
├── docs
    ├── Blogposts
    │   └── 2019-06_Introducing_Kafka_Backup.md
    ├── Comparing_Kafka_Backup_Solutions.md
    ├── FAQ.md
    ├── Kafka_Backup_Architecture.md
    ├── Tooling.md
    └── Usage.md
├── gradle
    └── wrapper
    │   ├── gradle-wrapper.jar
    │   └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── settings.gradle
├── src
    ├── main
    │   └── java
    │   │   └── de
    │   │       └── azapps
    │   │           └── kafkabackup
    │   │               ├── cli
    │   │                   ├── PartitionIndexCLI.java
    │   │                   ├── SegmentCLI.java
    │   │                   ├── SegmentIndexCLI.java
    │   │                   └── formatters
    │   │                   │   ├── Base64Formatter.java
    │   │                   │   ├── ByteFormatter.java
    │   │                   │   ├── DetailedRecordFormatter.java
    │   │                   │   ├── ListRecordFormatter.java
    │   │                   │   ├── RawFormatter.java
    │   │                   │   ├── RecordFormatter.java
    │   │                   │   └── UTF8Formatter.java
    │   │               ├── common
    │   │                   ├── BackupConfig.java
    │   │                   ├── offset
    │   │                   │   ├── EndOffsetReader.java
    │   │                   │   ├── OffsetSink.java
    │   │                   │   ├── OffsetSource.java
    │   │                   │   └── OffsetUtils.java
    │   │                   ├── partition
    │   │                   │   ├── PartitionIndex.java
    │   │                   │   ├── PartitionIndexEntry.java
    │   │                   │   ├── PartitionIndexRestore.java
    │   │                   │   ├── PartitionReader.java
    │   │                   │   ├── PartitionUtils.java
    │   │                   │   └── PartitionWriter.java
    │   │                   ├── record
    │   │                   │   ├── Record.java
    │   │                   │   └── RecordSerde.java
    │   │                   └── segment
    │   │                   │   ├── SegmentIndex.java
    │   │                   │   ├── SegmentIndexEntry.java
    │   │                   │   ├── SegmentIndexRestore.java
    │   │                   │   ├── SegmentReader.java
    │   │                   │   ├── SegmentUtils.java
    │   │                   │   ├── SegmentWriter.java
    │   │                   │   └── UnverifiedSegmentReader.java
    │   │               ├── sink
    │   │                   ├── BackupSinkConfig.java
    │   │                   ├── BackupSinkConnector.java
    │   │                   └── BackupSinkTask.java
    │   │               └── source
    │   │                   ├── BackupSourceConfig.java
    │   │                   ├── BackupSourceConnector.java
    │   │                   └── BackupSourceTask.java
    └── test
    │   ├── assets
    │       └── v1
    │       │   ├── partitionindex
    │       │       └── testIndex
    │       │   ├── records
    │       │       ├── empty_record
    │       │       ├── header_record
    │       │       ├── null_record
    │       │       └── simple_record
    │       │   ├── segmentindex
    │       │       └── testIndex
    │       │   └── segments
    │       │       ├── segment_partition_000_from_offset_0000000000_index
    │       │       └── segment_partition_000_from_offset_0000000000_records
    │   └── java
    │       └── de
    │           └── azapps
    │               └── kafkabackup
    │                   ├── common
    │                       ├── TestUtils.java
    │                       ├── partition
    │                       │   ├── PartitionIndexTest.java
    │                       │   └── PartitionSerdeTest.java
    │                       ├── record
    │                       │   ├── RecordSerdeTest.java
    │                       │   └── RecordTest.java
    │                       └── segment
    │                       │   ├── SegmentIndexTest.java
    │                       │   └── SegmentSerdeTest.java
    │                   └── sink
    │                       ├── BackupSinkTaskTest.java
    │                       ├── MockEndOffsetReader.java
    │                       ├── MockOffsetSink.java
    │                       └── MockSinkTaskContext.java
└── system_test
    ├── .gitignore
    ├── 01_simple_roundtrip_test.yaml
    ├── 02_full_test.yaml
    ├── 03_start_n_stop.yaml
    ├── 04_delete_old_segments.yaml
    ├── README.md
    ├── archive
        ├── 001_simple_1partition_test
        │   ├── connect-backup-sink.properties
        │   ├── connect-backup-source.properties
        │   └── test.sh
        ├── backup_with_burry
        │   ├── connect-backup-sink.properties
        │   ├── connect-backup-source.properties
        │   └── test.sh
        ├── full_test
        │   ├── connect-backup-sink.properties
        │   ├── connect-backup-source.properties
        │   └── round_trip.sh
        └── utils.sh
    ├── docker-compose.yml
    └── utils
        ├── Dockerfile
        ├── kafka-configs
            ├── restore-to-kafka.properties
            ├── to-backup-kafka.properties
            └── zookeeper.properties
        ├── runutil
        ├── utils.py
        └── utils.sh


/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   buildAndTest:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - uses: actions/checkout@v1
10 |       # Build
11 |       - name: Build Image
12 |         run: docker build . --tag kafka-backup-dev:latest
13 |       # Get the jar file and upload it as an artifact
14 |       - name: Prepare upload kafka-backup.jar
15 |         run: |
16 |           id=$(docker create kafka-backup-dev:latest)
17 |           docker cp $id:/opt/kafka-backup/kafka-backup.jar .
18 |           docker rm -v $id
19 |       - uses: actions/upload-artifact@v1
20 |         with:
21 |           name: kafka-backup-jar
22 |           path: ./kafka-backup.jar
23 |       # Integration Tests
24 |       - name: setup env
25 |         run: |
26 |           echo "::set-env name=GOPATH::$(go env GOPATH)"
27 |           echo "::add-path::$(go env GOPATH)/bin"
28 |       - uses: actions/setup-go@v2-beta
29 |         with:
30 |           go-version: '^1.13.1'
31 |       # Prepare Tests
32 |       - name: Setup Coyote
33 |         run: go get github.com/landoop/coyote
34 |       - name: Setup Tests
35 |         working-directory: ./system_test/
36 |         run: |
37 |           docker-compose pull
38 |           docker-compose build
39 |           mkdir -p /tmp/kafka-backup out
40 |           chmod 777 /tmp/kafka-backup
41 |       # Run system tests
42 |       - name: Simple Roundtrip Test
43 |         working-directory: ./system_test/
44 |         run: coyote -c 01_simple_roundtrip_test.yaml --out out/01_simple_roundtrip_test.html
45 |       - name: Full Test
46 |         working-directory: ./system_test/
47 |         run: coyote -c 02_full_test.yaml --out out/02_full_test.html
48 |       - name: Start and Stop Kafka Connect
49 |         working-directory: ./system_test/
50 |         run: coyote -c 03_start_n_stop.yaml --out out/03_start_n_stop.html
51 |       - name: Delete old Segments before restore
52 |         working-directory: ./system_test/
53 |         run: coyote -c 04_delete_old_segments.yaml --out out/04_delete_old_segments.html
54 |       # Upload Test Results
55 |       - name: Upload Test Results
56 |         uses: actions/upload-artifact@v1
57 |         with:
58 |           name: Test Results
59 |           path: ./system_test/out/
60 | 
61 | # Create a Release
62 |   create-release:
63 |     needs: buildAndTest
64 |     if: startsWith(github.ref, 'refs/tags/')
65 |     runs-on: ubuntu-latest
66 |     steps:
67 |       - uses: actions/checkout@v1
68 |       - uses: actions/download-artifact@v1
69 |         with:
70 |           name: kafka-backup-jar
71 |       - name: Prepare Artifact
72 |         run: |
73 |           mv kafka-backup-jar/kafka-backup.jar bin/kafka-backup.jar
74 |           mv bin kafka-backup
75 |           tar cfz kafka-backup.tar.gz kafka-backup
76 |       - name: Create Release
77 |         id: create_release
78 |         uses: actions/create-release@v1
79 |         env:
80 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
81 |         with:
82 |           tag_name: ${{ github.ref }}
83 |           release_name: Release ${{ github.ref }}
84 |           draft: true
85 |           prerelease: false
86 |       - name: Upload Release Asset
87 |         id: upload-release-asset
88 |         uses: actions/upload-release-asset@v1
89 |         env:
90 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
91 |         with:
92 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
93 |           asset_path: ./kafka-backup.tar.gz
94 |           asset_name: kafka-backup.tar.gz
95 |           asset_content_type: application/gzip


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | local-*
3 | .idea
4 | *.iml
5 | .gradle
6 | *\#*
7 | *~
8 | bin/*.jar


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Build Kafka Backup
 2 | FROM gradle:6.3.0-jdk8 AS builder
 3 | WORKDIR /opt/kafka-backup
 4 | COPY . /opt/kafka-backup
 5 | RUN gradle --no-daemon check test shadowJar
 6 | 
 7 | # Build Docker Image with Kafka Backup Jar
 8 | FROM openjdk:8u212-jre-alpine
 9 | 
10 | ARG kafka_version=2.5.0
11 | ARG scala_version=2.12
12 | ARG glibc_version=2.31-r0
13 | 
14 | ENV KAFKA_VERSION=$kafka_version \
15 |     SCALA_VERSION=$scala_version \
16 |     KAFKA_HOME=/opt/kafka \
17 |     GLIBC_VERSION=$glibc_version
18 | 
19 | ENV PATH=${PATH}:${KAFKA_HOME}/bin
20 | 
21 | RUN apk add --no-cache bash curl \
22 |  && wget "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" -O "/tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" \
23 |  && tar xfz /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz -C /opt \
24 |  && rm /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \
25 |  && ln -s /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} ${KAFKA_HOME} \
26 |  && wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VERSION}/glibc-${GLIBC_VERSION}.apk \
27 |  && apk add --no-cache --allow-untrusted glibc-${GLIBC_VERSION}.apk \
28 |  && rm glibc-${GLIBC_VERSION}.apk
29 | 
30 | COPY ./bin /opt/kafka-backup/
31 | COPY --from=builder /opt/kafka-backup/build/libs/kafka-backup.jar /opt/kafka-backup/
32 | 
33 | ENV PATH="${KAFKA_HOME}/bin:/opt/kafka-backup/:${PATH}"
34 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Kafka Backup
  2 | 
  3 | > **Update:** I am no longer maintaining the Kafka Backup project. As an alternative, I recommend [Kannika](https://kannika.io/?utm_source=github_anatoly), a commercial backup solution developed by my friends at [Cymo](https://cymo.eu/?utm_source=github_anatoly) (and don't forget to say hello from Anatoly 😊).
  4 | > [Disclosure: I am a business partner of Cymo and may receive compensation for referrals to Kannika]
  5 | > 
  6 | > Please contact me if you want to continue maintaining this project.
  7 | 
  8 | Kafka Backup is a tool to back up and restore your Kafka data
  9 | including all (configurable) topic data and especially also consumer
 10 | group offsets. To the best of our knowledge, Kafka Backup is the only
 11 | viable solution to take a cold backup of your Kafka data and restore
 12 | it correctly.
 13 | 
 14 | It is designed as two connectors for Kafka
 15 | Connect: A sink connector (backing data up) and a source connector
 16 | (restoring data).
 17 | 
 18 | Currently `kafka-backup` supports backup and restore to/from the file
 19 | system.
 20 | 
 21 | ## Features
 22 | 
 23 | * Backup and restore topic data
 24 | * Backup and restore consumer-group offsets
 25 | * Currently supports only backup/restore to/from local file system
 26 | * Released as a jar file or packaged as a Docker image
 27 | 
 28 | # Getting Started
 29 | 
 30 | **Option A) Download binary**
 31 | 
 32 | Download the latest release [from GitHub](https://github.com/itadventurer/kafka-backup/releases) and unzip it.
 33 | 
 34 | **Option B) Use Docker image**
 35 | 
 36 | Pull the latest Docker image from [Docker Hub](https://hub.docker.com/repository/docker/itadventurer/kafka-backup/tags)
 37 | 
 38 | **DO NOT USE THE `latest` STAGE IN PRODUCTION**. `latest` are automatic builds of the master branch. Be careful!
 39 | 
 40 | **Option C) Build from source**
 41 | 
 42 | Just run `./gradlew shadowJar` in the root directory of Kafka Backup. You will find the CLI tools in the `bin` directory.
 43 | 
 44 | ## Start Kafka Backup
 45 | 
 46 | ```sh
 47 | backup-standalone.sh --bootstrap-server localhost:9092 \
 48 |     --target-dir /path/to/backup/dir --topics 'topic1,topic2'
 49 | ```
 50 | 
 51 | In Docker:
 52 | ```sh
 53 | docker run -d -v /path/to/backup-dir/:/kafka-backup/ --rm \
 54 |     kafka-backup:[LATEST_TAG] \
 55 |     backup-standalone.sh --bootstrap-server kafka:9092 \
 56 |     --target-dir /kafka-backup/ --topics 'topic1,topic2'
 57 | ```
 58 | 
 59 | You can pass options via CLI arguments or using environment variables:
 60 | 
 61 | | Parameter                                   | Type/required? | Description                                                                                                          |
 62 | |---------------------------------------------|----------------|----------------------------------------------------------------------------------------------------------------------|
 63 | | `--bootstrap-server`<br/>`BOOTSTRAP_SERVER` | [REQUIRED]     | The Kafka server to connect to                                                                                       |
 64 | | `--target-dir`<br/>`TARGET_DIR`             | [REQUIRED]     | Directory where the backup files should be stored                                                                    |
 65 | | `--topics`<br/>`TOPICS`                     | <T1,T2,…>      | List of topics to be backed up. You must provide either `--topics` or `--topics-regex`. Not both                     |
 66 | | `--topics-regex`<br/>`TOPICS_REGEX`         |                | Regex of topics to be backed up. You must provide either `--topics` or `--topics-regex`. Not both                    |
 67 | | `--max-segment-size`<br/>`MAX_SEGMENT_SIZE` |                | Size of the backup segments in bytes DEFAULT: 1GiB                                                                   |
 68 | | `--command-config`<br/>`COMMAND_CONFIG`     | <FILE>         | Property file containing configs to be passed to Admin Client. Only useful if you have additional connection options |
 69 | | `--debug`<br/>`DEBUG=y`                     |                | Print Debug information                                                                                              |
 70 | | `--help`                                    |                | Prints this message                                                                                                  |
 71 | 
 72 | **Kafka Backup does not stop!** The Backup process is a continous background job that runs forever as Kafka models data as a stream without end. See [Issue 52: Support point-in-time snapshots](https://github.com/itadventurer/kafka-backup/issues/52) for more information.
 73 | 
 74 | ## Restore data
 75 | 
 76 | ```sh
 77 | restore-standalone.sh --bootstrap-server localhost:9092 \
 78 |     --target-dir /path/to/backup/dir --topics 'topic1,topic2'
 79 | ```
 80 | 
 81 | In Docker:
 82 | ```sh
 83 | docker run -v /path/to/backup/dir:/kafka-backup/ --rm \
 84 |     kafka-backup:[LATEST_TAG]
 85 |     restore-standalone.sh --bootstrap-server kafka:9092 \
 86 |     --source-dir /kafka-backup/ --topics 'topic1,topic2'
 87 | ```
 88 | 
 89 | You can pass options via CLI arguments or using environment variables:
 90 | 
 91 | 
 92 | | Parameter                                   | Type/required? | Description                                                                                                                                                                                                                |
 93 | |---------------------------------------------|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 94 | | `--bootstrap-server`<br/>`BOOTSTRAP_SERVER` | [REQUIRED]     | The Kafka server to connect to                                                                                                                                                                                             |
 95 | | `--source-dir`<br/>`SOURCE_DIR`             | [REQUIRED]     | Directory where the backup files are found                                                                                                                                                                                 |
 96 | | `--topics`<br/>`TOPICS`                     | [REQUIRED]     | List of topics to restore                                                                                                                                                                                                  |
 97 | | `--batch-size`<br/>`BATCH_SIZE`             |                | Batch size (Default: 1MiB)                                                                                                                                                                                                 |
 98 | | `--offset-file`<br/>`OFFSET_FILE`           |                | File where to store offsets. THIS FILE IS CRUCIAL FOR A CORRECT RESTORATION PROCESS IF YOU LOSE IT YOU NEED TO START THE BACKUP FROM SCRATCH. OTHERWISE YOU WILL HAVE DUPLICATE DATA Default: [source-dir]/restore.offsets |
 99 | | `--command-config`<br/>`COMMAND_CONFIG`     | <FILE>         | Property file containing configs to be passed to Admin Client. Only useful if you have additional connection options                                                                                                       |
100 | | `--help`<br/>`HELP`                         |                | Prints this message                                                                                                                                                                                                        |
101 | | `--debug`<br/>`DEBUG`                       |                | Print Debug information (if using the environment variable, set it to 'y')                                                                                                                                                 |
102 | 
103 | ## More Documentation
104 | 
105 | * [FAQ](./docs/FAQ.md)
106 | * [High Level
107 |   Introduction](./docs/Blogposts/2019-06_Introducing_Kafka_Backup.md)
108 | * [Comparing Kafka Backup
109 |   Solutions](./docs/Comparing_Kafka_Backup_Solutions.md)
110 | * [Architecture](./docs/Kafka_Backup_Architecture.md)
111 | * [Tooling](./docs/Tooling.md)
112 | 
113 | ## License
114 | 
115 | This project is licensed under the Apache License Version 2.0 (see
116 | [LICENSE](./LICENSE)).
117 | 


--------------------------------------------------------------------------------
/bin/completed_segments.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Find (and delete) kafka-backup segment files
  4 | """
  5 | 
  6 | import os
  7 | import re
  8 | import sys
  9 | import argparse
 10 | 
 11 | 
 12 | def filename_pattern():
 13 |     """ Kafka-backup segment/index filename pattern.
 14 | 
 15 |     Using function as constant string.
 16 |     This pattern is used to make filename-matching regex below too.
 17 |     """
 18 |     return 'segment_partition_%s_from_offset_%s_%s'
 19 | 
 20 | 
 21 | def oneplus(string):
 22 |     """ Check argument is >= 1 """
 23 |     value = int(string)
 24 |     if value < 1:
 25 |         raise argparse.ArgumentTypeError("cannot be less than 1")
 26 |     return value
 27 | 
 28 | 
 29 | def parse_args():
 30 |     """ Parse cmdline args """
 31 |     parser = argparse.ArgumentParser(
 32 |         description='Find (and delete) kafka-backup segment files'
 33 |     )
 34 |     parser.add_argument(
 35 |         '-d', '--delete',
 36 |         help='delete segment files',
 37 |         action='store_true',
 38 |         default=False,
 39 |     )
 40 |     parser.add_argument(
 41 |         '-k', '--keep',
 42 |         help='keep N segment files (one by default)',
 43 |         type=oneplus,
 44 |         default=1,
 45 |         metavar='N',
 46 |     )
 47 |     parser.add_argument(
 48 |         '-l', '--list',
 49 |         help='list segment files',
 50 |         action='store_true',
 51 |         default=False,
 52 |     )
 53 |     parser.add_argument(
 54 |         'target_dir',
 55 |         help='backup directory (target.dir)',
 56 |         default=os.getcwd(),
 57 |     )
 58 |     return parser.parse_args()
 59 | 
 60 | 
 61 | def collect_segments(target_dir):
 62 |     """ Collect segment partitions & offsets per directory (topic)
 63 | 
 64 |     Args:
 65 |         target_dir (str): Kafka-backup target.dir
 66 | 
 67 |     Returns:
 68 |         A dict mapping keys to topics. Each value is dict of partitions with
 69 |         list of offsets in it. For example
 70 | 
 71 |         topic1: {
 72 |             000: [ 0000000000 ],
 73 |         },
 74 |         topic2: {
 75 |             000: [ 0000000021 ],
 76 |             001: [ 0000000391 ],
 77 |             002: [ 0000001291, 0000018423 ]
 78 |         },
 79 |         ...
 80 | 
 81 |     """
 82 | 
 83 |     # Make regex from pattern
 84 |     # Implemented this way to keep file pattern just in single place (filename_pattern() above)
 85 |     fregex = re.compile(r"^%s$" % (filename_pattern() % (r'(\d{3})', r'(\d{10})', 'records')))
 86 |     res = {}
 87 |     # Traverse dirtree to collect offsets in partitions per topic
 88 |     for tdir, _, files in os.walk(target_dir):
 89 |         if tdir == target_dir:
 90 |             continue
 91 |         res[tdir] = {}
 92 |         for segfile in sorted(files):
 93 |             match = fregex.match(segfile)
 94 |             if match:
 95 |                 (partition, offset) = match.groups()
 96 |                 if partition not in res[tdir]:
 97 |                     res[tdir][partition] = []
 98 |                 res[tdir][partition].append(offset)
 99 |     return res
100 | 
101 | 
102 | def process_segment(tdir, partition, offset, do_delete, do_list):
103 |     """ Process segment
104 | 
105 |     Args:
106 |         tdir (str): topic directory
107 |         partition (str): topic partition
108 |         offset (str): starting segment offset
109 |         do_delete (bool): delete segment files?
110 |         do_list (boot): list segment files?
111 |     """
112 |     index_file = filename_pattern() % (partition, offset, 'index')
113 |     records_file = filename_pattern() % (partition, offset, 'records')
114 | 
115 |     if do_delete or do_list:
116 |         index_path = os.path.join(tdir, index_file)
117 |         records_path = os.path.join(tdir, records_file)
118 |         if do_list:
119 |             print(index_path)
120 |             print(records_path)
121 |         if do_delete:
122 |             os.unlink(index_path)
123 |             os.unlink(records_path)
124 |     else:
125 |         print("Topic %s, First offset %s - Index file: %s Records File: %s"
126 |               % (os.path.basename(tdir), offset, index_file, records_file))
127 | 
128 | 
129 | def main():
130 |     """ int main(int argc, char **argv) """
131 |     args = parse_args()
132 | 
133 |     segs = collect_segments(args.target_dir)
134 | 
135 |     for tdir, seg_data in segs.items():
136 |         for partition, offsets in seg_data.items():
137 |             for i, offset in enumerate(offsets):
138 |                 # Perform action requested (list/delete/default) on the file
139 |                 # Skip (keep) last `args.keep` files (1 by default)
140 |                 # That one skipped by default file is usually incompleted so must be kept anyway
141 |                 if i < len(offsets) - args.keep:
142 |                     process_segment(tdir, partition, offset, args.delete, args.list)
143 |     return 0
144 | 
145 | 
146 | if __name__ == '__main__':
147 |     sys.exit(main())
148 | 


--------------------------------------------------------------------------------
/bin/partition-index.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | java -cp "$( dirname "${BASH_SOURCE[0]}" )/kafka-backup.jar" de.azapps.kafkabackup.cli.PartitionIndexCLI "$@"


--------------------------------------------------------------------------------
/bin/segment-index.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | java -cp "$( dirname "${BASH_SOURCE[0]}" )/kafka-backup.jar" de.azapps.kafkabackup.cli.SegmentIndexCLI "$@"


--------------------------------------------------------------------------------
/bin/segment.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | java -cp "$( dirname "${BASH_SOURCE[0]}" )/kafka-backup.jar" de.azapps.kafkabackup.cli.SegmentCLI "$@"


--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
 1 | buildscript {
 2 |     repositories {
 3 |         jcenter()
 4 |         maven {
 5 |             url "https://plugins.gradle.org/m2/"
 6 |         }
 7 |     }
 8 |     dependencies {
 9 |         classpath 'com.github.jengelman.gradle.plugins:shadow:4.0.2'
10 |         classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.20.0"
11 |         classpath "gradle.plugin.com.github.spotbugs.snom:spotbugs-gradle-plugin:4.3.0"
12 |     }
13 | }
14 | 
15 | apply plugin: 'java'
16 | apply plugin: 'idea'
17 | apply plugin: "com.github.spotbugs"
18 | 
19 | description = "kafka-backup"
20 | group = 'de.azapps.kafkabackup'
21 | 
22 | ext {
23 |     pomHumanName = 'Kafka Backup'
24 |     pomDesc = 'Kafka Backup Connector'
25 | }
26 | 
27 | allprojects {
28 |     sourceCompatibility = 1.8
29 |     targetCompatibility = 1.8
30 | }
31 | 
32 | repositories {
33 |     mavenCentral()
34 | }
35 | 
36 | dependencies {
37 |     implementation "org.slf4j:slf4j-api:1.7.26"
38 |     implementation group: 'org.apache.kafka', name: 'connect-api', version: '2.4.0'
39 |     implementation group: 'org.apache.kafka', name: 'kafka-clients', version: '2.4.0'
40 |     implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: '2.10.1'
41 |     implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.10.1'
42 |     implementation group: 'net.sf.jopt-simple', name: 'jopt-simple', version: '6.0-alpha-3'
43 |     implementation 'com.github.spotbugs:spotbugs-annotations:4.0.1'
44 |     testImplementation('org.junit.jupiter:junit-jupiter:5.6.0')
45 | }
46 | 
47 | apply plugin: "com.github.johnrengelman.shadow"
48 | 
49 | shadowJar {
50 |     dependencies {
51 |     }
52 | }
53 | 
54 | task javadocJar(type: Jar, dependsOn: javadoc) {
55 |     classifier = 'javadoc'
56 |     from subprojects*.tasks.javadoc.destinationDir
57 | }
58 | 
59 | task sourcesJar(type: Jar) {
60 |     from subprojects*.sourceSets.main.allSource
61 |     classifier = 'sources'
62 | }
63 | 
64 | artifacts {
65 |     archives javadocJar, sourcesJar
66 | }
67 | test {
68 |     useJUnitPlatform()
69 | }


--------------------------------------------------------------------------------
/docs/Comparing_Kafka_Backup_Solutions.md:
--------------------------------------------------------------------------------
  1 | # Comparing Kafka Backup Solutions
  2 | 
  3 | 
  4 | > **Update:** I am no longer maintaining the Kafka Backup project. As an alternative, I recommend [Kannika](https://kannika.io/?utm_source=github_anatoly), a commercial backup solution developed by my friends at [Cymo](https://cymo.eu/?utm_source=github_anatoly) (and don't forget to say hello from Anatoly 😊).
  5 | > [Disclosure: I am a business partner of Cymo and may receive compensation for referrals to Kannika]
  6 | > 
  7 | > Please contact me if you want to continue maintaining this project.
  8 | 
  9 | Basically there are three other ways to backup and restore data
 10 | from/to Kafka:
 11 | 
 12 | ## File System Snapshots
 13 | 
 14 | This was the easiest and most reliable way to backup data and consumer
 15 | offsets from Kafka. The procedure basically shuts down one broker
 16 | after another and performs a file system snapshot which is stored on
 17 | another (cold) disk.
 18 | 
 19 | **Backup Procedure:**
 20 | 
 21 | * Repeat for each Kafka broker:
 22 |   1. Shut down the broker
 23 |   2. Take a snapshot of the Filesystem (optional)
 24 |   3. Copy the snapshot (or simply the files) to the backup storage
 25 |   4. Turn on the broker and wait until all partitions are in sync
 26 | 
 27 | **Restore Procedure:**
 28 | 
 29 | * Restore the snapshot for each broker
 30 | * Boot the brokers
 31 | 
 32 | **Advantages:**
 33 | 
 34 | * Uses native OS tools
 35 | * As this procedure needs to be done very often, the fear of shutting
 36 |   down a broker is minimized (especially for a team and environment
 37 |   with few Kafka expertise)
 38 | * Offsets are backed up and restored correctly
 39 | * Internal topics are backed up and restored correctly
 40 | * Compacted messages are deleted too
 41 | * Messages older than the retention time are deleted too
 42 | * Uses cold storage
 43 | 
 44 | **Disadvantages:**
 45 | 
 46 | * Each message is backed up `replication factor`-times. Even if it
 47 |   enough to store it without replication.
 48 | * Reduced availability as every broker needs to be turned of for a
 49 |   backup
 50 | * Incremental Backups are harder to achieve (e.g. due to partition
 51 |   rebalancing)
 52 | * **POTENTIAL DATA LOSS**: If the backup is performed during a
 53 |   partition rebalance (very likely when the backup takes a loooong
 54 |   time) the backup could miss a whole partition due to bad timing.
 55 | 
 56 | 
 57 | ## Using Mirror Maker 2 to backup data to another Cluster
 58 | 
 59 | The traditional Mirror Maker has many issues as discussed in
 60 | [KIP-382](https://cwiki.apache.org/confluence/display/KAFKA/KIP-382%3A+MirrorMaker+2.0). Mirror
 61 | Maker 2 approaches many of them and can be used to back up data from
 62 | one cluster to another.
 63 | 
 64 | Mirror Maker 2 is also (as `kafka-backup`) based on Kafka connect and
 65 | copies consumer offsets too.
 66 | 
 67 | **Backup Procedure A+B (normal setup):**
 68 | 
 69 | * Set up the MM2 Connector that copies the data from the topic
 70 |   `[topic]` on the source cluster to the topic
 71 |   `[source-cluster-name].[topic]` on the cluster name.
 72 | * Mirror Maker 2 ensures that the messages are copied continously
 73 |   offsets are also copied to a separate topic
 74 | 
 75 | **Backup Procedure C (for consistent Snapshots):**
 76 | 
 77 | * Set up the sink (backup) cluster with one broker
 78 | * Set up the topics on the sink cluster with a replication factor of
 79 |   `1`
 80 | * Set up MM2 to copy data from the source cluster to the sink cluster
 81 | * Use a cronjob to shut down the sink cluster (with one broker)
 82 |   regularly and take a snapshot of the file system and store them on
 83 |   cold storage.
 84 | 
 85 | **Restore Procedure A (Use other cluster):**
 86 | 
 87 | * Use the offset sync topic to configure the consumer groups to
 88 |   consume from the correct offset.
 89 | * Setup the consumers to use the other cluster. Throw away the old
 90 |   one.
 91 | * Set up the clients to produce and consume from the new topics in the
 92 |   new cluster
 93 | * Set up a new Backup Cluster
 94 | 
 95 | **Restore Procedure B (Mirror data back):**
 96 | 
 97 | * Create a new Kafka Cluster
 98 | * Set up Mirror Maker 2 to copy the data to the new cluster
 99 | * Continue with procedure A
100 | 
101 | **Restore Procedure C (Mirror + Snapshot):**
102 | 
103 | * Use Procedure B or restore a new cluster from the file system
104 |   snapshots
105 | * Add more nodes accordingly
106 | * Increase the replication factor to match the requirements
107 | * Rebalance the partitions if needed
108 | * Continue with procedure A
109 | 
110 | **Advantages:**
111 | 
112 | * Support for warm cluster fail-over (active-active, active-passive)
113 | * Support for more advanced cluster topologies
114 | 
115 | **Disadvantages:**
116 | 
117 | * Requires a second Kafka Cluster
118 | * Apart from `C` this is a warm backup and does not protect from
119 |   bugs in Kafka or the underlying OS
120 | * Requires custom implementation of the switch-over handling to the
121 |   restored cluster
122 | * Adds a lot of complexity in the setup
123 | 
124 | ## `kafka-connect-s3`
125 | 
126 | `kafka-connect-s3` is a popular Kafka Connect connector to mirror the
127 | data from topics to Amazon S3 (or compatible other services like
128 | Minio). Zalando describes a setup in their article [Surviving Data
129 | Loss](https://jobs.zalando.com/tech/blog/backing-up-kafka-zookeeper/)
130 | 
131 | **Backup procedure:**
132 | 
133 | * Set up the sink connector to use your S3 endpoint
134 | * Set up another sink connector that backs up the `__consumer_offsets` topic.
135 | 
136 | **Restore procedure:**
137 | 
138 | * Set up the source connector to read the data from S3 into Kafka
139 | * Manually extract the new offset for the consumers and manually
140 |   identify which offset on the new Kafka cluster matches the old
141 |   one. (This is not a trivial task – you would need to count the ACK'd
142 |   messages from the beginning to find out the exact offset – and not
143 |   forgetting about compacted and deleted messages)
144 | 
145 | **Advantages:**
146 | 
147 | * Cold backup (to S3)
148 | * Possible to use in downstream services that work only with S3 (e.l. Data
149 |   Warehouses)
150 | 
151 | **Disadvantages:**
152 | 
153 | * Supports only S3 (and compatible systems) as the storage backend
154 | * No support for restoring consumer offsets (the method described
155 |   above could be described as guesstimating and will not work in many
156 |   edge cases)
157 | 
158 | ## `kafka-backup`
159 | 
160 | `kafka-backup` is inspired heavily by the Mirror Maker 2 and
161 | `kafka-connect-s3`. It consists of a sink and a source connector both
162 | of which support the backup and restore of the topic data and also
163 | consumer offsets.
164 | 
165 | **Backup Procedure:**
166 | 
167 | * Set up the Kafka Backup Sink connector
168 | * Copy the backed up data to a backup storage of your choice
169 | * See [GitHub](http://github.com/azapps/kafka-backup) for more
170 |   information of how to back up your Kafka Cluster
171 | 
172 | **Restore Procedure**
173 | 
174 | * Set up the Kafka Backup Source connector
175 | * Wait until it finished (see logs for information)
176 | * Use the restored cluster
177 | 
178 | **Advantages:**
179 | 
180 | * Only solution which is able to restore topic data and also consumer
181 |   offsets
182 | * Only solution designed to take cold backups of Kafka
183 | * Simple to do incremental backups
184 | 
185 | **Disadvantages:**
186 | 
187 | * See [GitHub](http://github.com/azapps/kafka-backup) for the current
188 |   maturity status of the project
189 | * Currently supports only the file system as the storage backend
190 | * Requires Kafka Connect binaries of Kafka 2.3
191 | 


--------------------------------------------------------------------------------
/docs/FAQ.md:
--------------------------------------------------------------------------------
 1 | # How to restore to a different topic
 2 | 
 3 | > I didn't see in the documentation if it's possible to be able to restore to a different destination topic, such as mybackupedtopic-restored. It would help with testing restore procedures without disturbing the existing topic, among other things.
 4 | 
 5 | Simply rename the topic directories in the Backup target.
 6 | 
 7 | # Restoring a multi-partition topic does not work
 8 | 
 9 | > When I restore topic with 24 partitions it creates topic with one partitions and restore failed.
10 | > Restore successful if I create 24 partitions topic before restore. 
11 | 
12 | You need to create the topic manually before restore. For a "real" backup scenario you also need to backup and restore Zookeeper
13 | 
14 | # Error "Plugin class loader for connector was not found" 
15 | 
16 | ```sh
17 | ERROR Plugin class loader for connector: 'de.azapps.kafkabackup.sink.BackupSinkConnector' was not found. Returning: org.apache.kafka.connect.runtime.isolation.DelegatingClassLoader@5b068087 (org.apache.kafka.connect.runtime.isolation.DelegatingClassLoader:165)
18 | ```
19 | 
20 | You forgot to build the jar file. Either get an official release of Kafka Backup or run `./gradlew shadowJar` in the root directory of Kafka Backup.


--------------------------------------------------------------------------------
/docs/Tooling.md:
--------------------------------------------------------------------------------
  1 | # Kafka Backup: Tooling
  2 | 
  3 | Before you go, you need to add the `kafka-backup.jar` to your
  4 | classpath:
  5 | 
  6 | ```sh
  7 | export CLASSPATH="./path/to/kafka-backup.jar:$CLASSPATH"
  8 | ```
  9 | 
 10 | If you are in the root directory of Kafka Backup, you can use:
 11 | 
 12 | ```sh
 13 | export CLASSPATH="`pwd`/build/libs/kafka-backup.jar:$CLASSPATH"
 14 | ```
 15 | 
 16 | ## SegmentCLI
 17 | 
 18 | Basic usage:
 19 | 
 20 | ```sh
 21 | java de.azapps.kafkabackup.cli.SegmentCLI
 22 | ```
 23 | 
 24 | ### List all records
 25 | 
 26 | ```sh
 27 | java de.azapps.kafkabackup.cli.SegmentCLI \
 28 |   --list \
 29 |   --segment /path/to/segment_partition_123_from_offset_0000000123_records
 30 | ```
 31 | 
 32 | ### Show key and value of a specific offset in a segment
 33 | 
 34 | ```sh
 35 | java de.azapps.kafkabackup.cli.SegmentCLI --show --segment /path/to/segment_partition_123_from_offset_0000000123_records --offset 597
 36 | ```
 37 | 
 38 | ### Formatting Options
 39 | 
 40 | Using the `--formatter` option you can customize how the keys and
 41 | values of the messages are formatted. The default is the
 42 | `RawFormatter` which prints the bytes as they are (i.e. as characters
 43 | to the console.
 44 | 
 45 | Implemented options:
 46 | 
 47 | * `de.azapps.kafkabackup.cli.formatters.RawFormatter`
 48 | * `de.azapps.kafkabackup.cli.formatters.UTF8Formatter`
 49 | * `de.azapps.kafkabackup.cli.formatters.Base64Formatter`
 50 | 
 51 | Example:
 52 | 
 53 | ```sh
 54 | java de.azapps.kafkabackup.cli.SegmentCLI --list \
 55 |   --segment /path/to/segment_partition_123_from_offset_0000000123_records \
 56 |   --key-formatter de.azapps.kafkabackup.cli.formatters.Base64Formatter
 57 | ```
 58 | ## SegmentIndexCLI
 59 | 
 60 | The segment index is required for faster access to the records in the
 61 | segment file. It also simplifies the implementation of the idempotent
 62 | sink connector. The segment index does not need to be backed up, but
 63 | must exist before performing a restore.
 64 | 
 65 | ### List Index entries
 66 | 
 67 | Displays information about the records referenced in the index.
 68 | 
 69 | ```sh
 70 | java de.azapps.kafkabackup.cli.SegmentIndexCLI --list \
 71 |   --segment-index /path/to/segment_partition_123_from_offset_0000000123_records \
 72 | ```
 73 | 
 74 | ### Restore Index
 75 | 
 76 | Given a record file, restores the segment index for that file.
 77 | 
 78 | ```sh
 79 | java de.azapps.kafkabackup.cli.SegmentIndexCLI --restore-index \
 80 |   --segment /path/to/segment_partition_123_from_offset_0000000123_records
 81 | ```
 82 | 
 83 | ### Restoring all Segment Indexes
 84 | 
 85 | ```sh
 86 | export TOPICDIR="/path/to/topicdir/"
 87 | for f in "$TOPICDIR"/segment_partition_*_records ; do
 88 |   java de.azapps.kafkabackup.cli.SegmentIndexCLI --restore-index \
 89 |   --segment $f
 90 | done
 91 | ```
 92 | 
 93 | ## PartitionIndexCLI
 94 | 
 95 | The partition index contains the information about which offsets are
 96 | located in which segment. This file too, does not need to be backed up
 97 | but is required for restoration.
 98 | 
 99 | It is totally ok to delete old segments that are not needed
100 | anymore. But it is crucial to restore the partition index afterwards.
101 | 
102 | ### List Index entries
103 | 
104 | ```sh
105 | java de.azapps.kafkabackup.cli.PartitionIndexCLI --list \
106 |   --partition-index /path/to/index_partition_123
107 | ```
108 | 
109 | ### Restore Partition Index
110 | 
111 | ```sh
112 | java de.azapps.kafkabackup.cli.PartitionIndexCLI --restore \
113 |   --partition 0 \
114 |   --topic-dir /path/to/topicdir/
115 | ```
116 | 
117 | #### Restore Indexes for all Partitions
118 | 
119 | ```sh
120 | export NUM_PARTITIONS=9
121 | export TOPICDIR="/path/to/topicdir/"
122 | for i in {0..$(( $NUM_PARTITIONS - 1 ))} ; do
123 |     java de.azapps.kafkabackup.cli.PartitionIndexCLI --restore --partition $i --topic-dir "$TOPICDIR"
124 | done
125 | ```
126 | 
127 | ## Completed segments processing
128 | 
129 | You may want to process completed segment files. Let's say you have your
130 | `target.dir` backed up to cloud storage daily. So you don't need to keep all
131 | the files locally then. To save some space you may delete completed segment
132 | files. There is `bin/completed_segments.py` script for your convenience.
133 | 
134 | To get some information on segment files just call script with path to your
135 | backup directory.
136 | 
137 | ```sh
138 | completed_segments.py /path/to/target_dir
139 | ```
140 | 
141 | To delete completed segments use `-d` option.
142 | ```sh
143 | completed_segments.py -d /path/to/target_dir
144 | ```
145 | 
146 | You may keep last N completed segments by using `-k N` option.
147 | 
148 | If you need more complex processing you may just list completed segment files
149 | and pass them for further processing. E.g. to keep last 2 segments and `shred`
150 | the rest run the following command.
151 | 
152 | ```sh
153 | completed_segments.py -l -k 2 /path/to/target_dir | xargs shred -u
154 | ```
155 | 


--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/gradle/wrapper/gradle-wrapper.jar


--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Sun Jun 07 12:49:55 CEST 2020
2 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-all.zip
3 | distributionBase=GRADLE_USER_HOME
4 | distributionPath=wrapper/dists
5 | zipStorePath=wrapper/dists
6 | zipStoreBase=GRADLE_USER_HOME
7 | 


--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env sh
  2 | 
  3 | ##############################################################################
  4 | ##
  5 | ##  Gradle start up script for UN*X
  6 | ##
  7 | ##############################################################################
  8 | 
  9 | # Attempt to set APP_HOME
 10 | # Resolve links: $0 may be a link
 11 | PRG="$0"
 12 | # Need this for relative symlinks.
 13 | while [ -h "$PRG" ] ; do
 14 |     ls=`ls -ld "$PRG"`
 15 |     link=`expr "$ls" : '.*-> \(.*\)$'`
 16 |     if expr "$link" : '/.*' > /dev/null; then
 17 |         PRG="$link"
 18 |     else
 19 |         PRG=`dirname "$PRG"`"/$link"
 20 |     fi
 21 | done
 22 | SAVED="`pwd`"
 23 | cd "`dirname \"$PRG\"`/" >/dev/null
 24 | APP_HOME="`pwd -P`"
 25 | cd "$SAVED" >/dev/null
 26 | 
 27 | APP_NAME="Gradle"
 28 | APP_BASE_NAME=`basename "$0"`
 29 | 
 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
 31 | DEFAULT_JVM_OPTS='"-Xmx64m"'
 32 | 
 33 | # Use the maximum available, or set MAX_FD != -1 to use that value.
 34 | MAX_FD="maximum"
 35 | 
 36 | warn () {
 37 |     echo "$*"
 38 | }
 39 | 
 40 | die () {
 41 |     echo
 42 |     echo "$*"
 43 |     echo
 44 |     exit 1
 45 | }
 46 | 
 47 | # OS specific support (must be 'true' or 'false').
 48 | cygwin=false
 49 | msys=false
 50 | darwin=false
 51 | nonstop=false
 52 | case "`uname`" in
 53 |   CYGWIN* )
 54 |     cygwin=true
 55 |     ;;
 56 |   Darwin* )
 57 |     darwin=true
 58 |     ;;
 59 |   MINGW* )
 60 |     msys=true
 61 |     ;;
 62 |   NONSTOP* )
 63 |     nonstop=true
 64 |     ;;
 65 | esac
 66 | 
 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
 68 | 
 69 | # Determine the Java command to use to start the JVM.
 70 | if [ -n "$JAVA_HOME" ] ; then
 71 |     if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
 72 |         # IBM's JDK on AIX uses strange locations for the executables
 73 |         JAVACMD="$JAVA_HOME/jre/sh/java"
 74 |     else
 75 |         JAVACMD="$JAVA_HOME/bin/java"
 76 |     fi
 77 |     if [ ! -x "$JAVACMD" ] ; then
 78 |         die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
 79 | 
 80 | Please set the JAVA_HOME variable in your environment to match the
 81 | location of your Java installation."
 82 |     fi
 83 | else
 84 |     JAVACMD="java"
 85 |     which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
 86 | 
 87 | Please set the JAVA_HOME variable in your environment to match the
 88 | location of your Java installation."
 89 | fi
 90 | 
 91 | # Increase the maximum file descriptors if we can.
 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
 93 |     MAX_FD_LIMIT=`ulimit -H -n`
 94 |     if [ $? -eq 0 ] ; then
 95 |         if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
 96 |             MAX_FD="$MAX_FD_LIMIT"
 97 |         fi
 98 |         ulimit -n $MAX_FD
 99 |         if [ $? -ne 0 ] ; then
100 |             warn "Could not set maximum file descriptor limit: $MAX_FD"
101 |         fi
102 |     else
103 |         warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104 |     fi
105 | fi
106 | 
107 | # For Darwin, add options to specify how the application appears in the dock
108 | if $darwin; then
109 |     GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110 | fi
111 | 
112 | # For Cygwin, switch paths to Windows format before running java
113 | if $cygwin ; then
114 |     APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115 |     CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116 |     JAVACMD=`cygpath --unix "$JAVACMD"`
117 | 
118 |     # We build the pattern for arguments to be converted via cygpath
119 |     ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120 |     SEP=""
121 |     for dir in $ROOTDIRSRAW ; do
122 |         ROOTDIRS="$ROOTDIRS$SEP$dir"
123 |         SEP="|"
124 |     done
125 |     OURCYGPATTERN="(^($ROOTDIRS))"
126 |     # Add a user-defined pattern to the cygpath arguments
127 |     if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128 |         OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129 |     fi
130 |     # Now convert the arguments - kludge to limit ourselves to /bin/sh
131 |     i=0
132 |     for arg in "$@" ; do
133 |         CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134 |         CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
135 | 
136 |         if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
137 |             eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138 |         else
139 |             eval `echo args$i`="\"$arg\""
140 |         fi
141 |         i=$((i+1))
142 |     done
143 |     case $i in
144 |         (0) set -- ;;
145 |         (1) set -- "$args0" ;;
146 |         (2) set -- "$args0" "$args1" ;;
147 |         (3) set -- "$args0" "$args1" "$args2" ;;
148 |         (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149 |         (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150 |         (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151 |         (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152 |         (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153 |         (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154 |     esac
155 | fi
156 | 
157 | # Escape application args
158 | save () {
159 |     for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160 |     echo " "
161 | }
162 | APP_ARGS=$(save "$@")
163 | 
164 | # Collect all arguments for the java command, following the shell quoting and substitution rules
165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166 | 
167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169 |   cd "$(dirname "$0")"
170 | fi
171 | 
172 | exec "$JAVACMD" "$@"
173 | 


--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
 1 | @if "%DEBUG%" == "" @echo off
 2 | @rem ##########################################################################
 3 | @rem
 4 | @rem  Gradle startup script for Windows
 5 | @rem
 6 | @rem ##########################################################################
 7 | 
 8 | @rem Set local scope for the variables with windows NT shell
 9 | if "%OS%"=="Windows_NT" setlocal
10 | 
11 | set DIRNAME=%~dp0
12 | if "%DIRNAME%" == "" set DIRNAME=.
13 | set APP_BASE_NAME=%~n0
14 | set APP_HOME=%DIRNAME%
15 | 
16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17 | set DEFAULT_JVM_OPTS="-Xmx64m"
18 | 
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 | 
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 | 
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 | 
32 | goto fail
33 | 
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 | 
38 | if exist "%JAVA_EXE%" goto init
39 | 
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 | 
46 | goto fail
47 | 
48 | :init
49 | @rem Get command-line arguments, handling Windows variants
50 | 
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 | 
53 | :win9xME_args
54 | @rem Slurp the command line arguments.
55 | set CMD_LINE_ARGS=
56 | set _SKIP=2
57 | 
58 | :win9xME_args_slurp
59 | if "x%~1" == "x" goto execute
60 | 
61 | set CMD_LINE_ARGS=%*
62 | 
63 | :execute
64 | @rem Setup the command line
65 | 
66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67 | 
68 | @rem Execute Gradle
69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70 | 
71 | :end
72 | @rem End local scope for the variables with windows NT shell
73 | if "%ERRORLEVEL%"=="0" goto mainEnd
74 | 
75 | :fail
76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77 | rem the _cmd.exe /c_ return code!
78 | if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79 | exit /b 1
80 | 
81 | :mainEnd
82 | if "%OS%"=="Windows_NT" endlocal
83 | 
84 | :omega
85 | 


--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | rootProject.name = 'kafka-backup'


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/PartitionIndexCLI.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli;
 2 | 
 3 | import de.azapps.kafkabackup.common.partition.PartitionIndex;
 4 | import de.azapps.kafkabackup.common.partition.PartitionIndexEntry;
 5 | import de.azapps.kafkabackup.common.partition.PartitionIndexRestore;
 6 | import joptsimple.OptionParser;
 7 | import joptsimple.OptionSet;
 8 | import org.apache.kafka.common.utils.Exit;
 9 | 
10 | import java.io.IOException;
11 | import java.nio.file.Paths;
12 | import java.util.Arrays;
13 | import java.util.stream.Stream;
14 | 
15 | public class PartitionIndexCLI {
16 |     private static final String CMD_LIST = "list";
17 |     private static final String CMD_RESTORE = "restore-index";
18 |     private static final String[] COMMANDS = {CMD_LIST, CMD_RESTORE};
19 |     private static final String ARG_PARTITION_INDEX = "partition-index";
20 |     private static final String ARG_TOPIC_DIR = "topic-dir";
21 |     private static final String ARG_PARTITION = "partition";
22 | 
23 |     public static void main(String[] args) throws Exception {
24 |         /*
25 |         cli --list --partition-index [file]
26 |         cli --restore-index --partition 0 --topic-dir [dir]
27 |         // ideas for later
28 |         cli --validate --partition-index [file] --partition 0 --topic-dir [dir]
29 |          */
30 |         final OptionParser optionParser = new OptionParser();
31 |         // Commands
32 |         optionParser.accepts(CMD_LIST);
33 |         optionParser.accepts(CMD_RESTORE);
34 |         // Arguments
35 |         optionParser.accepts(ARG_PARTITION_INDEX)
36 |                 .requiredIf(CMD_LIST)
37 |                 .withRequiredArg().ofType(String.class);
38 |         optionParser.accepts(ARG_TOPIC_DIR)
39 |                 .requiredIf(CMD_RESTORE)
40 |                 .withRequiredArg().ofType(String.class);
41 |         optionParser.accepts(ARG_PARTITION)
42 |                 .requiredIf(CMD_RESTORE)
43 |                 .withRequiredArg().ofType(Integer.class);
44 | 
45 |         OptionSet options;
46 |         try {
47 |             options = optionParser.parse(args);
48 |             if (Stream.of(COMMANDS).filter(options::has).count() != 1) {
49 |                 throw new Exception("Must contain exactly one of " + Arrays.toString(COMMANDS));
50 |             }
51 |         } catch (Exception e) {
52 |             System.err.println(e.getMessage());
53 |             optionParser.printHelpOn(System.err);
54 |             Exit.exit(-1);
55 |             return;
56 |         }
57 |         if (options.has(CMD_LIST)) {
58 |             list((String) options.valueOf(ARG_PARTITION_INDEX));
59 |         } else if (options.has(CMD_RESTORE)) {
60 |             restore((String) options.valueOf(ARG_TOPIC_DIR), (Integer) options.valueOf(ARG_PARTITION));
61 |         } else {
62 |             optionParser.printHelpOn(System.err);
63 |         }
64 |     }
65 | 
66 |     private static void restore(String topicDir, int partition) throws PartitionIndex.IndexException, PartitionIndexRestore.RestoreException, IOException {
67 |         PartitionIndexRestore restore = new PartitionIndexRestore(Paths.get(topicDir), partition);
68 |         restore.restore();
69 |     }
70 | 
71 |     private static void list(String partitionIndexFileName) throws IOException, PartitionIndex.IndexException {
72 |         System.out.println(partitionIndexFileName);
73 |         PartitionIndex partitionIndex = new PartitionIndex(Paths.get(partitionIndexFileName));
74 |         for (PartitionIndexEntry entry : partitionIndex.index()) {
75 |             System.out.println(String.format("File: %s StartOffset: %d", entry.filename(), entry.startOffset()));
76 |         }
77 |     }
78 | }
79 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/SegmentCLI.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.cli;
  2 | 
  3 | import de.azapps.kafkabackup.cli.formatters.*;
  4 | import de.azapps.kafkabackup.common.record.Record;
  5 | import de.azapps.kafkabackup.common.segment.UnverifiedSegmentReader;
  6 | import joptsimple.OptionParser;
  7 | import joptsimple.OptionSet;
  8 | import org.apache.kafka.common.utils.Exit;
  9 | 
 10 | import java.io.EOFException;
 11 | import java.io.IOException;
 12 | import java.lang.reflect.InvocationTargetException;
 13 | import java.nio.file.Paths;
 14 | import java.util.Arrays;
 15 | import java.util.stream.Stream;
 16 | 
 17 | public class SegmentCLI {
 18 |     private static final String CMD_LIST = "list";
 19 |     private static final String ARG_SEGMENT = "segment";
 20 |     private static final String ARG_SEGMENT_HELP = "Segment file (of the form segment_partition_xxx_from_offset_xxxxxx_records)";
 21 |     private static final String CMD_SHOW = "show";
 22 |     private static final String ARG_OFFSET = "offset";
 23 |     private static final String ARG_OFFSET_HELP = "The offset of the message to display";
 24 |     private static final String CMD_LIST_HELP = "Lists all records in the segment. Just counting the value length – not displaying it.";
 25 |     private static final String CMD_SHOW_HELP = "Shows a specific record in the segment. Displays key and value";
 26 |     private static final String[] COMMANDS = {CMD_LIST, CMD_SHOW};
 27 |     private static final String ARG_KEY_FORMAT = "key-formatter";
 28 |     private static final String ARG_KEY_FORMAT_HELP = "Which formatter to use to display the key (default: StringFormatter)";
 29 |     private static final String ARG_VALUE_FORMAT = "value-formatter";
 30 |     private static final String ARG_VALUE_FORMAT_HELP = "Which formatter to use to display the value (default: StringFormatter)";
 31 | 
 32 |     public static void main(String[] args) throws IOException {
 33 |         /*
 34 |         cli --list --segment [file]
 35 |         cli --show --segment [file] --offset 0
 36 |          */
 37 |         final OptionParser optionParser = new OptionParser();
 38 |         optionParser.accepts(ARG_SEGMENT, ARG_SEGMENT_HELP).withRequiredArg().ofType(String.class);
 39 |         optionParser.accepts(CMD_LIST, CMD_LIST_HELP);
 40 |         optionParser.accepts(CMD_SHOW, CMD_SHOW_HELP);
 41 |         optionParser.accepts(ARG_OFFSET, ARG_OFFSET_HELP).requiredIf(CMD_SHOW).withRequiredArg().ofType(Long.class);
 42 |         optionParser.accepts(ARG_KEY_FORMAT, ARG_KEY_FORMAT_HELP).withRequiredArg().ofType(String.class)
 43 |                 .defaultsTo(RawFormatter.class.getCanonicalName());
 44 |         optionParser.accepts(ARG_VALUE_FORMAT, ARG_VALUE_FORMAT_HELP).withRequiredArg().ofType(String.class)
 45 |                 .defaultsTo(RawFormatter.class.getCanonicalName());
 46 | 
 47 | 
 48 |         OptionSet options;
 49 |         try {
 50 |             options = optionParser.parse(args);
 51 |             if (Stream.of(COMMANDS).filter(options::has).count() != 1) {
 52 |                 throw new Exception("Must contain exactly one of " + Arrays.toString(COMMANDS));
 53 |             }
 54 |         } catch (Exception e) {
 55 |             System.err.println(e.getMessage());
 56 |             optionParser.printHelpOn(System.err);
 57 |             Exit.exit(-1);
 58 |             return;
 59 |         }
 60 | 
 61 |         String segmentIndexFileName = (String) options.valueOf(ARG_SEGMENT);
 62 |         if (!segmentIndexFileName.endsWith("_records")) {
 63 |             segmentIndexFileName += "_records";
 64 |         }
 65 |         UnverifiedSegmentReader segmentReader = new UnverifiedSegmentReader(Paths.get(segmentIndexFileName));
 66 | 
 67 |         ByteFormatter keyFormatter = (ByteFormatter) instanciateClass((String) options.valueOf(ARG_KEY_FORMAT));
 68 |         ByteFormatter valueFormatter = (ByteFormatter) instanciateClass((String) options.valueOf(ARG_VALUE_FORMAT));
 69 |         if (options.has(CMD_LIST)) {
 70 |             RecordFormatter formatter = new ListRecordFormatter(keyFormatter, valueFormatter);
 71 |             list(segmentReader, formatter);
 72 |         } else if (options.has(CMD_SHOW)) {
 73 |             RecordFormatter formatter = new DetailedRecordFormatter(keyFormatter, valueFormatter);
 74 |             show(segmentReader, formatter, (Long) options.valueOf(ARG_OFFSET));
 75 |         }
 76 | 
 77 |     }
 78 | 
 79 |     private static Object instanciateClass(String name) {
 80 |         try {
 81 |             Class formatterClass = Class.forName(name);
 82 |             return formatterClass.getDeclaredConstructor().newInstance();
 83 |         } catch (ClassNotFoundException | IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
 84 |             System.err.println("formatter must be a valid class");
 85 |             Exit.exit(1);
 86 |             // impossible to reach
 87 |             throw new RuntimeException("…");
 88 |         }
 89 |     }
 90 | 
 91 |     private static void show(UnverifiedSegmentReader segmentReader, RecordFormatter formatter, Long offset) {
 92 |         long maxOffset = -1;
 93 |         while (true) {
 94 |             try {
 95 |                 Record record = segmentReader.read();
 96 |                 maxOffset = record.kafkaOffset();
 97 |                 if (record.kafkaOffset() == offset) {
 98 |                     formatter.writeTo(record, System.out);
 99 |                     Exit.exit(0);
100 |                 }
101 |             } catch (EOFException e) {
102 |                 System.out.println("Did not found offset " + offset + " in file. Max offset is " + maxOffset);
103 |                 Exit.exit(-2);
104 |             } catch (IOException e) {
105 |                 e.printStackTrace();
106 |                 Exit.exit(-3);
107 |             }
108 |         }
109 |     }
110 | 
111 |     private static void list(UnverifiedSegmentReader segmentReader, RecordFormatter formatter) {
112 |         int cnt = 0;
113 |         while (true) {
114 |             try {
115 |                 Record record = segmentReader.read();
116 |                 formatter.writeTo(record, System.out);
117 |                 cnt++;
118 |             } catch (EOFException e) {
119 |                 break;
120 |             } catch (IOException e) {
121 |                 e.printStackTrace();
122 |                 Exit.exit(-2);
123 |             }
124 |         }
125 |         System.out.println(String.format("%d entries in Segment", cnt));
126 | 
127 |     }
128 | }
129 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/SegmentIndexCLI.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli;
 2 | 
 3 | import de.azapps.kafkabackup.common.segment.SegmentIndex;
 4 | import de.azapps.kafkabackup.common.segment.SegmentIndexEntry;
 5 | import de.azapps.kafkabackup.common.segment.SegmentIndexRestore;
 6 | import joptsimple.OptionParser;
 7 | import joptsimple.OptionSet;
 8 | import org.apache.kafka.common.utils.Exit;
 9 | 
10 | import java.io.IOException;
11 | import java.nio.file.Paths;
12 | import java.util.Arrays;
13 | import java.util.List;
14 | import java.util.stream.Stream;
15 | 
16 | public class SegmentIndexCLI {
17 |     private static final String CMD_LIST = "list";
18 |     private static final String CMD_LIST_HELP = "List all Records in a segment";
19 |     private static final String ARG_SEGMENT_INDEX = "segment-index";
20 |     private static final String ARG_SEGMENT_INDEX_HELP = "Segment index file (of the form segment_partition_xxx_from_offset_xxxxx_index)";
21 |     private static final String CMD_RESTORE = "restore-index";
22 |     private static final String CMD_RESTORE_HELP = "Restores the segment index given the segment file";
23 |     private static final String ARG_SEGMENT = "segment";
24 |     private static final String ARG_SEGMENT_HELP = "Segment file (of the form segment_partition_xxx_from_offset_xxxxxx_records)";
25 |     private static final String[] COMMANDS = {CMD_LIST, CMD_RESTORE};
26 | 
27 |     public static void main(String[] args) throws Exception {
28 |         /*
29 |         cli --list --segment-index [file]
30 |         cli --restore-index --segment [file]
31 |         // ideas for later
32 |         cli --show --segment-index [file] --offset [offset]
33 |         cli --validate --segment-index [file] --segment [file]
34 |          */
35 |         final OptionParser optionParser = new OptionParser();
36 |         // Commands
37 |         optionParser.accepts(CMD_LIST, CMD_LIST_HELP);
38 |         optionParser.accepts(CMD_RESTORE, CMD_RESTORE_HELP);
39 |         // Arguments
40 |         optionParser.accepts(ARG_SEGMENT_INDEX, ARG_SEGMENT_INDEX_HELP)
41 |                 .requiredIf(CMD_LIST)
42 |                 .withRequiredArg().ofType(String.class);
43 |         optionParser.accepts(ARG_SEGMENT, ARG_SEGMENT_HELP)
44 |                 .requiredIf(CMD_RESTORE)
45 |                 .withRequiredArg().ofType(String.class);
46 | 
47 |         OptionSet options;
48 |         try {
49 |             options = optionParser.parse(args);
50 |             if (Stream.of(COMMANDS).filter(options::has).count() != 1) {
51 |                 throw new Exception("Must contain exactly one of " + Arrays.toString(COMMANDS));
52 |             }
53 |         } catch (Exception e) {
54 |             System.err.println(e.getMessage());
55 |             optionParser.printHelpOn(System.err);
56 |             Exit.exit(-1);
57 |             return;
58 |         }
59 | 
60 |         if (options.has(CMD_LIST)) {
61 |             list((String) options.valueOf(ARG_SEGMENT_INDEX));
62 |         } else if (options.has(CMD_RESTORE)) {
63 |             restore((String) options.valueOf(ARG_SEGMENT));
64 |         }
65 |     }
66 | 
67 |     private static void restore(String segmentFileName) throws SegmentIndex.IndexException, SegmentIndexRestore.RestoreException, IOException {
68 |         if (!segmentFileName.endsWith("_records")) {
69 |             segmentFileName += "_records";
70 |         }
71 |         SegmentIndexRestore restore = new SegmentIndexRestore(Paths.get(segmentFileName));
72 |         restore.restore();
73 |     }
74 | 
75 |     private static void list(String segmentIndexFileName) throws IOException, SegmentIndex.IndexException {
76 |         if (!segmentIndexFileName.endsWith("_index")) {
77 |             segmentIndexFileName += "_index";
78 |         }
79 |         SegmentIndex segmentIndex = new SegmentIndex(Paths.get(segmentIndexFileName));
80 |         List<SegmentIndexEntry> index = segmentIndex.index();
81 |         long previousOffset = index.get(0).getOffset() - 1;
82 |         for (SegmentIndexEntry entry : index) {
83 |             System.out.print(String.format("Offset: %d Position: %d Length: %d", entry.getOffset(), entry.recordFilePosition(), entry.recordByteLength()));
84 |             if (entry.getOffset() > previousOffset + 1) {
85 |                 System.out.print(" <- FYI Here is a gap");
86 |             }
87 |             System.out.println();
88 |             previousOffset = entry.getOffset();
89 |         }
90 |         System.out.println(String.format("%d entries in Index", index.size()));
91 |     }
92 | }
93 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/formatters/Base64Formatter.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli.formatters;
 2 | 
 3 | import java.util.Base64;
 4 | 
 5 | public class Base64Formatter implements ByteFormatter {
 6 |     @Override
 7 |     public String toString(byte[] in) {
 8 |         Base64.Encoder encoder = Base64.getEncoder();
 9 |         return encoder.encodeToString(in);
10 |     }
11 | }
12 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/formatters/ByteFormatter.java:
--------------------------------------------------------------------------------
1 | package de.azapps.kafkabackup.cli.formatters;
2 | 
3 | public interface ByteFormatter {
4 |     String toString(byte[] in);
5 | }
6 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/formatters/DetailedRecordFormatter.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli.formatters;
 2 | 
 3 | import de.azapps.kafkabackup.common.record.Record;
 4 | 
 5 | import java.io.PrintStream;
 6 | 
 7 | public class DetailedRecordFormatter extends RecordFormatter {
 8 | 
 9 |     public DetailedRecordFormatter(ByteFormatter keyFormatter, ByteFormatter valueFormatter) {
10 |         super(keyFormatter, valueFormatter);
11 |     }
12 | 
13 |     @Override
14 |     public void writeTo(Record record, PrintStream outputStream) {
15 |         outputStream.println(keyFormatter.toString(record.key())
16 |                 + ", "
17 |                 + valueFormatter.toString(record.value()));
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/formatters/ListRecordFormatter.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli.formatters;
 2 | 
 3 | import de.azapps.kafkabackup.common.record.Record;
 4 | 
 5 | import java.io.PrintStream;
 6 | import java.text.DateFormat;
 7 | import java.text.SimpleDateFormat;
 8 | 
 9 | public class ListRecordFormatter extends RecordFormatter {
10 |     private final DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
11 | 
12 |     public ListRecordFormatter(ByteFormatter keyFormatter, ByteFormatter valueFormatter) {
13 |         super(keyFormatter, valueFormatter);
14 |     }
15 | 
16 |     @Override
17 |     public void writeTo(Record record, PrintStream outputStream) {
18 |         String offset = "Offset: " + record.kafkaOffset();
19 |         String key;
20 |         if (record.key() == null) {
21 |             key = "NULL Key";
22 |         } else {
23 |             key = "Key: " + keyFormatter.toString(record.key());
24 |         }
25 |         String timestamp = "Timestamp: ";
26 |         System.out.println(record);
27 | 
28 |         switch (record.timestampType()) {
29 |             case NO_TIMESTAMP_TYPE:
30 |                 timestamp += "No Timestamp";
31 |                 break;
32 |             case CREATE_TIME:
33 |                 timestamp += "(create)";
34 |                 timestamp += timestampFormat.format(record.timestamp());
35 |                 break;
36 |             case LOG_APPEND_TIME:
37 |                 timestamp += "(log append)";
38 |                 timestamp += timestampFormat.format(record.timestamp());
39 |                 break;
40 |         }
41 |         String data_length;
42 |         if (record.value() == null) {
43 |             data_length = "NULL Value";
44 |         } else {
45 |             data_length = "Data: " + valueFormatter.toString(record.value());
46 |         }
47 | 
48 |         outputStream.println(offset + " " + key + " " + timestamp + " " + data_length);
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/formatters/RawFormatter.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli.formatters;
 2 | 
 3 | import java.nio.charset.StandardCharsets;
 4 | 
 5 | public class RawFormatter implements ByteFormatter {
 6 |     @Override
 7 |     public String toString(byte[] in) {
 8 |         return new String(in, StandardCharsets.UTF_8);
 9 |     }
10 | }
11 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/formatters/RecordFormatter.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli.formatters;
 2 | 
 3 | import de.azapps.kafkabackup.common.record.Record;
 4 | 
 5 | import java.io.PrintStream;
 6 | 
 7 | public abstract class RecordFormatter {
 8 |     ByteFormatter keyFormatter;
 9 |     ByteFormatter valueFormatter;
10 | 
11 |     RecordFormatter(ByteFormatter keyFormatter, ByteFormatter valueFormatter) {
12 |         this.keyFormatter = keyFormatter;
13 |         this.valueFormatter = valueFormatter;
14 |     }
15 | 
16 |     public abstract void writeTo(Record record, PrintStream outputStream);
17 | }
18 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/cli/formatters/UTF8Formatter.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.cli.formatters;
 2 | 
 3 | import java.nio.charset.StandardCharsets;
 4 | 
 5 | public class UTF8Formatter implements ByteFormatter {
 6 |     @Override
 7 |     public String toString(byte[] in) {
 8 |         return new String(in, StandardCharsets.UTF_8);
 9 |     }
10 | }
11 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/BackupConfig.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common;
 2 | 
 3 | import org.apache.kafka.common.config.AbstractConfig;
 4 | import org.apache.kafka.common.config.ConfigDef;
 5 | 
 6 | import java.util.Map;
 7 | 
 8 | public abstract class BackupConfig extends AbstractConfig {
 9 |     public static final String CLUSTER_PREFIX = "cluster.";
10 |     public static final String CLUSTER_BOOTSTRAP_SERVERS = CLUSTER_PREFIX + "bootstrap.servers";
11 |     public static final String KEY_CONVERTER = "key.converter";
12 |     public static final String VALUE_CONVERTER = "value.converter";
13 |     public static final String HEADER_CONVERTER = "header.converter";
14 |     public static final String MANDATORY_CONVERTER = "org.apache.kafka.connect.converters.ByteArrayConverter";
15 | 
16 |     public BackupConfig(ConfigDef configDef, Map<?, ?> props) {
17 |         super(configDef, props);
18 |         if (!props.containsKey(CLUSTER_BOOTSTRAP_SERVERS)) {
19 |             throw new RuntimeException("Missing Configuration Variable: " + CLUSTER_BOOTSTRAP_SERVERS);
20 |         }
21 | 
22 |         if(!props.containsKey(KEY_CONVERTER) || ! props.get(KEY_CONVERTER).equals(MANDATORY_CONVERTER)) {
23 |             throw new RuntimeException(KEY_CONVERTER + " must be set and must equal " + MANDATORY_CONVERTER);
24 |         }
25 | 
26 |         if(!props.containsKey(VALUE_CONVERTER) || !props.get(VALUE_CONVERTER).equals(MANDATORY_CONVERTER)) {
27 |             throw new RuntimeException(VALUE_CONVERTER + " must be set and must equal " + MANDATORY_CONVERTER);
28 |         }
29 | 
30 |         if(!props.containsKey(HEADER_CONVERTER) || !props.get(HEADER_CONVERTER).equals(MANDATORY_CONVERTER)) {
31 |             throw new RuntimeException(HEADER_CONVERTER + " must be set and must equal " + MANDATORY_CONVERTER);
32 |         }
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/offset/EndOffsetReader.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.offset;
 2 | 
 3 | import org.apache.kafka.clients.consumer.KafkaConsumer;
 4 | import org.apache.kafka.common.TopicPartition;
 5 | import org.apache.kafka.common.serialization.ByteArrayDeserializer;
 6 | 
 7 | import java.util.*;
 8 | 
 9 | public class EndOffsetReader {
10 |   private final Map<String, Object> consumerConfig;
11 | 
12 |   public EndOffsetReader(Map<String, Object> consumerConfig) {
13 |     this.consumerConfig = consumerConfig;
14 |   }
15 | 
16 |   /**
17 |    * Obtain end offsets for each given partition
18 |    */
19 |   public Map<TopicPartition, Long> getEndOffsets(Collection<TopicPartition> partitions) {
20 |     Map<String, Object> serializerConfig = new HashMap<>(consumerConfig);
21 |     serializerConfig.put("key.deserializer", ByteArrayDeserializer.class.getName());
22 |     serializerConfig.put("value.deserializer", ByteArrayDeserializer.class.getName());
23 |     try (KafkaConsumer<Byte[], Byte[]> consumer = new KafkaConsumer<>(serializerConfig)) {
24 |       consumer.assign(partitions);
25 | 
26 |       Map<TopicPartition, Long> offsets = consumer.endOffsets(partitions);
27 |       List<TopicPartition> toRemove = new ArrayList<>();
28 | 
29 |       for (Map.Entry<TopicPartition, Long> partitionOffset : offsets.entrySet()) {
30 |         if (partitionOffset.getValue() == 0L) {
31 |           toRemove.add(partitionOffset.getKey()); // don't store empty offsets
32 |         }
33 |       }
34 | 
35 |       for (TopicPartition partition : toRemove) {
36 |         offsets.remove(partition);
37 |       }
38 | 
39 |       return offsets;
40 |     }
41 |   }
42 | }
43 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/offset/OffsetSink.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.offset;
  2 | 
  3 | import com.fasterxml.jackson.databind.ObjectMapper;
  4 | import org.apache.kafka.clients.admin.AdminClient;
  5 | import org.apache.kafka.clients.admin.ConsumerGroupListing;
  6 | import org.apache.kafka.clients.consumer.OffsetAndMetadata;
  7 | import org.apache.kafka.common.TopicPartition;
  8 | import org.apache.kafka.connect.errors.RetriableException;
  9 | 
 10 | import java.io.IOException;
 11 | import java.nio.file.Files;
 12 | import java.nio.file.Path;
 13 | import java.nio.file.Paths;
 14 | import java.util.ArrayList;
 15 | import java.util.HashMap;
 16 | import java.util.List;
 17 | import java.util.Map;
 18 | import java.util.concurrent.ExecutionException;
 19 | import java.util.stream.Collectors;
 20 | 
 21 | public class OffsetSink {
 22 |     private final Path targetDir;
 23 |     private final Map<TopicPartition, OffsetStoreFile> topicOffsets = new HashMap<>();
 24 |     private List<String> consumerGroups = new ArrayList<>();
 25 |     private final AdminClient adminClient;
 26 | 
 27 |     public OffsetSink(AdminClient adminClient, Path targetDir) {
 28 |         this.adminClient = adminClient;
 29 |         this.targetDir = targetDir;
 30 |     }
 31 | 
 32 |     public void syncConsumerGroups() {
 33 |         try {
 34 |             consumerGroups = adminClient.listConsumerGroups().all().get().stream().map(ConsumerGroupListing::groupId).collect(Collectors.toList());
 35 |         } catch (InterruptedException | ExecutionException e) {
 36 |             throw new RetriableException(e);
 37 |         }
 38 |     }
 39 | 
 40 |     public void syncOffsets() throws IOException {
 41 |         boolean error = false;
 42 |         for (String consumerGroup : consumerGroups) {
 43 |             try {
 44 |                 syncOffsetsForGroup(consumerGroup);
 45 |             } catch (IOException e) {
 46 |                 e.printStackTrace();
 47 |                 error = true;
 48 |             }
 49 |         }
 50 |         if (error) {
 51 |             throw new IOException("syncOffsets() threw an IOException");
 52 |         }
 53 |     }
 54 | 
 55 |     private void syncOffsetsForGroup(String consumerGroup) throws IOException {
 56 |         Map<TopicPartition, OffsetAndMetadata> topicOffsetsAndMetadata;
 57 |         try {
 58 |             topicOffsetsAndMetadata = adminClient.listConsumerGroupOffsets(consumerGroup).partitionsToOffsetAndMetadata().get();
 59 |         } catch (InterruptedException | ExecutionException e) {
 60 |             throw new RetriableException(e);
 61 |         }
 62 |         for (Map.Entry<TopicPartition, OffsetAndMetadata> entry : topicOffsetsAndMetadata.entrySet()) {
 63 |             TopicPartition tp = entry.getKey();
 64 |             OffsetAndMetadata offsetAndMetadata = entry.getValue();
 65 | 
 66 |             if (validTopic(tp.topic())) {
 67 |                 if (!this.topicOffsets.containsKey(tp)) {
 68 |                     this.topicOffsets.put(tp, new OffsetStoreFile(targetDir, tp));
 69 |                 }
 70 |                 OffsetStoreFile offsets = this.topicOffsets.get(tp);
 71 |                 offsets.put(consumerGroup, offsetAndMetadata.offset());
 72 |             }
 73 |         }
 74 |     }
 75 | 
 76 |     private boolean validTopic(String topic) {
 77 |         return Files.isDirectory(Paths.get(targetDir.toString(), topic));
 78 |     }
 79 | 
 80 |     public void flush() throws IOException {
 81 |         boolean error = false;
 82 |         for (OffsetStoreFile offsetStoreFile : topicOffsets.values()) {
 83 |             try {
 84 |                 offsetStoreFile.flush();
 85 |             } catch (IOException e) {
 86 |                 e.printStackTrace();
 87 |                 error = true;
 88 |             }
 89 |         }
 90 |         if (error) {
 91 |             throw new IOException("syncOffsets() threw an IOException");
 92 |         }
 93 |     }
 94 | 
 95 |     public void close() throws IOException {
 96 |         flush();
 97 |     }
 98 | 
 99 |     private static class OffsetStoreFile {
100 |         private Map<String, Long> groupOffsets = new HashMap<>();
101 | 
102 |         private final ObjectMapper mapper = new ObjectMapper();
103 |         private final Path storeFile;
104 | 
105 |         OffsetStoreFile(Path targetDir, TopicPartition topicPartition) throws IOException {
106 |             storeFile = OffsetUtils.offsetStoreFile(targetDir, topicPartition);
107 |             if (!Files.isRegularFile(storeFile)) {
108 |                 Files.createFile(storeFile);
109 |             }
110 |             if (Files.size(storeFile) > 0) {
111 |                 groupOffsets = mapper.readValue(storeFile.toFile(), Map.class);
112 |             }
113 |         }
114 | 
115 |         void put(String consumerGroup, long offset) {
116 |             groupOffsets.put(consumerGroup, offset);
117 |         }
118 | 
119 |         void flush() throws IOException {
120 |             mapper.writeValue(storeFile.toFile(), groupOffsets);
121 |         }
122 |     }
123 | }
124 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/offset/OffsetSource.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.offset;
 2 | 
 3 | import com.fasterxml.jackson.core.type.TypeReference;
 4 | import com.fasterxml.jackson.databind.ObjectMapper;
 5 | import org.apache.kafka.clients.consumer.Consumer;
 6 | import org.apache.kafka.clients.consumer.KafkaConsumer;
 7 | import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 8 | import org.apache.kafka.common.TopicPartition;
 9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 | 
12 | import java.io.IOException;
13 | import java.nio.file.Files;
14 | import java.nio.file.Path;
15 | import java.nio.file.Paths;
16 | import java.util.*;
17 | import java.util.stream.Collectors;
18 | 
19 | public class OffsetSource {
20 |     private static final Logger log = LoggerFactory.getLogger(OffsetSource.class);
21 |     private final Map<TopicPartition, OffsetStoreFile> topicOffsets = new HashMap<>();
22 |     private final Map<String, Object> consumerConfig;
23 | 
24 |     public OffsetSource(Path backupDir, List<String> topics, Map<String, Object> consumerConfig) throws IOException {
25 |         this.consumerConfig = consumerConfig;
26 |         for (String topic : topics) {
27 |             findOffsetStores(backupDir, topic);
28 |         }
29 |     }
30 | 
31 |     private void findOffsetStores(Path backupDir, String topic) throws IOException {
32 |         Path topicDir = Paths.get(backupDir.toString(), topic);
33 |         for (Path f : Files.list(topicDir).collect(Collectors.toList())) {
34 |             Optional<Integer> partition = OffsetUtils.isOffsetStoreFile(f);
35 |             if (partition.isPresent()) {
36 |                 TopicPartition topicPartition = new TopicPartition(topic, partition.get());
37 |                 topicOffsets.put(topicPartition, new OffsetStoreFile(f));
38 |             }
39 |         }
40 |     }
41 | 
42 |     public void syncGroupForOffset(TopicPartition topicPartition, long sourceOffset, long targetOffset) {
43 |         OffsetStoreFile offsetStoreFile = topicOffsets.get(topicPartition);
44 |         // __consumer_offsets contains the offset of the message to read next. So we need to search for the offset + 1
45 |         // if we do not do that we might miss
46 |         List<String> groups = offsetStoreFile.groupForOffset(sourceOffset + 1);
47 |         if (groups != null && groups.size() > 0) {
48 |             for (String group : groups) {
49 |                 Map<String, Object> groupConsumerConfig = new HashMap<>(consumerConfig);
50 |                 groupConsumerConfig.put("group.id", group);
51 |                 Consumer<byte[], byte[]> consumer = new KafkaConsumer<>(groupConsumerConfig);
52 |                 consumer.assign(Collections.singletonList(topicPartition));
53 |                 // ! Target Offset + 1 as we commit the offset of the "next message to read"
54 |                 OffsetAndMetadata offsetAndMetadata = new OffsetAndMetadata(targetOffset + 1);
55 |                 Map<TopicPartition, OffsetAndMetadata> offsets = Collections.singletonMap(topicPartition, offsetAndMetadata);
56 |                 consumer.commitSync(offsets);
57 |                 consumer.close();
58 |                 log.debug("Committed target offset {} for group {} for topic {} partition {}",
59 |                         (targetOffset + 1), group, topicPartition.topic(), topicPartition.partition());
60 |             }
61 |         }
62 |     }
63 | 
64 |     private static class OffsetStoreFile {
65 |         TypeReference<HashMap<String, Long>> typeRef
66 |                 = new TypeReference<HashMap<String, Long>>() {
67 |         };
68 |         private final Map<Long, List<String>> offsetGroups = new HashMap<>();
69 | 
70 |         OffsetStoreFile(Path storeFile) throws IOException {
71 |             ObjectMapper mapper = new ObjectMapper();
72 |             Map<String, Long> groupOffsets = mapper.readValue(storeFile.toFile(), typeRef);
73 |             for (Map.Entry<String, Long> entry : groupOffsets.entrySet()) {
74 |                 String group = entry.getKey();
75 |                 Long offset = entry.getValue();
76 | 
77 |                 if (offsetGroups.containsKey(offset)) {
78 |                     List<String> groups = offsetGroups.get(offset);
79 |                     groups.add(group);
80 |                 } else {
81 |                     List<String> groups = new ArrayList<>(1);
82 |                     groups.add(group);
83 |                     offsetGroups.put(offset, groups);
84 |                 }
85 |             }
86 |         }
87 | 
88 |         List<String> groupForOffset(Long offset) {
89 |             return offsetGroups.get(offset);
90 |         }
91 |     }
92 | 
93 | }
94 | 
95 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/offset/OffsetUtils.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.offset;
 2 | 
 3 | import org.apache.kafka.common.TopicPartition;
 4 | 
 5 | import java.nio.file.Path;
 6 | import java.nio.file.Paths;
 7 | import java.util.Optional;
 8 | import java.util.regex.Matcher;
 9 | import java.util.regex.Pattern;
10 | 
11 | class OffsetUtils {
12 | 
13 |     private static final String OFFSET_STORE_FILE_PREFIX = "consumer_offsets_partition";
14 |     private static final Pattern FILE_PATTERN = Pattern.compile("consumer_offsets_partition_([0-9]+)");
15 | 
16 |     static String offsetStoreFileName(int partition) {
17 |         return String.format(OFFSET_STORE_FILE_PREFIX + "_%03d", partition);
18 |     }
19 | 
20 |     static Path offsetStoreFile(Path backupDir, TopicPartition topicPartition) {
21 |         return Paths.get(backupDir.toString(), topicPartition.topic(), OffsetUtils.offsetStoreFileName(topicPartition.partition()));
22 |     }
23 | 
24 |     static Optional<Integer> isOffsetStoreFile(Path f) {
25 |         Path fpath = f.getFileName();
26 |         if (fpath == null) {
27 |             return Optional.empty();
28 |         }
29 |         String fname = fpath.toString();
30 |         Matcher m = FILE_PATTERN.matcher(fname);
31 |         if (m.find()) {
32 |             String partitionStr = m.group(1);
33 |             return Optional.of(Integer.valueOf(partitionStr));
34 |         } else {
35 |             return Optional.empty();
36 |         }
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/partition/PartitionIndex.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.partition;
  2 | 
  3 | import java.io.EOFException;
  4 | import java.io.FileInputStream;
  5 | import java.io.FileOutputStream;
  6 | import java.io.IOException;
  7 | import java.nio.file.Files;
  8 | import java.nio.file.Path;
  9 | import java.util.ArrayList;
 10 | import java.util.List;
 11 | import java.util.Optional;
 12 | 
 13 | public class PartitionIndex {
 14 |     private static final byte V1_MAGIC_BYTE = 0x01;
 15 |     private Path indexFile;
 16 |     private List<PartitionIndexEntry> index = new ArrayList<>();
 17 |     private FileOutputStream fileOutputStream;
 18 |     private FileInputStream fileInputStream;
 19 |     private int position = 0;
 20 |     private long latestStartOffset = -1;
 21 | 
 22 |     public PartitionIndex(Path indexFile) throws IOException, IndexException {
 23 |         this.indexFile = indexFile;
 24 |         initFile();
 25 |         while (true) {
 26 |             try {
 27 |                 PartitionIndexEntry partitionIndexEntry = PartitionIndexEntry.fromStream(fileInputStream);
 28 |                 if (partitionIndexEntry.startOffset() <= latestStartOffset) {
 29 |                     throw new IndexException("Offsets must be always increasing! There is something terribly wrong in your index " + indexFile + "! Got " + partitionIndexEntry.startOffset() + " expected an offset larger than " + latestStartOffset);
 30 |                 }
 31 |                 index.add(partitionIndexEntry);
 32 |                 latestStartOffset = partitionIndexEntry.startOffset();
 33 |             } catch (EOFException e) {
 34 |                 // reached End of File
 35 |                 break;
 36 |             }
 37 |         }
 38 |     }
 39 | 
 40 |     private void initFile() throws IOException, IndexException {
 41 |         if (!Files.isRegularFile(indexFile)) {
 42 |             Files.createFile(indexFile);
 43 |             fileOutputStream = new FileOutputStream(indexFile.toFile());
 44 |             fileOutputStream.write(V1_MAGIC_BYTE);
 45 |         } else {
 46 |             fileOutputStream = new FileOutputStream(indexFile.toFile(), true);
 47 |         }
 48 |         this.fileInputStream = new FileInputStream(indexFile.toFile());
 49 |         fileInputStream.getChannel().position(0);
 50 |         byte[] v1Validation = new byte[1];
 51 |         if (fileInputStream.read(v1Validation) != 1 || v1Validation[0] != V1_MAGIC_BYTE) {
 52 |             throw new IndexException("Cannot validate Magic Byte in the beginning of the index " + indexFile);
 53 |         }
 54 |     }
 55 | 
 56 |     void appendSegment(String segmentFile, long startOffset) throws IOException, IndexException {
 57 |         if (startOffset <= latestStartOffset) {
 58 |             throw new IndexException("Offsets must be always increasing! There is something terribly wrong in your index " + indexFile + "! Got " + startOffset + " expected an offset larger than " + latestStartOffset);
 59 |         }
 60 |         PartitionIndexEntry indexEntry = new PartitionIndexEntry(fileOutputStream, segmentFile, startOffset);
 61 |         index.add(indexEntry);
 62 |         latestStartOffset = startOffset;
 63 |     }
 64 | 
 65 |     Optional<PartitionIndexEntry> latestSegmentFile() {
 66 |         if (index.isEmpty()) {
 67 |             return Optional.empty();
 68 |         } else {
 69 |             return Optional.of(index.get(index.size() - 1));
 70 |         }
 71 |     }
 72 | 
 73 |     long latestStartOffset() {
 74 |         return latestStartOffset;
 75 |     }
 76 | 
 77 |     void close() throws IOException {
 78 |         fileInputStream.close();
 79 |         fileOutputStream.close();
 80 |     }
 81 | 
 82 |     void flush() throws IOException {
 83 |         fileOutputStream.flush();
 84 |     }
 85 | 
 86 |     long firstOffset() throws IndexException {
 87 |         if (index.size() == 0) {
 88 |             throw new PartitionIndex.IndexException("Partition Index is empty. Something is wrong with your partition index. Try to rebuild the index " + indexFile);
 89 |         }
 90 |         return index.get(0).startOffset();
 91 |     }
 92 | 
 93 |     void seek(long offset) throws PartitionIndex.IndexException {
 94 |         int previousPosition = -1;
 95 |         // Iterate the index after the last element
 96 |         // Such that we can seek to an offset in the last index entry
 97 |         for (int i = 0; i <= index.size(); i++) {
 98 |             if (i == index.size()) {
 99 |                 // Offset must be in the last index entry
100 |                 position = previousPosition;
101 |             } else {
102 |                 PartitionIndexEntry current = index.get(i);
103 |                 if (current.startOffset() > offset) {
104 |                     if (previousPosition >= 0) {
105 |                         position = previousPosition;
106 |                         //
107 |                         return;
108 |                     } else {
109 |                         throw new PartitionIndex.IndexException("No Index file found matching the target offset in partition index " + indexFile + ". Search for offset " + offset + ", smallest offset in index: " + current.startOffset());
110 |                     }
111 |                 } else {
112 |                     previousPosition = i;
113 |                 }
114 |             }
115 |         }
116 |     }
117 | 
118 |     boolean hasMoreData() {
119 |         return position < index.size();
120 |     }
121 | 
122 |     String readFileName() {
123 |         String fileName = index.get(position).filename();
124 |         position++;
125 |         // allow the cursor to be one after the index size.
126 |         // This way we can detect easier when we reached the end of the index
127 |         if (position > index.size()) {
128 |             throw new IndexOutOfBoundsException("Index " + indexFile + " out of bound");
129 |         }
130 |         return fileName;
131 |     }
132 | 
133 |     public List<PartitionIndexEntry> index() {
134 |         return index;
135 |     }
136 | 
137 |     public static class IndexException extends Exception {
138 |         IndexException(String message) {
139 |             super(message);
140 |         }
141 |     }
142 | }
143 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/partition/PartitionIndexEntry.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.partition;
 2 | 
 3 | import java.io.*;
 4 | import java.nio.charset.StandardCharsets;
 5 | import java.util.Objects;
 6 | 
 7 | /**
 8 |  * Format:
 9 |  * fileNameLength: int32
10 |  * fileName: UTF8-String[fileNameLength]
11 |  * startOffset: int64
12 |  * [endOffset: int64]
13 |  */
14 | public class PartitionIndexEntry {
15 |     private final String filename;
16 |     private final long startOffset;
17 | 
18 |     PartitionIndexEntry(OutputStream byteStream, String filename, long startOffset) throws IOException {
19 |         this.filename = filename;
20 |         this.startOffset = startOffset;
21 |         DataOutputStream stream = new DataOutputStream(byteStream);
22 |         byte[] filenameBytes = filename.getBytes(StandardCharsets.UTF_8);
23 |         stream.writeInt(filenameBytes.length);
24 |         stream.write(filenameBytes);
25 |         stream.writeLong(startOffset);
26 |     }
27 | 
28 |     PartitionIndexEntry(String filename, long startOffset) {
29 |         this.filename = filename;
30 |         this.startOffset = startOffset;
31 |     }
32 | 
33 |     static PartitionIndexEntry fromStream(InputStream byteStream) throws IOException {
34 |         DataInputStream stream = new DataInputStream(byteStream);
35 |         int filenameLength = stream.readInt();
36 |         byte[] filenameBytes = new byte[filenameLength];
37 |         int readBytes = stream.read(filenameBytes);
38 |         if (readBytes != filenameLength) {
39 |             throw new IOException(String.format("Expected to read %d bytes, got %d", filenameLength, readBytes));
40 |         }
41 |         String filename = new String(filenameBytes, StandardCharsets.UTF_8);
42 |         long startOffset = stream.readLong();
43 |         return new PartitionIndexEntry(filename, startOffset);
44 |     }
45 | 
46 |     public long startOffset() {
47 |         return startOffset;
48 |     }
49 | 
50 |     public String filename() {
51 |         return filename;
52 |     }
53 | 
54 |     @Override
55 |     public int hashCode() {
56 |         return Objects.hash(filename, startOffset);
57 |     }
58 | 
59 |     @Override
60 |     public boolean equals(Object o) {
61 |         if (this == o)
62 |             return true;
63 |         if (o == null || getClass() != o.getClass())
64 |             return false;
65 | 
66 |         PartitionIndexEntry that = (PartitionIndexEntry) o;
67 | 
68 |         return Objects.equals(filename(), that.filename())
69 |                 && Objects.equals(startOffset(), that.startOffset());
70 |     }
71 | 
72 |     @Override
73 |     public String toString() {
74 |         return String.format("PartitionIndexEntry{filename: %s, startOffset: %d}",
75 |                 filename, startOffset);
76 |     }
77 | }
78 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/partition/PartitionIndexRestore.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.partition;
 2 | 
 3 | import de.azapps.kafkabackup.common.segment.SegmentUtils;
 4 | 
 5 | import java.io.IOException;
 6 | import java.nio.file.Files;
 7 | import java.nio.file.Path;
 8 | 
 9 | public class PartitionIndexRestore {
10 |     private final Path indexFile;
11 |     private final int partition;
12 |     PartitionIndex index;
13 |     Path topicDir;
14 | 
15 |     public PartitionIndexRestore(Path topicDir, int partition) throws RestoreException, IOException, PartitionIndex.IndexException {
16 |         this.topicDir = topicDir;
17 |         this.indexFile = PartitionUtils.indexFile(topicDir, partition);
18 |         this.partition = partition;
19 | 
20 |         if (Files.isRegularFile(indexFile)) {
21 |             throw new RestoreException("Index file " + indexFile + " must not exist");
22 |         }
23 |         index = new PartitionIndex(indexFile);
24 |         if (!Files.isDirectory(topicDir)) {
25 |             throw new RuntimeException("Topic directory " + topicDir + " does not exist");
26 |         }
27 |     }
28 | 
29 |     public void restore() throws IOException {
30 |         Files.list(topicDir)
31 |                 .filter(x -> SegmentUtils.isSegment(x)
32 |                         && SegmentUtils.getPartitionFromSegment(x) == partition)
33 |                 .sorted()
34 |                 .forEach((Path f) -> {
35 | 
36 |                     long offset = SegmentUtils.getStartOffsetFromSegment(f);
37 |                     try {
38 |                         index.appendSegment(SegmentUtils.filePrefix(partition, offset), offset);
39 |                     } catch (IOException | PartitionIndex.IndexException e) {
40 |                         throw new RuntimeException(e);
41 |                     }
42 |                 });
43 |         index.flush();
44 |         index.close();
45 |     }
46 | 
47 | 
48 |     public static class RestoreException extends Exception {
49 |         RestoreException(String message) {
50 |             super(message);
51 |         }
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/partition/PartitionReader.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.partition;
  2 | 
  3 | import de.azapps.kafkabackup.common.record.Record;
  4 | import de.azapps.kafkabackup.common.segment.SegmentIndex;
  5 | import de.azapps.kafkabackup.common.segment.SegmentReader;
  6 | 
  7 | import java.io.IOException;
  8 | import java.nio.file.Files;
  9 | import java.nio.file.Path;
 10 | import java.util.ArrayList;
 11 | import java.util.List;
 12 | 
 13 | public class PartitionReader {
 14 |     private final String topic;
 15 |     private final int partition;
 16 |     private final Path topicDir;
 17 |     private SegmentReader currentSegment;
 18 |     private final PartitionIndex partitionIndex;
 19 | 
 20 |     public PartitionReader(String topic, int partition, Path topicDir) throws IOException, PartitionIndex.IndexException, PartitionException, SegmentIndex.IndexException {
 21 |         this.topic = topic;
 22 |         this.partition = partition;
 23 |         this.topicDir = topicDir;
 24 |         Path indexFile = PartitionUtils.indexFile(topicDir, partition);
 25 |         if (!Files.isDirectory(this.topicDir)) {
 26 |             throw new PartitionException("Cannot find topic directory for topic " + topic);
 27 |         }
 28 |         if (!Files.isRegularFile(indexFile)) {
 29 |             throw new PartitionException("Cannot find index file for partition " + partition);
 30 |         }
 31 |         partitionIndex = new PartitionIndex(indexFile);
 32 |         if (partitionIndex.hasMoreData()) {
 33 |             seek(partitionIndex.firstOffset());
 34 |         }
 35 |     }
 36 | 
 37 |     public void close() throws IOException {
 38 |         partitionIndex.close();
 39 |         if (currentSegment != null) {
 40 |             currentSegment.close();
 41 |         }
 42 |     }
 43 | 
 44 |     public void seek(long offset) throws PartitionIndex.IndexException, IOException, SegmentIndex.IndexException, IndexOutOfBoundsException {
 45 |         partitionIndex.seek(offset);
 46 |         String segmentFilePrefix = partitionIndex.readFileName();
 47 |         currentSegment = new SegmentReader(topic, partition, topicDir, segmentFilePrefix);
 48 |         currentSegment.seek(offset);
 49 |     }
 50 | 
 51 |     public boolean hasMoreData() throws IOException {
 52 |         if (currentSegment != null) {
 53 |             return currentSegment.hasMoreData() || partitionIndex.hasMoreData();
 54 |         } else {
 55 |             return false;
 56 |         }
 57 |     }
 58 | 
 59 |     public Record read() throws IOException, SegmentIndex.IndexException {
 60 |         if (currentSegment.hasMoreData()) {
 61 |             return currentSegment.read();
 62 |         } else if (partitionIndex.hasMoreData()) {
 63 |             currentSegment.close();
 64 |             String segmentFilePrefix = partitionIndex.readFileName();
 65 |             currentSegment = new SegmentReader(topic, partition, topicDir, segmentFilePrefix);
 66 |             return currentSegment.read();
 67 |         } else {
 68 |             throw new IndexOutOfBoundsException("No more data available");
 69 |         }
 70 |     }
 71 | 
 72 |     public List<Record> readN(int n) throws IOException, SegmentIndex.IndexException {
 73 |         List<Record> records = new ArrayList<>();
 74 |         while (hasMoreData() && records.size() < n) {
 75 |             Record record = read();
 76 |             records.add(record);
 77 |         }
 78 |         return records;
 79 |     }
 80 | 
 81 |     public List<Record> readBytesBatch(long batchsize) throws IOException, SegmentIndex.IndexException {
 82 |         List<Record> records = new ArrayList<>();
 83 |         long currentSize = 0;
 84 |         while (hasMoreData() && currentSize < batchsize) {
 85 |             Record record = read();
 86 |             records.add(record);
 87 |             if (record.value() != null) {
 88 |                 currentSize += record.value().length;
 89 |             }
 90 |             if (record.key() != null) {
 91 |                 currentSize += record.key().length;
 92 |             }
 93 |         }
 94 |         return records;
 95 |     }
 96 | 
 97 | 
 98 |     public List<Record> readFully() throws IOException, SegmentIndex.IndexException {
 99 |         List<Record> records = new ArrayList<>();
100 |         while (hasMoreData()) {
101 |             Record record = read();
102 |             records.add(record);
103 |         }
104 |         return records;
105 |     }
106 | 
107 | 
108 |     public static class PartitionException extends Exception {
109 |         PartitionException(String message) {
110 |             super(message);
111 |         }
112 |     }
113 | }
114 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/partition/PartitionUtils.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.partition;
 2 | 
 3 | import java.nio.file.Path;
 4 | import java.nio.file.Paths;
 5 | import java.util.Optional;
 6 | import java.util.regex.Matcher;
 7 | import java.util.regex.Pattern;
 8 | 
 9 | public class PartitionUtils {
10 |     private static final Pattern PARTITION_INDEX_PATTERN = Pattern.compile("^index_partition_([0-9]+)$");
11 | 
12 |     static Path indexFile(Path topicDir, int partition) {
13 |         return Paths.get(topicDir.toString(), String.format("index_partition_%03d", partition));
14 |     }
15 | 
16 |     public static Optional<Integer> isPartitionIndex(Path f) {
17 |         Path fpath = f.getFileName();
18 |         if (fpath == null) {
19 |             return Optional.empty();
20 |         }
21 |         String fname = fpath.toString();
22 |         Matcher m = PARTITION_INDEX_PATTERN.matcher(fname);
23 |         if (m.find()) {
24 |             String partitionStr = m.group(1);
25 |             return Optional.of(Integer.valueOf(partitionStr));
26 |         } else {
27 |             return Optional.empty();
28 |         }
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/partition/PartitionWriter.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.partition;
 2 | 
 3 | import de.azapps.kafkabackup.common.record.Record;
 4 | import de.azapps.kafkabackup.common.segment.SegmentIndex;
 5 | import de.azapps.kafkabackup.common.segment.SegmentWriter;
 6 | 
 7 | import java.io.IOException;
 8 | import java.nio.file.Files;
 9 | import java.nio.file.Path;
10 | import java.util.Optional;
11 | 
12 | public class PartitionWriter {
13 |     private String topic;
14 |     private int partition;
15 |     private Path topicDir;
16 |     private SegmentWriter currentSegment;
17 |     private PartitionIndex partitionIndex;
18 |     private long maxSegmentSizeBytes;
19 | 
20 |     public PartitionWriter(String topic, int partition, Path topicDir, long maxSegmentSizeBytes) throws IOException, PartitionIndex.IndexException, SegmentIndex.IndexException {
21 |         this.topic = topic;
22 |         this.partition = partition;
23 |         this.topicDir = topicDir;
24 |         this.maxSegmentSizeBytes = maxSegmentSizeBytes;
25 |         Path indexFile = PartitionUtils.indexFile(topicDir, partition);
26 |         if (!Files.isDirectory(this.topicDir)) {
27 |             Files.createDirectories(this.topicDir);
28 |         }
29 |         partitionIndex = new PartitionIndex(indexFile);
30 |         Optional<PartitionIndexEntry> optionalPartitionIndexEntry = partitionIndex.latestSegmentFile();
31 |         if (optionalPartitionIndexEntry.isPresent()) {
32 |             currentSegment = new SegmentWriter(topic, partition, optionalPartitionIndexEntry.get().startOffset(), topicDir);
33 |         } else {
34 |             currentSegment = new SegmentWriter(topic, partition, 0, topicDir);
35 |             // do not forget to add the current segment to the partition index. Even if it is empty
36 |             partitionIndex.appendSegment(currentSegment.filePrefix(), 0);
37 |         }
38 |     }
39 | 
40 |     private void nextSegment(long startOffset) throws IOException, SegmentIndex.IndexException, PartitionIndex.IndexException {
41 |         currentSegment.close();
42 |         SegmentWriter segment = new SegmentWriter(topic, partition, startOffset, topicDir);
43 |         if (startOffset > partitionIndex.latestStartOffset()) {
44 |             partitionIndex.appendSegment(segment.filePrefix(), startOffset);
45 |         }
46 |         currentSegment = segment;
47 |     }
48 | 
49 |     public long lastWrittenOffset() {
50 |         return currentSegment.lastWrittenOffset();
51 |     }
52 | 
53 |     public void append(Record record) throws IOException, SegmentIndex.IndexException, PartitionIndex.IndexException, SegmentWriter.SegmentException {
54 |         if (currentSegment.size() > maxSegmentSizeBytes) {
55 |             nextSegment(record.kafkaOffset());
56 |         }
57 |         currentSegment.append(record);
58 |     }
59 | 
60 |     public void close() throws IOException {
61 |         partitionIndex.close();
62 |         currentSegment.close();
63 |     }
64 | 
65 |     public void flush() throws IOException {
66 |         partitionIndex.flush();
67 |         currentSegment.flush();
68 |     }
69 | 
70 |     public String topic() {
71 |         return topic;
72 |     }
73 | 
74 |     public int partition() {
75 |         return partition;
76 |     }
77 | }
78 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/record/Record.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.record;
  2 | 
  3 | import org.apache.kafka.common.header.Header;
  4 | import org.apache.kafka.common.header.Headers;
  5 | import org.apache.kafka.common.header.internals.RecordHeaders;
  6 | import org.apache.kafka.common.record.TimestampType;
  7 | import org.apache.kafka.connect.data.Schema;
  8 | import org.apache.kafka.connect.errors.DataException;
  9 | import org.apache.kafka.connect.header.ConnectHeaders;
 10 | import org.apache.kafka.connect.sink.SinkRecord;
 11 | 
 12 | import java.util.Arrays;
 13 | import java.util.Iterator;
 14 | import java.util.Objects;
 15 | 
 16 | public class Record {
 17 |     private final String topic;
 18 |     private final Integer kafkaPartition;
 19 |     private final byte[] key;
 20 |     private final byte[] value;
 21 |     private final Long timestamp;
 22 |     private final Headers headers;
 23 |     private final long kafkaOffset;
 24 |     private final TimestampType timestampType;
 25 | 
 26 |     public Record(String topic, int partition, byte[] key, byte[] value, long kafkaOffset) {
 27 |         this(topic, partition, key, value, kafkaOffset, null, TimestampType.NO_TIMESTAMP_TYPE);
 28 |     }
 29 | 
 30 |     public Record(String topic, int partition, byte[] key, byte[] value, long kafkaOffset, Long timestamp, TimestampType timestampType) {
 31 |         this(topic, partition, key, value, kafkaOffset, timestamp, timestampType, new RecordHeaders());
 32 |     }
 33 | 
 34 |     // We do not want to copy the data and assume that Kafka Connect is not malicious
 35 |     @edu.umd.cs.findbugs.annotations.SuppressFBWarnings("EI_EXPOSE_REP")
 36 |     public Record(String topic, int partition, byte[] key, byte[] value, long kafkaOffset, Long timestamp, TimestampType timestampType, Headers headers) {
 37 |         this.topic = topic;
 38 |         this.kafkaPartition = partition;
 39 |         this.key = key;
 40 |         this.value = value;
 41 |         this.timestamp = timestamp;
 42 |         this.headers = headers;
 43 |         this.kafkaOffset = kafkaOffset;
 44 |         this.timestampType = timestampType;
 45 |     }
 46 | 
 47 |     public static Record fromSinkRecord(SinkRecord sinkRecord) {
 48 |         byte[] key = connectDataToBytes(sinkRecord.keySchema(), sinkRecord.key());
 49 |         byte[] value = connectDataToBytes(sinkRecord.valueSchema(), sinkRecord.value());
 50 |         RecordHeaders recordHeaders = new RecordHeaders();
 51 |         for (org.apache.kafka.connect.header.Header connectHeader : sinkRecord.headers()) {
 52 |             byte[] headerValue = connectDataToBytes(connectHeader.schema(), connectHeader.value());
 53 |             recordHeaders.add(connectHeader.key(), headerValue);
 54 |         }
 55 |         return new Record(sinkRecord.topic(), sinkRecord.kafkaPartition(), key, value, sinkRecord.kafkaOffset(), sinkRecord.timestamp(), sinkRecord.timestampType(), recordHeaders);
 56 |     }
 57 | 
 58 |     private static byte[] connectDataToBytes(Schema schema, Object value) {
 59 |         if (schema != null && schema.type() != Schema.Type.BYTES)
 60 |             throw new DataException("Invalid schema type for ByteArrayConverter: " + schema.type().toString());
 61 | 
 62 |         if (value != null && !(value instanceof byte[]))
 63 |             throw new DataException("ByteArrayConverter is not compatible with objects of type " + value.getClass());
 64 | 
 65 |         return (byte[]) value;
 66 |     }
 67 | 
 68 |     public SinkRecord toSinkRecord() {
 69 |         ConnectHeaders connectHeaders = new ConnectHeaders();
 70 |         for (Header header : headers) {
 71 |             connectHeaders.addBytes(header.key(), header.value());
 72 |         }
 73 |         return new SinkRecord(topic, kafkaPartition, Schema.OPTIONAL_BYTES_SCHEMA, key, Schema.OPTIONAL_BYTES_SCHEMA, value, kafkaOffset,
 74 |                 timestamp, timestampType, connectHeaders);
 75 |     }
 76 | 
 77 |     public String topic() {
 78 |         return topic;
 79 |     }
 80 | 
 81 |     public Integer kafkaPartition() {
 82 |         return kafkaPartition;
 83 |     }
 84 | 
 85 |     // We do not want to copy the data and assume that Kafka Connect is not malicious
 86 |     @edu.umd.cs.findbugs.annotations.SuppressFBWarnings("EI_EXPOSE_REP")
 87 |     public byte[] key() {
 88 |         return key;
 89 |     }
 90 | 
 91 |     // We do not want to copy the data and assume that Kafka Connect is not malicious
 92 |     @edu.umd.cs.findbugs.annotations.SuppressFBWarnings("EI_EXPOSE_REP")
 93 |     public byte[] value() {
 94 |         return value;
 95 |     }
 96 | 
 97 |     public Long timestamp() {
 98 |         return timestamp;
 99 |     }
100 | 
101 |     public Headers headers() {
102 |         return headers;
103 |     }
104 | 
105 |     public long kafkaOffset() {
106 |         return kafkaOffset;
107 |     }
108 | 
109 |     public TimestampType timestampType() {
110 |         return timestampType;
111 |     }
112 | 
113 |     @Override
114 |     public int hashCode() {
115 |         int result = Objects.hash(topic, kafkaPartition, timestamp, headers, kafkaOffset, timestampType);
116 |         result = 31 * result + Arrays.hashCode(key);
117 |         result = 31 * result + Arrays.hashCode(value);
118 |         return result;
119 |     }
120 | 
121 |     @Override
122 |     public boolean equals(Object o) {
123 |         if (this == o)
124 |             return true;
125 |         if (o == null || getClass() != o.getClass())
126 |             return false;
127 | 
128 |         Record that = (Record) o;
129 | 
130 |         // alternative implementation of ConnectRecord.equals that use Headers equality by value
131 |         return Objects.equals(kafkaPartition(), that.kafkaPartition())
132 |                 && Objects.equals(topic(), that.topic())
133 |                 && Arrays.equals(key(), that.key())
134 |                 && Arrays.equals(value(), that.value())
135 |                 && Objects.equals(timestamp(), that.timestamp())
136 |                 && headersEqualityByValue(headers(), that.headers())
137 |                 && Objects.equals(kafkaOffset(), that.kafkaOffset())
138 |                 && Objects.equals(timestampType(), that.timestampType());
139 |     }
140 | 
141 |     @Override
142 |     public String toString() {
143 |         String keyLength = (key == null) ? "null" : String.valueOf(key.length);
144 |         String valueLength = (value == null) ? "null" : String.valueOf(value.length);
145 |         String timestampTypeStr = timestampType.toString();
146 |         String timestampStr = (timestamp == null) ? "null" : String.valueOf(timestamp);
147 |         return String.format("Record{topic: %s, partition: %d, offset: %d, key: byte[%s], value: byte[%s], timestampType: %s, timestamp: %s, headers: %s}",
148 |                 topic, kafkaPartition, kafkaOffset, keyLength, valueLength, timestampTypeStr, timestampStr, headers);
149 |     }
150 | 
151 |     private boolean headersEqualityByValue(Headers a, Headers b) {
152 |         // This is an alternative implementation of ConnectHeaders::equals that use proper Header equality by value
153 |         if (a == b) {
154 |             return true;
155 |         }
156 |         // Note, similar to ConnectHeaders::equals, it requires headers to have the same order
157 |         // (although, that is probably not what we want in most cases)
158 |         Iterator<Header> aIter = a.iterator();
159 |         Iterator<Header> bIter = b.iterator();
160 |         while (aIter.hasNext() && bIter.hasNext()) {
161 |             if (!headerEqualityByValue(aIter.next(), bIter.next()))
162 |                 return false;
163 |         }
164 |         return !aIter.hasNext() && !bIter.hasNext();
165 |     }
166 | 
167 |     private boolean headerEqualityByValue(Header a, Header b) {
168 |         // This is an alternative implementation of ConnectHeader::equals that use proper Value equality by value
169 |         // (even if they are byte arrays)
170 |         if (a == b) {
171 |             return true;
172 |         }
173 |         if (!Objects.equals(a.key(), b.key())) {
174 |             return false;
175 |         }
176 |         try {
177 |             // This particular case is not handled by ConnectHeader::equals
178 |             byte[] aBytes = a.value();
179 |             byte[] bBytes = b.value();
180 |             return Arrays.equals(aBytes, bBytes);
181 |         } catch (ClassCastException e) {
182 |             return a.value() == b.value();
183 |         }
184 |     }
185 | 
186 | 
187 | }
188 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/record/RecordSerde.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.record;
  2 | 
  3 | import org.apache.kafka.common.header.Header;
  4 | import org.apache.kafka.common.header.internals.RecordHeaders;
  5 | import org.apache.kafka.common.record.TimestampType;
  6 | 
  7 | import java.io.*;
  8 | import java.nio.charset.StandardCharsets;
  9 | 
 10 | /**
 11 |  * Record Format:
 12 |  * offset: int64
 13 |  * timestampType: int32 -2 if timestamp is null
 14 |  * [timestamp: int64] if timestampType != NO_TIMESTAMP_TYPE && timestamp != null
 15 |  * keyLength: int32
 16 |  * [key: byte[keyLength]] if keyLength >= 0
 17 |  * valueLength: int32
 18 |  * [value: byte[valueLength]] if valueLength >= 0
 19 |  * headerCount: int32
 20 |  * headers: Header[headerCount]
 21 |  * <p>
 22 |  * Header Format:
 23 |  * headerKeyLength: int32
 24 |  * headerKey: byte[headerKeyLength]
 25 |  * headerValueLength: int32
 26 |  * [headerValue: byte[headerValueLength]] if headerValueLength >= 0
 27 |  */
 28 | public class RecordSerde {
 29 |     public static Record read(String topic, int partition, InputStream inputStream) throws IOException {
 30 |         DataInputStream dataStream = new DataInputStream(inputStream);
 31 |         long offset = dataStream.readLong();
 32 |         int timestampTypeInt = dataStream.readInt();
 33 |         TimestampType timestampType;
 34 |         Long timestamp;
 35 |         // See comment in `write()`
 36 |         if (timestampTypeInt == -2) {
 37 |             timestampType = TimestampType.CREATE_TIME;
 38 |             timestamp=null;
 39 |         } else {
 40 |             switch (timestampTypeInt) {
 41 |                 case -1:
 42 |                     timestampType = TimestampType.NO_TIMESTAMP_TYPE;
 43 |                     break;
 44 |                 case 0:
 45 |                     timestampType = TimestampType.CREATE_TIME;
 46 |                     break;
 47 |                 case 1:
 48 |                     timestampType = TimestampType.LOG_APPEND_TIME;
 49 |                     break;
 50 |                 default:
 51 |                     throw new RuntimeException("Unexpected TimestampType. Expected -1,0 or 1. Got " + timestampTypeInt);
 52 |             }
 53 |             if (timestampType != TimestampType.NO_TIMESTAMP_TYPE) {
 54 |                 timestamp = dataStream.readLong();
 55 |             } else {
 56 |                 timestamp = null;
 57 |             }
 58 |         }
 59 |         int keyLength = dataStream.readInt();
 60 |         byte[] key = null;
 61 |         if (keyLength >= 0) {
 62 |             key = new byte[keyLength];
 63 |             int readBytes = dataStream.read(key);
 64 |             if (readBytes != keyLength) {
 65 |                 throw new IOException(String.format("Expected to read %d bytes, got %d", keyLength, readBytes));
 66 |             }
 67 |         }
 68 | 
 69 |         int valueLength = dataStream.readInt();
 70 |         byte[] value = null;
 71 |         if (valueLength >= 0) {
 72 |             value = new byte[valueLength];
 73 |             int readBytes = dataStream.read(value);
 74 |             if (readBytes != valueLength) {
 75 |                 throw new IOException(String.format("Expected to read %d bytes, got %d", valueLength, readBytes));
 76 |             }
 77 |         }
 78 |         int headerCount = dataStream.readInt();
 79 |         RecordHeaders headers = new RecordHeaders();
 80 |         for (int i = 0; i < headerCount; i++) {
 81 |             // Key
 82 |             int headerKeyLength = dataStream.readInt();
 83 |             if (headerKeyLength < 0) {
 84 |                 throw new RuntimeException("Invalid negative header key size " + headerKeyLength);
 85 |             }
 86 |             byte[] headerKeyBytes = new byte[headerKeyLength];
 87 |             int readBytes = dataStream.read(headerKeyBytes);
 88 |             if (readBytes != headerKeyLength) {
 89 |                 throw new IOException(String.format("Expected to read %d bytes, got %d", headerKeyLength, readBytes));
 90 |             }
 91 |             String headerKey = new String(headerKeyBytes, StandardCharsets.UTF_8);
 92 |             // Value
 93 |             int headerValueLength = dataStream.readInt();
 94 |             byte[] headerValue = null;
 95 |             if (headerValueLength >= 0) {
 96 |                 headerValue = new byte[headerValueLength];
 97 |                 int hvReadBytes = dataStream.read(headerValue);
 98 |                 if (hvReadBytes != headerValueLength) {
 99 |                     throw new IOException(String.format("Expected to read %d bytes, got %d", headerValueLength, hvReadBytes));
100 |                 }
101 |             }
102 |             headers.add(headerKey, headerValue);
103 |         }
104 | 
105 |         return new Record(topic, partition, key, value, offset, timestamp, timestampType, headers);
106 |     }
107 | 
108 |     public static void write(OutputStream outputStream, Record record) throws IOException {
109 |         DataOutputStream dataStream = new DataOutputStream(outputStream);
110 |         dataStream.writeLong(record.kafkaOffset());
111 |         // There is a special case where the timestamp type eqauls `CREATE_TIME` but is actually `null`.
112 |         // This should not happen normally and I see it as a bug in the Client implementation of pykafka
113 |         // But as Kafka accepts that value, so should Kafka Backup. Thus, this dirty workaround: we write the
114 |         // timestamp type `-2` if the type is CREATE_TIME but the timestamp itself is null. Otherwise we would have
115 |         // needed to change the byte format and for now I think this is the better solution.
116 |         if (record.timestampType() == TimestampType.CREATE_TIME && record.timestamp() == null) {
117 |             dataStream.writeInt(-2);
118 |         } else {
119 |             dataStream.writeInt(record.timestampType().id);
120 |             if (record.timestampType() != TimestampType.NO_TIMESTAMP_TYPE) {
121 |                 dataStream.writeLong(record.timestamp());
122 |             }
123 |         }
124 |         if (record.key() != null) {
125 |             dataStream.writeInt(record.key().length);
126 |             dataStream.write(record.key());
127 |         } else {
128 |             dataStream.writeInt(-1);
129 |         }
130 |         if (record.value() != null) {
131 |             dataStream.writeInt(record.value().length);
132 |             dataStream.write(record.value());
133 |         } else {
134 |             dataStream.writeInt(-1);
135 |         }
136 |         Header[] headers = record.headers().toArray();
137 |         dataStream.writeInt(headers.length);
138 |         for (Header header : record.headers()) {
139 |             byte[] headerKeyBytes = header.key().getBytes(StandardCharsets.UTF_8);
140 |             dataStream.writeInt(headerKeyBytes.length);
141 |             dataStream.write(headerKeyBytes);
142 |             if (header.value() != null) {
143 |                 dataStream.writeInt(header.value().length);
144 |                 dataStream.write(header.value());
145 |             } else {
146 |                 dataStream.writeInt(-1);
147 |             }
148 |         }
149 |     }
150 | }
151 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/segment/SegmentIndex.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.segment;
  2 | 
  3 | import java.io.EOFException;
  4 | import java.io.FileInputStream;
  5 | import java.io.FileOutputStream;
  6 | import java.io.IOException;
  7 | import java.nio.file.Files;
  8 | import java.nio.file.Path;
  9 | import java.util.ArrayList;
 10 | import java.util.List;
 11 | import java.util.Optional;
 12 | 
 13 | public class SegmentIndex {
 14 |     private static final byte V1_MAGIC_BYTE = 0x01;
 15 |     private Path indexFile;
 16 |     private List<SegmentIndexEntry> index = new ArrayList<>();
 17 |     private long lastValidRecordOffset = -1;
 18 |     private long lastValidIndexPosition = 1; // mind the magic byte!
 19 |     private FileOutputStream fileOutputStream;
 20 |     private FileInputStream fileInputStream;
 21 | 
 22 |     public SegmentIndex(Path indexFile) throws IOException, IndexException {
 23 |         this.indexFile = indexFile;
 24 |         initFile();
 25 |         while (true) {
 26 |             try {
 27 |                 SegmentIndexEntry segmentIndexEntry = SegmentIndexEntry.fromStream(fileInputStream);
 28 |                 if (segmentIndexEntry.getOffset() <= lastValidRecordOffset) {
 29 |                     throw new IndexException("Offsets must be always increasing! There is something terribly wrong in your index!");
 30 |                 }
 31 |                 index.add(segmentIndexEntry);
 32 |                 lastValidRecordOffset = segmentIndexEntry.getOffset();
 33 |                 lastValidIndexPosition = fileInputStream.getChannel().position();
 34 |             } catch (EOFException e) {
 35 |                 // reached End of File
 36 |                 break;
 37 |             }
 38 |         }
 39 |     }
 40 | 
 41 |     private void initFile() throws IOException, IndexException {
 42 |         if (!Files.isRegularFile(indexFile)) {
 43 |             Files.createFile(indexFile);
 44 |             fileOutputStream = new FileOutputStream(indexFile.toFile());
 45 |             fileOutputStream.write(V1_MAGIC_BYTE);
 46 |         } else {
 47 |             fileOutputStream = new FileOutputStream(indexFile.toFile(), true);
 48 |         }
 49 |         this.fileInputStream = new FileInputStream(indexFile.toFile());
 50 |         byte[] v1Validation = new byte[1];
 51 |         if (fileInputStream.read(v1Validation) != 1 || v1Validation[0] != V1_MAGIC_BYTE) {
 52 |             throw new IndexException("Cannot validate Magic Byte in the beginning of the index " + indexFile);
 53 |         }
 54 |     }
 55 | 
 56 |     void addEntry(SegmentIndexEntry segmentIndexEntry) throws IOException, IndexException {
 57 |         if (segmentIndexEntry.getOffset() <= lastValidRecordOffset) {
 58 |             throw new IndexException("Offsets must be always increasing! There is something terribly wrong in your index!");
 59 |         }
 60 |         fileOutputStream.getChannel().position(lastValidIndexPosition);
 61 |         segmentIndexEntry.writeToStream(fileOutputStream);
 62 |         lastValidIndexPosition = fileOutputStream.getChannel().position();
 63 |         lastValidRecordOffset = segmentIndexEntry.getOffset();
 64 |         index.add(segmentIndexEntry);
 65 |     }
 66 | 
 67 |     Optional<SegmentIndexEntry> lastIndexEntry() {
 68 |         if (!index.isEmpty()) {
 69 |             return Optional.of(index.get(index.size() - 1));
 70 |         } else {
 71 |             return Optional.empty();
 72 |         }
 73 |     }
 74 | 
 75 |     long lastValidStartPosition() {
 76 |         if (!index.isEmpty()) {
 77 |             return index.get(index.size() - 1).recordFilePosition();
 78 |         } else {
 79 |             return 0L;
 80 |         }
 81 | 
 82 |     }
 83 | 
 84 |     Optional<SegmentIndexEntry> getByPosition(int position) {
 85 |         if (position >= index.size()) {
 86 |             return Optional.empty();
 87 |         } else {
 88 |             return Optional.of(index.get(position));
 89 |         }
 90 |     }
 91 | 
 92 |     Optional<Long> findByOffset(long offset) {
 93 |         for (SegmentIndexEntry current : index) {
 94 |             if (current.getOffset() == offset) {
 95 |                 return Optional.of(current.recordFilePosition());
 96 |             }
 97 |         }
 98 |         return Optional.empty();
 99 |     }
100 | 
101 |     Optional<Long> findEarliestWithHigherOrEqualOffset(long offset) {
102 |         for (SegmentIndexEntry current : index) {
103 |             if (current.getOffset() >= offset) {
104 |                 return Optional.of(current.recordFilePosition());
105 |             }
106 |         }
107 |         return Optional.empty();
108 |     }
109 | 
110 |     int size() {
111 |         return index.size();
112 |     }
113 | 
114 |     public List<SegmentIndexEntry> index() {
115 |         return index;
116 |     }
117 | 
118 |     void flush() throws IOException {
119 |         fileOutputStream.flush();
120 |     }
121 | 
122 |     void close() throws IOException {
123 |         fileInputStream.close();
124 |         fileOutputStream.close();
125 |     }
126 | 
127 |     public static class IndexException extends Exception {
128 |         IndexException(String message) {
129 |             super(message);
130 |         }
131 |     }
132 | 
133 | }
134 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/segment/SegmentIndexEntry.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.segment;
 2 | 
 3 | import java.io.*;
 4 | import java.util.Objects;
 5 | 
 6 | /**
 7 |  * Format:
 8 |  * offset: int64
 9 |  * recordFilePosition: int64
10 |  * recordLength: int64
11 |  */
12 | public class SegmentIndexEntry {
13 |     private final long offset;
14 |     private final long recordFilePosition;
15 |     private final long recordByteLength;
16 | 
17 |     SegmentIndexEntry(long offset, long recordFilePosition, long recordByteLength) {
18 |         this.offset = offset;
19 |         this.recordFilePosition = recordFilePosition;
20 |         this.recordByteLength = recordByteLength;
21 |     }
22 | 
23 |     static SegmentIndexEntry fromStream(InputStream byteStream) throws IOException {
24 |         DataInputStream stream = new DataInputStream(byteStream);
25 |         long offset = stream.readLong();
26 |         long recordFileOffset = stream.readLong();
27 |         long recordByteLength = stream.readLong();
28 |         return new SegmentIndexEntry(offset, recordFileOffset, recordByteLength);
29 |     }
30 | 
31 |     public long getOffset() {
32 |         return offset;
33 |     }
34 | 
35 |     public long recordFilePosition() {
36 |         return recordFilePosition;
37 |     }
38 | 
39 |     public long recordByteLength() {
40 |         return recordByteLength;
41 |     }
42 | 
43 |     void writeToStream(OutputStream byteStream) throws IOException {
44 |         DataOutputStream stream = new DataOutputStream(byteStream);
45 |         stream.writeLong(offset);
46 |         stream.writeLong(recordFilePosition);
47 |         stream.writeLong(recordByteLength);
48 |     }
49 | 
50 |     @Override
51 |     public int hashCode() {
52 |         return Objects.hash(offset, recordFilePosition, recordByteLength);
53 |     }
54 | 
55 |     @Override
56 |     public boolean equals(Object o) {
57 |         if (this == o)
58 |             return true;
59 |         if (o == null || getClass() != o.getClass())
60 |             return false;
61 | 
62 |         SegmentIndexEntry that = (SegmentIndexEntry) o;
63 | 
64 |         return Objects.equals(getOffset(), that.getOffset())
65 |                 && Objects.equals(recordFilePosition(), that.recordFilePosition())
66 |                 && Objects.equals(recordByteLength(), that.recordByteLength());
67 |     }
68 | 
69 |     @Override
70 |     public String toString() {
71 |         return String.format("SegmentIndexEntry{offset: %d, recordFilePosition: %d, recordByteLength: %d}",
72 |                 offset, recordFilePosition, recordByteLength);
73 |     }
74 | }
75 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/segment/SegmentIndexRestore.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.segment;
 2 | 
 3 | import de.azapps.kafkabackup.common.record.Record;
 4 | 
 5 | import java.io.EOFException;
 6 | import java.io.IOException;
 7 | import java.nio.file.Files;
 8 | import java.nio.file.Path;
 9 | 
10 | public class SegmentIndexRestore {
11 |     private final SegmentIndex segmentIndex;
12 |     private final UnverifiedSegmentReader reader;
13 | 
14 |     public SegmentIndexRestore(Path segmentFile) throws IOException, RestoreException, SegmentIndex.IndexException {
15 |         int partition = SegmentUtils.getPartitionFromSegment(segmentFile);
16 |         long startOffset = SegmentUtils.getStartOffsetFromSegment(segmentFile);
17 |         Path parent = segmentFile.toAbsolutePath().getParent();
18 |         if (parent == null) {
19 |             throw new RestoreException("Segment file " + segmentFile + " does not exist");
20 |         }
21 |         Path indexFile = SegmentUtils.indexFile(parent, partition, startOffset);
22 | 
23 |         if (!Files.isRegularFile(segmentFile)) {
24 |             throw new RestoreException("Segment file " + segmentFile + " does not exist");
25 |         }
26 |         if (Files.isRegularFile(indexFile)) {
27 |             throw new RestoreException("Index file " + indexFile + " must not exist");
28 |         }
29 |         segmentIndex = new SegmentIndex(indexFile);
30 |         reader = new UnverifiedSegmentReader(segmentFile);
31 |     }
32 | 
33 |     public void restore() throws IOException, SegmentIndex.IndexException {
34 |         long lastPosition = 1; // mind the magic byte!
35 |         while (true) {
36 |             try {
37 |                 Record record = reader.read();
38 |                 long currentPosition = reader.position();
39 |                 SegmentIndexEntry indexEntry = new SegmentIndexEntry(record.kafkaOffset(), lastPosition, currentPosition - lastPosition);
40 |                 segmentIndex.addEntry(indexEntry);
41 |                 lastPosition = currentPosition;
42 |             } catch (EOFException e) {
43 |                 break;
44 |             }
45 |         }
46 |         segmentIndex.flush();
47 |         segmentIndex.close();
48 |     }
49 | 
50 |     public static class RestoreException extends Exception {
51 |         RestoreException(String message) {
52 |             super(message);
53 |         }
54 |     }
55 | 
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/segment/SegmentReader.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.segment;
 2 | 
 3 | import de.azapps.kafkabackup.common.record.Record;
 4 | import de.azapps.kafkabackup.common.record.RecordSerde;
 5 | 
 6 | import java.io.EOFException;
 7 | import java.io.FileInputStream;
 8 | import java.io.IOException;
 9 | import java.nio.channels.FileChannel;
10 | import java.nio.file.Files;
11 | import java.nio.file.Path;
12 | import java.util.ArrayList;
13 | import java.util.List;
14 | import java.util.Optional;
15 | 
16 | public class SegmentReader {
17 |     private final String topic;
18 |     private final int partition;
19 |     private final String filePrefix;
20 |     private final SegmentIndex segmentIndex;
21 |     private final FileInputStream recordInputStream;
22 |     private final long lastValidStartPosition;
23 | 
24 |     public SegmentReader(String topic, int partition, Path topicDir, long startOffset) throws IOException, SegmentIndex.IndexException {
25 |         this(topic, partition, topicDir, SegmentUtils.filePrefix(partition, startOffset));
26 |     }
27 | 
28 |     public SegmentReader(String topic, int partition, Path topicDir, String filePrefix) throws IOException, SegmentIndex.IndexException {
29 |         this.topic = topic;
30 |         this.partition = partition;
31 |         this.filePrefix = filePrefix;
32 | 
33 |         Path indexFile = SegmentUtils.indexFile(topicDir, filePrefix);
34 |         Path recordFile = SegmentUtils.recordsFile(topicDir, filePrefix);
35 |         if (!Files.isRegularFile(indexFile)) {
36 |             throw new RuntimeException("Index for Segment not found: " + indexFile.toString());
37 |         }
38 |         if (!Files.isRegularFile(recordFile)) {
39 |             throw new RuntimeException("Segment not found: " + recordFile.toString());
40 |         }
41 |         segmentIndex = new SegmentIndex(indexFile);
42 |         recordInputStream = new FileInputStream(recordFile.toFile());
43 |         SegmentUtils.ensureValidSegment(recordInputStream);
44 |         lastValidStartPosition = segmentIndex.lastValidStartPosition();
45 |     }
46 | 
47 |     public void seek(long offset) throws IOException {
48 |         Optional<Long> optionalPosition = segmentIndex.findEarliestWithHigherOrEqualOffset(offset);
49 |         if (optionalPosition.isPresent()) {
50 |             recordInputStream.getChannel().position(optionalPosition.get());
51 |         } else {
52 |             // If we couldn't find such a record, skip to EOF. This will make sure that hasMoreData() returns false.
53 |             FileChannel fileChannel = recordInputStream.getChannel();
54 |             fileChannel.position(fileChannel.size());
55 |         }
56 |     }
57 | 
58 |     public boolean hasMoreData() throws IOException {
59 |         return recordInputStream.getChannel().position() <= lastValidStartPosition;
60 |     }
61 | 
62 |     public Record read() throws IOException {
63 |         if (!hasMoreData()) {
64 |             throw new EOFException("Already read the last valid record in topic " + topic + ", segment " + filePrefix);
65 |         }
66 |         return RecordSerde.read(topic, partition, recordInputStream);
67 |     }
68 | 
69 |     public List<Record> readN(int n) throws IOException {
70 |         List<Record> records = new ArrayList<>(n);
71 |         while (hasMoreData() && records.size() < n) {
72 |             Record record = read();
73 |             records.add(record);
74 |         }
75 |         return records;
76 |     }
77 | 
78 |     public List<Record> readFully() throws IOException {
79 |         List<Record> records = new ArrayList<>(segmentIndex.size());
80 |         while (hasMoreData()) {
81 |             Record record = read();
82 |             records.add(record);
83 |         }
84 |         return records;
85 |     }
86 | 
87 |     public void close() throws IOException {
88 |         recordInputStream.close();
89 |         segmentIndex.close();
90 |     }
91 | }
92 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/segment/SegmentUtils.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.segment;
 2 | 
 3 | import java.io.FileInputStream;
 4 | import java.io.IOException;
 5 | import java.nio.file.Path;
 6 | import java.nio.file.Paths;
 7 | import java.util.regex.Matcher;
 8 | import java.util.regex.Pattern;
 9 | 
10 | public class SegmentUtils {
11 | 
12 |     static final byte V1_MAGIC_BYTE = 0x01;
13 |     private static final Pattern SEGMENT_PATTERN = Pattern.compile("^segment_partition_([0-9]+)_from_offset_([0-9]+)_records$");
14 | 
15 |     public static String filePrefix(int partition, long startOffset) {
16 |         return String.format("segment_partition_%03d_from_offset_%010d", partition, startOffset);
17 |     }
18 | 
19 |     static void ensureValidSegment(FileInputStream inputStream) throws IOException {
20 |         inputStream.getChannel().position(0);
21 |         byte[] v1Validation = new byte[1];
22 |         if (inputStream.read(v1Validation) != 1 || v1Validation[0] != SegmentUtils.V1_MAGIC_BYTE) {
23 |             throw new IOException("Cannot validate Magic Byte in the beginning of the Segment");
24 |         }
25 |     }
26 | 
27 |     public static Path indexFile(Path topicDir, int partition, long startOffset) {
28 |         return indexFile(topicDir, filePrefix(partition, startOffset));
29 |     }
30 | 
31 |     static Path indexFile(Path topicDir, String filePrefix) {
32 |         return Paths.get(topicDir.toString(), filePrefix + "_index");
33 |     }
34 | 
35 |     public static Path recordsFile(Path topicDir, int partition, long startOffset) {
36 |         return recordsFile(topicDir, filePrefix(partition, startOffset));
37 |     }
38 | 
39 |     static Path recordsFile(Path topicDir, String filePrefix) {
40 |         return Paths.get(topicDir.toString(), filePrefix + "_records");
41 |     }
42 | 
43 |     public static boolean isSegment(Path file) {
44 |         Path fpath = file.getFileName();
45 |         if (fpath == null) {
46 |             return false;
47 |         }
48 |         Matcher m = SEGMENT_PATTERN.matcher(fpath.toString());
49 |         return m.find();
50 |     }
51 | 
52 |     public static int getPartitionFromSegment(Path file) {
53 |         Path fpath = file.getFileName();
54 |         if (fpath == null) {
55 |             throw new RuntimeException("File " + file + " is not a Segment");
56 |         }
57 |         Matcher m = SEGMENT_PATTERN.matcher(fpath.toString());
58 |         if (m.find()) {
59 |             String partitionStr = m.group(1);
60 |             return Integer.parseInt(partitionStr);
61 |         } else {
62 |             throw new RuntimeException("File " + file + " is not a Segment");
63 |         }
64 |     }
65 | 
66 |     public static long getStartOffsetFromSegment(Path file) {
67 |         Path fpath = file.getFileName();
68 |         if (fpath == null) {
69 |             throw new RuntimeException("File " + file + " is not a Segment");
70 |         }
71 |         Matcher m = SEGMENT_PATTERN.matcher(fpath.toString());
72 |         if (m.find()) {
73 |             String offsetStr = m.group(2);
74 |             return Long.parseLong(offsetStr);
75 |         } else {
76 |             throw new RuntimeException("File " + file + " is not a Segment");
77 |         }
78 |     }
79 | 
80 | }
81 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/segment/SegmentWriter.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.segment;
  2 | 
  3 | import de.azapps.kafkabackup.common.record.Record;
  4 | import de.azapps.kafkabackup.common.record.RecordSerde;
  5 | 
  6 | import java.io.FileInputStream;
  7 | import java.io.FileOutputStream;
  8 | import java.io.IOException;
  9 | import java.nio.file.Files;
 10 | import java.nio.file.Path;
 11 | import java.util.Optional;
 12 | 
 13 | public class SegmentWriter {
 14 |     private final String topic;
 15 |     private final int partition;
 16 |     private final long startOffset;
 17 |     private final SegmentIndex segmentIndex;
 18 |     private final FileOutputStream recordOutputStream;
 19 | 
 20 |     public SegmentWriter(String topic, int partition, long startOffset, Path topicDir) throws IOException, SegmentIndex.IndexException {
 21 |         this.topic = topic;
 22 |         this.partition = partition;
 23 |         this.startOffset = startOffset;
 24 | 
 25 |         Path indexFile = SegmentUtils.indexFile(topicDir, partition, startOffset);
 26 |         segmentIndex = new SegmentIndex(indexFile);
 27 | 
 28 |         Path recordFile = SegmentUtils.recordsFile(topicDir, partition, startOffset);
 29 |         if (!Files.isRegularFile(recordFile)) {
 30 |             Files.createFile(recordFile);
 31 |             recordOutputStream = new FileOutputStream(recordFile.toFile());
 32 |             recordOutputStream.write(SegmentUtils.V1_MAGIC_BYTE);
 33 |         } else {
 34 |             // Validate Magic Byte
 35 |             FileInputStream inputStream = new FileInputStream(recordFile.toFile());
 36 |             SegmentUtils.ensureValidSegment(inputStream);
 37 |             inputStream.close();
 38 | 
 39 |             // move to last committed position of the file
 40 |             recordOutputStream = new FileOutputStream(recordFile.toFile(), true);
 41 |             Optional<SegmentIndexEntry> optionalPreviousIndexEntry = segmentIndex.lastIndexEntry();
 42 |             if (optionalPreviousIndexEntry.isPresent()) {
 43 |                 SegmentIndexEntry previousSegmentIndexEntry = optionalPreviousIndexEntry.get();
 44 |                 long position = previousSegmentIndexEntry.recordFilePosition() + previousSegmentIndexEntry.recordByteLength();
 45 |                 recordOutputStream.getChannel().position(position);
 46 |             } else {
 47 |                 recordOutputStream.getChannel().position(1);
 48 |             }
 49 |         }
 50 |     }
 51 | 
 52 |     public long lastWrittenOffset() {
 53 |         return segmentIndex.lastIndexEntry().map(SegmentIndexEntry::getOffset).orElse(-1L);
 54 |     }
 55 | 
 56 |     public void append(Record record) throws IOException, SegmentIndex.IndexException, SegmentException {
 57 |         if (!record.topic().equals(topic)) {
 58 |             throw new SegmentException("Trying to append to wrong topic!\n" +
 59 |                     "Expected topic: " + topic + " given topic: " + record.topic());
 60 |         }
 61 |         if (record.kafkaPartition() != partition) {
 62 |             throw new SegmentException("Trying to append to wrong partition!\n" +
 63 |                     "Expected partition: " + partition + " given partition: " + partition);
 64 |         }
 65 |         if (record.kafkaOffset() < startOffset) {
 66 |             throw new SegmentException("Try to append a record with an offset smaller than the start offset. Something is very wrong. \n" +
 67 |                     "Topic: " + record.topic() + "Partition: " + record.kafkaPartition() + " StartOffset: " + startOffset + " RecordOffset: " + record.kafkaOffset() + "\n" +
 68 |                     "You probably forgot to delete a previous Backup\n");
 69 |         }
 70 |         if (record.kafkaOffset() <= lastWrittenOffset()) {
 71 |             // We are handling the offsets ourselves. This should never happen!
 72 |             throw new SegmentException("Trying to override a written record. There is something terribly wrong in your setup! Please check whether you are trying to override an existing backup" +
 73 |                     "Topic: " + record.topic() + "Partition: " + record.kafkaPartition() + " lastWrittenOffset: " + lastWrittenOffset() + " RecordOffset: " + record.kafkaOffset());
 74 |         }
 75 |         long startPosition = recordOutputStream.getChannel().position();
 76 |         RecordSerde.write(recordOutputStream, record);
 77 |         long recordByteLength = recordOutputStream.getChannel().position() - startPosition;
 78 |         SegmentIndexEntry segmentIndexEntry = new SegmentIndexEntry(record.kafkaOffset(), startPosition, recordByteLength);
 79 |         segmentIndex.addEntry(segmentIndexEntry);
 80 |     }
 81 | 
 82 |     public String filePrefix() {
 83 |         return SegmentUtils.filePrefix(partition, startOffset);
 84 |     }
 85 | 
 86 |     public long size() throws IOException {
 87 |         return recordOutputStream.getChannel().size();
 88 |     }
 89 | 
 90 |     public void flush() throws IOException {
 91 |         recordOutputStream.flush();
 92 |         segmentIndex.flush();
 93 |     }
 94 | 
 95 |     public void close() throws IOException {
 96 |         recordOutputStream.close();
 97 |         segmentIndex.close();
 98 |     }
 99 | 
100 |     public static class SegmentException extends Exception {
101 |         SegmentException(String message) {
102 |             super(message);
103 |         }
104 |     }
105 | }
106 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/common/segment/UnverifiedSegmentReader.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.segment;
 2 | 
 3 | import de.azapps.kafkabackup.common.record.Record;
 4 | import de.azapps.kafkabackup.common.record.RecordSerde;
 5 | 
 6 | import java.io.EOFException;
 7 | import java.io.FileInputStream;
 8 | import java.io.IOException;
 9 | import java.nio.file.Path;
10 | import java.util.ArrayList;
11 | import java.util.List;
12 | 
13 | public class UnverifiedSegmentReader {
14 |     private String topic;
15 |     private int partition;
16 |     private FileInputStream recordInputStream;
17 | 
18 |     public UnverifiedSegmentReader(Path recordFile) throws IOException {
19 |         this(recordFile, "topic", 0);
20 |     }
21 | 
22 |     public UnverifiedSegmentReader(Path recordFile, String topic, int partition) throws IOException {
23 |         recordInputStream = new FileInputStream(recordFile.toFile());
24 |         this.topic = topic;
25 |         this.partition = partition;
26 |         SegmentUtils.ensureValidSegment(recordInputStream);
27 |     }
28 | 
29 |     public Record read() throws IOException {
30 |         return RecordSerde.read(topic, partition, recordInputStream);
31 |     }
32 | 
33 |     public List<Record> readN(int n) throws IOException {
34 |         List<Record> records = new ArrayList<>(n);
35 |         while (records.size() <= n) {
36 |             try {
37 |                 Record record = read();
38 |                 records.add(record);
39 |             } catch (EOFException e) {
40 |                 break;
41 |             }
42 |         }
43 |         return records;
44 |     }
45 | 
46 |     public List<Record> readFully() throws IOException {
47 |         List<Record> records = new ArrayList<>();
48 |         while (true) {
49 |             try {
50 |                 Record record = read();
51 |                 records.add(record);
52 |             } catch (EOFException e) {
53 |                 break;
54 |             }
55 |         }
56 |         return records;
57 |     }
58 | 
59 |     public long position() throws IOException {
60 |         return recordInputStream.getChannel().position();
61 |     }
62 | 
63 |     public void close() throws IOException {
64 |         recordInputStream.close();
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/sink/BackupSinkConfig.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.sink;
 2 | 
 3 | import org.apache.kafka.common.config.AbstractConfig;
 4 | import org.apache.kafka.common.config.ConfigDef;
 5 | 
 6 | import java.util.HashMap;
 7 | import java.util.Map;
 8 | 
 9 | class BackupSinkConfig extends AbstractConfig {
10 |     static final String CLUSTER_PREFIX = "cluster.";
11 |     static final String CLUSTER_BOOTSTRAP_SERVERS = CLUSTER_PREFIX + "bootstrap.servers";
12 |     static final String ADMIN_CLIENT_PREFIX = "admin.";
13 |     static final String TARGET_DIR_CONFIG = "target.dir";
14 |     static final String MAX_SEGMENT_SIZE = "max.segment.size.bytes";
15 |     static final String SNAPSHOT = "snapshot";
16 | 
17 |     static final ConfigDef CONFIG_DEF = new ConfigDef()
18 |             .define(TARGET_DIR_CONFIG, ConfigDef.Type.STRING,
19 |                     ConfigDef.Importance.HIGH, "TargetDir")
20 |             .define(MAX_SEGMENT_SIZE, ConfigDef.Type.INT, 1024 ^ 3, // 1 GiB
21 |                     ConfigDef.Importance.LOW, "Maximum segment size")
22 |             .define(SNAPSHOT, ConfigDef.Type.BOOLEAN, false,
23 |                    ConfigDef.Importance.LOW, "Creates a snapshot. Terminates connector when end of all partitions has been reached.");
24 | 
25 |     BackupSinkConfig(Map<?, ?> props) {
26 |         super(CONFIG_DEF, props, true);
27 |         if (!props.containsKey(TARGET_DIR_CONFIG)) {
28 |             throw new RuntimeException("Missing Configuration Variable: " + TARGET_DIR_CONFIG);
29 |         }
30 |         if (!props.containsKey(MAX_SEGMENT_SIZE)) {
31 |             throw new RuntimeException("Missing Configuration Variable: " + MAX_SEGMENT_SIZE);
32 |         }
33 |     }
34 | 
35 |     Map<String, Object> adminConfig() {
36 |         Map<String, Object> props = new HashMap<>();
37 |         props.putAll(originalsWithPrefix(CLUSTER_PREFIX));
38 |         props.putAll(originalsWithPrefix(ADMIN_CLIENT_PREFIX));
39 |         return props;
40 |     }
41 | 
42 |     String targetDir() {
43 |         return getString(TARGET_DIR_CONFIG);
44 |     }
45 | 
46 |     Integer maxSegmentSizeBytes() {
47 |         return getInt(MAX_SEGMENT_SIZE);
48 |     }
49 | 
50 |     Boolean snapShotMode() { return getBoolean(SNAPSHOT); }
51 | 
52 |     Map<String, Object> consumerConfig() {
53 |         return new HashMap<>(originalsWithPrefix(CLUSTER_PREFIX));
54 |     }
55 | 
56 | }
57 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/sink/BackupSinkConnector.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.sink;
 2 | 
 3 | import org.apache.kafka.common.config.ConfigDef;
 4 | import org.apache.kafka.connect.connector.Task;
 5 | import org.apache.kafka.connect.errors.ConnectException;
 6 | import org.apache.kafka.connect.sink.SinkConnector;
 7 | 
 8 | import java.util.ArrayList;
 9 | import java.util.List;
10 | import java.util.Map;
11 | 
12 | public class BackupSinkConnector extends SinkConnector {
13 |     private Map<String, String> config;
14 | 
15 |     @Override
16 |     public void start(Map<String, String> props) {
17 |         config = props;
18 |     }
19 | 
20 |     @Override
21 |     public Class<? extends Task> taskClass() {
22 |         return BackupSinkTask.class;
23 |     }
24 | 
25 |     @Override
26 |     public List<Map<String, String>> taskConfigs(int maxTasks) {
27 |         if (maxTasks > 1) {
28 |             throw new ConnectException("kafka-backup can currently handle only one task.");
29 |         }
30 |         List<Map<String, String>> configs = new ArrayList<>();
31 |         configs.add(config);
32 |         return configs;
33 |     }
34 | 
35 |     @Override
36 |     public void stop() {
37 | 
38 |     }
39 | 
40 |     @Override
41 |     public ConfigDef config() {
42 |         return BackupSinkConfig.CONFIG_DEF;
43 |     }
44 | 
45 |     @Override
46 |     public String version() {
47 |         return "0.1";
48 |     }
49 | }
50 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/source/BackupSourceConfig.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.source;
 2 | 
 3 | import de.azapps.kafkabackup.common.BackupConfig;
 4 | import org.apache.kafka.common.config.AbstractConfig;
 5 | import org.apache.kafka.common.config.ConfigDef;
 6 | 
 7 | import java.util.Arrays;
 8 | import java.util.HashMap;
 9 | import java.util.List;
10 | import java.util.Map;
11 | 
12 | class BackupSourceConfig extends BackupConfig {
13 |     private static final String CLUSTER_KEY_DESERIALIZER = CLUSTER_PREFIX + "key.deserializer";
14 |     private static final String CLUSTER_VALUE_DESERIALIZER = CLUSTER_PREFIX + "value.deserializer";
15 |     private static final String BATCH_SIZE_CONFIG = "batch.size";
16 |     private static final String SOURCE_DIR_CONFIG = "source.dir";
17 |     private static final String TOPICS_CONFIG = "topics";
18 |     static final String ALLOW_OLD_KAFKA_CONNECT_VERSION = "allow.old.kafka.connect.version.unsafe";
19 | 
20 | 
21 |     private static final ConfigDef CONFIG_DEF = new ConfigDef()
22 |             .define(SOURCE_DIR_CONFIG, ConfigDef.Type.STRING,
23 |                     ConfigDef.Importance.HIGH, "TargetDir")
24 |             .define(BATCH_SIZE_CONFIG, ConfigDef.Type.INT, 100,
25 |                     ConfigDef.Importance.LOW, "Batch size per partition")
26 |             .define(TOPICS_CONFIG, ConfigDef.Type.STRING,
27 |                     ConfigDef.Importance.HIGH, "Topics to restore");
28 | 
29 |     BackupSourceConfig(Map<?, ?> props) {
30 |         super(CONFIG_DEF, props);
31 |         if (!props.containsKey(SOURCE_DIR_CONFIG)) {
32 |             throw new RuntimeException("Missing Configuration Variable: " + SOURCE_DIR_CONFIG);
33 |         }
34 |         if (!props.containsKey(TOPICS_CONFIG)) {
35 |             throw new RuntimeException("Missing Configuration Variable: " + TOPICS_CONFIG);
36 |         }
37 |         if (!props.containsKey(CLUSTER_KEY_DESERIALIZER)) {
38 |             throw new RuntimeException("Missing Configuration Variable: " + CLUSTER_KEY_DESERIALIZER);
39 |         }
40 |         if (!props.containsKey(CLUSTER_VALUE_DESERIALIZER)) {
41 |             throw new RuntimeException("Missing Configuration Variable: " + CLUSTER_VALUE_DESERIALIZER);
42 |         }
43 |     }
44 | 
45 |     Map<String, Object> consumerConfig() {
46 |         return new HashMap<>(originalsWithPrefix(CLUSTER_PREFIX));
47 |     }
48 | 
49 |     String sourceDir() {
50 |         return getString(SOURCE_DIR_CONFIG);
51 |     }
52 | 
53 |     Integer batchSize() {
54 |         return getInt(BATCH_SIZE_CONFIG);
55 |     }
56 | 
57 |     List<String> topics() {
58 |         return Arrays.asList(getString(TOPICS_CONFIG).split("\\s*,\\s*"));
59 |     }
60 | 
61 | 
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/src/main/java/de/azapps/kafkabackup/source/BackupSourceConnector.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.source;
 2 | 
 3 | import org.apache.kafka.clients.producer.RecordMetadata;
 4 | import org.apache.kafka.common.config.ConfigDef;
 5 | import org.apache.kafka.connect.connector.Task;
 6 | import org.apache.kafka.connect.errors.ConnectException;
 7 | import org.apache.kafka.connect.source.SourceConnector;
 8 | import org.apache.kafka.connect.source.SourceRecord;
 9 | import org.apache.kafka.connect.source.SourceTask;
10 | 
11 | import java.util.ArrayList;
12 | import java.util.List;
13 | import java.util.Map;
14 | 
15 | public class BackupSourceConnector extends SourceConnector {
16 |     private Map<String, String> config;
17 | 
18 | 
19 |     @Override
20 |     public void start(Map<String, String> props) {
21 |         config = props;
22 |         if (!config.getOrDefault(BackupSourceConfig.ALLOW_OLD_KAFKA_CONNECT_VERSION, "false").equals("true")) {
23 |             try {
24 |                 SourceTask.class.getMethod("commitRecord", SourceRecord.class, RecordMetadata.class);
25 |             } catch (NoSuchMethodException e) {
26 |                 throw new RuntimeException("Kafka Backup requires at least Kafka Connect 2.4. Otherwise Offsets cannot be committed. If you are sure what you are doing, please set " + BackupSourceConfig.ALLOW_OLD_KAFKA_CONNECT_VERSION + " to true");
27 |             }
28 |         }
29 |     }
30 | 
31 |     @Override
32 |     public Class<? extends Task> taskClass() {
33 |         return BackupSourceTask.class;
34 |     }
35 | 
36 |     @Override
37 |     public List<Map<String, String>> taskConfigs(int maxTasks) {
38 |         if (maxTasks > 1) {
39 |             throw new ConnectException("kafka-backup can currently handle only one task.");
40 |         }
41 |         List<Map<String, String>> configs = new ArrayList<>();
42 |         configs.add(config);
43 |         return configs;
44 |     }
45 | 
46 |     @Override
47 |     public void stop() {
48 | 
49 |     }
50 | 
51 |     @Override
52 |     public ConfigDef config() {
53 |         return new ConfigDef();
54 |     }
55 | 
56 |     @Override
57 |     public String version() {
58 |         return "0.1";
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------
/src/test/assets/v1/partitionindex/testIndex:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/partitionindex/testIndex


--------------------------------------------------------------------------------
/src/test/assets/v1/records/empty_record:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/empty_record


--------------------------------------------------------------------------------
/src/test/assets/v1/records/header_record:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/header_record


--------------------------------------------------------------------------------
/src/test/assets/v1/records/null_record:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/null_record


--------------------------------------------------------------------------------
/src/test/assets/v1/records/simple_record:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/records/simple_record


--------------------------------------------------------------------------------
/src/test/assets/v1/segmentindex/testIndex:
--------------------------------------------------------------------------------
1 |                       
2 |                      
3 |                                    $       
4 | 


--------------------------------------------------------------------------------
/src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_index


--------------------------------------------------------------------------------
/src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_records:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itadventurer/kafka-backup/4692ffeaf2f314aa9ad0d7a2346e47f24ab2dc3d/src/test/assets/v1/segments/segment_partition_000_from_offset_0000000000_records


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/common/TestUtils.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common;
 2 | 
 3 | import java.io.IOException;
 4 | import java.nio.file.Files;
 5 | import java.nio.file.Path;
 6 | import java.nio.file.Paths;
 7 | 
 8 | public class TestUtils {
 9 |     private static Path TEMP_DIR;
10 | 
11 |     static {
12 |         try {
13 |             TEMP_DIR = Files.createTempDirectory("kafka_backup_tests");
14 |         } catch (IOException e) {
15 |             e.printStackTrace();
16 |         }
17 |     }
18 | 
19 |     public static Path getTestDir(String tests) {
20 |         Path ret = Paths.get(TEMP_DIR.toString(), tests);
21 |         try {
22 |             Files.createDirectories(ret);
23 |         } catch (Exception e) {
24 |             throw new RuntimeException(e);
25 |         }
26 |         return ret;
27 |     }
28 | }
29 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/common/partition/PartitionIndexTest.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.common.partition;
 2 | 
 3 | import de.azapps.kafkabackup.common.TestUtils;
 4 | import de.azapps.kafkabackup.common.segment.SegmentIndex;
 5 | import de.azapps.kafkabackup.common.segment.SegmentIndexEntry;
 6 | import org.junit.jupiter.api.Test;
 7 | 
 8 | import java.nio.file.Files;
 9 | import java.nio.file.Path;
10 | import java.nio.file.Paths;
11 | import java.util.ArrayList;
12 | import java.util.List;
13 | import java.util.Optional;
14 | 
15 | import static org.junit.jupiter.api.Assertions.*;
16 | 
17 | public class PartitionIndexTest {
18 |     private static Path TEMP_DIR = TestUtils.getTestDir("PartitionIndexTest");
19 | 
20 |     @Test
21 |     public void simpleRoundtripTest() throws Exception {
22 |         String indexFile = "simpleRoundtripTestIndex";
23 |         List<PartitionIndexEntry> entries = new ArrayList<>();
24 |         entries.add(new PartitionIndexEntry("s0", 0));
25 |         entries.add(new PartitionIndexEntry("s100", 100));
26 |         entries.add(new PartitionIndexEntry("s200", 200));
27 |         entries.add(new PartitionIndexEntry("s300", 300));
28 |         PartitionIndex index = new PartitionIndex(Paths.get(TEMP_DIR.toString(), indexFile));
29 |         index.appendSegment(entries.get(0).filename(), entries.get(0).startOffset());
30 |         index.appendSegment(entries.get(1).filename(), entries.get(1).startOffset());
31 |         index.appendSegment(entries.get(2).filename(), entries.get(2).startOffset());
32 |         index.appendSegment(entries.get(3).filename(), entries.get(3).startOffset());
33 | 
34 |         assertEquals(entries, index.index());
35 |         index.close();
36 | 
37 | 
38 |         PartitionIndex b = new PartitionIndex(Paths.get(TEMP_DIR.toString(), indexFile));
39 |         assertEquals(entries, b.index());
40 |         b.seek(10);
41 |         assertEquals(entries.get(0).filename(), b.readFileName());
42 |         assertTrue(b.hasMoreData());
43 |         b.seek(200);
44 |         assertEquals(entries.get(2).filename(), b.readFileName());
45 |         assertTrue(b.hasMoreData());
46 |         b.seek(310);
47 |         assertEquals(entries.get(3).filename(), b.readFileName());
48 |         assertFalse(b.hasMoreData());
49 |         b.close();
50 |     }
51 | 
52 | 
53 |     @Test
54 |     public void testReadV1Index() throws Exception {
55 |         String indexFile = "testIndex";
56 |         Path directory = Paths.get("src/test/assets/v1/partitionindex");
57 |         List<PartitionIndexEntry> entries = new ArrayList<>();
58 |         entries.add(new PartitionIndexEntry("s0", 0));
59 |         entries.add(new PartitionIndexEntry("s100", 100));
60 |         entries.add(new PartitionIndexEntry("s200", 200));
61 |         entries.add(new PartitionIndexEntry("s300", 300));
62 | 
63 |         PartitionIndex b = new PartitionIndex(Paths.get(directory.toString(), indexFile));
64 |         assertEquals(entries, b.index());
65 |     }
66 | 
67 | 
68 |     /**
69 |      * Utility function to be run once when the format on disk changes to be able to stay backwards-compatible
70 |      * <p>
71 |      * Call it manually once when the format changes
72 |      */
73 |     private static void writeTestIndexToFile() throws Exception {
74 |         String indexFile = "testIndex";
75 |         Path directory = Paths.get("src/test/assets/v1/partitionindex"); // CHANGEME WHEN CHANGING DATA FORMAT!
76 |         Files.createDirectories(directory);
77 | 
78 |         List<PartitionIndexEntry> entries = new ArrayList<>();
79 |         entries.add(new PartitionIndexEntry("s0", 0));
80 |         entries.add(new PartitionIndexEntry("s100", 100));
81 |         entries.add(new PartitionIndexEntry("s200", 200));
82 |         entries.add(new PartitionIndexEntry("s300", 300));
83 | 
84 |         PartitionIndex index = new PartitionIndex(Paths.get(directory.toString(), indexFile));
85 |         index.appendSegment(entries.get(0).filename(), entries.get(0).startOffset());
86 |         index.appendSegment(entries.get(1).filename(), entries.get(1).startOffset());
87 |         index.appendSegment(entries.get(2).filename(), entries.get(2).startOffset());
88 |         index.appendSegment(entries.get(3).filename(), entries.get(3).startOffset());
89 |         index.close();
90 |     }
91 | }
92 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/common/partition/PartitionSerdeTest.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.partition;
  2 | 
  3 | import de.azapps.kafkabackup.common.TestUtils;
  4 | import de.azapps.kafkabackup.common.record.Record;
  5 | import de.azapps.kafkabackup.common.segment.SegmentReader;
  6 | import de.azapps.kafkabackup.common.segment.SegmentUtils;
  7 | import org.apache.kafka.common.header.internals.RecordHeaders;
  8 | import org.apache.kafka.common.record.TimestampType;
  9 | import org.apache.kafka.connect.header.ConnectHeaders;
 10 | import org.apache.kafka.connect.header.Headers;
 11 | import org.junit.jupiter.api.Test;
 12 | 
 13 | import java.nio.charset.StandardCharsets;
 14 | import java.nio.file.Files;
 15 | import java.nio.file.Path;
 16 | import java.util.ArrayList;
 17 | import java.util.List;
 18 | 
 19 | import static org.junit.jupiter.api.Assertions.assertEquals;
 20 | import static org.junit.jupiter.api.Assertions.assertFalse;
 21 | 
 22 | public class PartitionSerdeTest {
 23 |     private static final String TOPIC = "test-topic";
 24 |     private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8);
 25 |     private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8);
 26 |     private static final Path TEMP_DIR = TestUtils.getTestDir("PartitionSerdeTest");
 27 | 
 28 |     private static final RecordHeaders HEADERS = new RecordHeaders();
 29 |     private static final byte[] HEADER_0_VALUE_BYTES = "header0-value".getBytes(StandardCharsets.UTF_8);
 30 |     private static final byte[] HEADER_1_VALUE_BYTES = "header1-value".getBytes(StandardCharsets.UTF_8);
 31 |     static {
 32 |         HEADERS.add("", new byte[0]);
 33 |         HEADERS.add("null", null);
 34 |         HEADERS.add("value0", HEADER_0_VALUE_BYTES);
 35 |         HEADERS.add("value1", HEADER_1_VALUE_BYTES);
 36 |     }
 37 |     @Test
 38 |     public void simpleRoundtripTest() throws Exception {
 39 |         int partition = 0;
 40 | 
 41 |         List<Record> records = new ArrayList<>();
 42 |         records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0));
 43 |         records.add(new Record(TOPIC, partition, null, null, 1));
 44 |         records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2));
 45 |         records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE, HEADERS));
 46 | 
 47 |         PartitionWriter partitionWriter = new PartitionWriter(TOPIC, partition, TEMP_DIR, 50);
 48 |         partitionWriter.append(records.get(0));
 49 |         partitionWriter.append(records.get(1));
 50 |         partitionWriter.append(records.get(2));
 51 |         partitionWriter.append(records.get(3));
 52 |         partitionWriter.close();
 53 | 
 54 |         PartitionReader partitionReader = new PartitionReader(TOPIC, partition, TEMP_DIR);
 55 |         assertEquals(records, partitionReader.readFully());
 56 |         assertFalse(partitionReader.hasMoreData());
 57 |         partitionReader.seek(1);
 58 |         assertEquals(records.get(1), partitionReader.read());
 59 |         partitionReader.seek(3);
 60 |         assertEquals(records.get(3), partitionReader.read());
 61 |         assertFalse(partitionReader.hasMoreData());
 62 |     }
 63 | 
 64 |     @Test
 65 |     public void smallSegmentSizeTest() throws Exception {
 66 |         int partition = 1;
 67 | 
 68 |         List<Record> records = new ArrayList<>();
 69 |         records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0));
 70 |         records.add(new Record(TOPIC, partition, null, null, 1));
 71 |         records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2));
 72 | 
 73 |         PartitionWriter partitionWriter = new PartitionWriter(TOPIC, partition, TEMP_DIR, 1);
 74 |         partitionWriter.append(records.get(0));
 75 |         partitionWriter.append(records.get(1));
 76 |         partitionWriter.append(records.get(2));
 77 |         partitionWriter.close();
 78 | 
 79 |         SegmentReader a = new SegmentReader(TOPIC, partition, TEMP_DIR, 0);
 80 |         assertEquals(records.get(0), a.read());
 81 |         assertFalse(a.hasMoreData());
 82 |         SegmentReader b = new SegmentReader(TOPIC, partition, TEMP_DIR, 1);
 83 |         assertEquals(records.get(1), b.read());
 84 |         assertFalse(b.hasMoreData());
 85 |         SegmentReader c = new SegmentReader(TOPIC, partition, TEMP_DIR, 2);
 86 |         assertEquals(records.get(2), c.read());
 87 |         assertFalse(c.hasMoreData());
 88 |     }
 89 | 
 90 |     @Test
 91 |     public void deleteSomeSegmentsTest() throws Exception {
 92 |         int partition = 2;
 93 | 
 94 |         List<Record> records = new ArrayList<>();
 95 |         records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0));
 96 |         records.add(new Record(TOPIC, partition, null, null, 1));
 97 |         records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2));
 98 |         records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE, HEADERS));
 99 | 
100 |         PartitionWriter partitionWriter = new PartitionWriter(TOPIC, partition, TEMP_DIR, 1);
101 |         partitionWriter.append(records.get(0));
102 |         partitionWriter.append(records.get(1));
103 |         partitionWriter.append(records.get(2));
104 |         partitionWriter.append(records.get(3));
105 |         partitionWriter.close();
106 | 
107 |         // Delete segments 0 and 2
108 |         Files.delete(SegmentUtils.recordsFile(TEMP_DIR, partition, 0));
109 |         Files.delete(SegmentUtils.indexFile(TEMP_DIR, partition, 0));
110 |         Files.delete(SegmentUtils.recordsFile(TEMP_DIR, partition, 2));
111 |         Files.delete(SegmentUtils.indexFile(TEMP_DIR, partition, 2));
112 |         Files.delete(PartitionUtils.indexFile(TEMP_DIR, partition));
113 | 
114 |         // Restore indices
115 |         PartitionIndexRestore restore = new PartitionIndexRestore(TEMP_DIR, partition);
116 |         restore.restore();
117 | 
118 |         // Expected
119 |         List<Record> expected = new ArrayList<>();
120 |         expected.add(records.get(1));
121 |         expected.add(records.get(3));
122 | 
123 |         PartitionReader reader = new PartitionReader(TOPIC, partition, TEMP_DIR);
124 |         assertEquals(expected, reader.readFully());
125 |     }
126 | }
127 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/common/record/RecordSerdeTest.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.record;
  2 | 
  3 | import org.apache.kafka.common.header.internals.RecordHeaders;
  4 | import org.apache.kafka.common.record.TimestampType;
  5 | import org.junit.jupiter.api.Test;
  6 | 
  7 | import java.io.*;
  8 | import java.nio.charset.StandardCharsets;
  9 | 
 10 | import static org.junit.jupiter.api.Assertions.assertEquals;
 11 | import static org.junit.jupiter.api.Assertions.assertNotEquals;
 12 | 
 13 | public class RecordSerdeTest {
 14 | 
 15 |     private static final String TOPIC = "test-topic";
 16 |     private static final int PARTITION = 42;
 17 |     private static final long OFFSET = 123;
 18 |     private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8);
 19 |     private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8);
 20 |     private static final byte[] NULL_TIMESTAMP_BYTES = "null-timestamp".getBytes(StandardCharsets.UTF_8);
 21 | 
 22 |     private static final String SIMPLE_RECORD_FILE = "simple_record";
 23 |     private static final String NULL_RECORD_FILE = "null_record";
 24 |     private static final String EMPTY_RECORD_FILE = "empty_record";
 25 |     private static final String HEADER_RECORD_FILE = "header_record";
 26 | 
 27 |     // Example records
 28 |     private static final Record SIMPLE_RECORD, NULL_RECORD, EMPTY_RECORD, HEADER_RECORD, NULL_TIMESTAMP_RECORD;
 29 | 
 30 |     static {
 31 |         SIMPLE_RECORD = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET);
 32 |         NULL_RECORD = new Record(TOPIC, PARTITION, null, null, OFFSET);
 33 |         EMPTY_RECORD = new Record(TOPIC, PARTITION, new byte[0], new byte[0], OFFSET);
 34 |         NULL_TIMESTAMP_RECORD = new Record(TOPIC, PARTITION, NULL_TIMESTAMP_BYTES, null, OFFSET, null, TimestampType.CREATE_TIME);
 35 |         // Build multiple headers that might cause problems
 36 |         RecordHeaders headers = new RecordHeaders();
 37 |         headers.add("", new byte[0]);
 38 |         headers.add("null", null);
 39 |         headers.add("value", VALUE_BYTES);
 40 |         HEADER_RECORD = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, null, TimestampType.NO_TIMESTAMP_TYPE, headers);
 41 |     }
 42 | 
 43 |     @Test
 44 |     public void roundtripTest() throws Exception {
 45 |         Record simpleRoundtrip = writeAndReadRecord(SIMPLE_RECORD);
 46 |         assertEquals(SIMPLE_RECORD, simpleRoundtrip);
 47 |     }
 48 | 
 49 |     @Test
 50 |     public void roundtripWithNull() throws Exception {
 51 |         Record nullRoundtrip = writeAndReadRecord(NULL_RECORD);
 52 |         assertEquals(NULL_RECORD, nullRoundtrip);
 53 | 
 54 |         Record emptyRoundtrip = writeAndReadRecord(EMPTY_RECORD);
 55 |         assertEquals(EMPTY_RECORD, emptyRoundtrip);
 56 | 
 57 |         // Must be different
 58 |         assertNotEquals(nullRoundtrip, emptyRoundtrip);
 59 |     }
 60 | 
 61 |     @Test
 62 |     public void roundtripNullTimestamp() throws Exception {
 63 |         Record nullTimestampRoundtrip =writeAndReadRecord(NULL_TIMESTAMP_RECORD);
 64 |         assertEquals(NULL_TIMESTAMP_RECORD, nullTimestampRoundtrip);
 65 |     }
 66 | 
 67 |     @Test
 68 |     public void roundtripHeaders() throws Exception {
 69 |         Record headerRoundtrip = writeAndReadRecord(HEADER_RECORD);
 70 |         assertEquals(HEADER_RECORD, headerRoundtrip);
 71 |     }
 72 | 
 73 |     /**
 74 |      * DO NOT CHANGE THIS TEST!
 75 |      */
 76 |     @Test
 77 |     public void readV1() throws Exception {
 78 |         File v1Directory = new File("src/test/assets/v1/records");
 79 |         Record simpleRecord = readFromFile(new File(v1Directory, SIMPLE_RECORD_FILE));
 80 |         assertEquals(SIMPLE_RECORD, simpleRecord);
 81 |         Record nullRecord = readFromFile(new File(v1Directory, NULL_RECORD_FILE));
 82 |         assertEquals(NULL_RECORD, nullRecord);
 83 |         assertNotEquals(SIMPLE_RECORD, nullRecord); // just to make sure!
 84 |         Record emptyRecord = readFromFile(new File(v1Directory, EMPTY_RECORD_FILE));
 85 |         assertEquals(EMPTY_RECORD, emptyRecord);
 86 |         assertNotEquals(NULL_RECORD, emptyRecord); // just to make sure!
 87 |         Record headerRecord = readFromFile(new File(v1Directory, HEADER_RECORD_FILE));
 88 |         assertEquals(HEADER_RECORD, headerRecord);
 89 |         assertNotEquals(EMPTY_RECORD, headerRecord); // just to make sure!
 90 |     }
 91 | 
 92 |     // UTILS
 93 | 
 94 |     private Record writeAndReadRecord(Record record) throws IOException {
 95 |         ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
 96 |         RecordSerde.write(outputStream, record);
 97 |         byte[] data = outputStream.toByteArray();
 98 |         return RecordSerde.read(TOPIC, PARTITION, new ByteArrayInputStream(data));
 99 |     }
100 | 
101 |     private static Record readFromFile(File file) throws IOException {
102 |         FileInputStream inputStream = new FileInputStream(file);
103 |         return RecordSerde.read(TOPIC, PARTITION, inputStream);
104 |     }
105 | 
106 |     /**
107 |      * Utility function to be run once when the format on disk changes to be able to stay backwards-compatible
108 |      * <p>
109 |      * Call it manually once when the format changes
110 |      */
111 |     private static void writeTestRecordsToFile() throws IOException {
112 |         File directory = new File("src/test/assets/v1/records"); // CHANGEME WHEN CHANGING DATA FORMAT!
113 |         writeCurrentVersionRecordToFile(SIMPLE_RECORD, new File(directory, SIMPLE_RECORD_FILE));
114 |         writeCurrentVersionRecordToFile(NULL_RECORD, new File(directory, NULL_RECORD_FILE));
115 |         writeCurrentVersionRecordToFile(EMPTY_RECORD, new File(directory, EMPTY_RECORD_FILE));
116 |         writeCurrentVersionRecordToFile(HEADER_RECORD, new File(directory, HEADER_RECORD_FILE));
117 |     }
118 | 
119 |     private static void writeCurrentVersionRecordToFile(Record record, File file) throws IOException {
120 |         FileOutputStream outputStream = new FileOutputStream(file);
121 |         RecordSerde.write(outputStream, record);
122 |     }
123 | }
124 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/common/record/RecordTest.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.record;
  2 | 
  3 | import org.apache.kafka.common.header.internals.RecordHeaders;
  4 | import org.apache.kafka.common.record.TimestampType;
  5 | import org.apache.kafka.connect.sink.SinkRecord;
  6 | import org.junit.jupiter.api.Test;
  7 | 
  8 | import java.nio.charset.StandardCharsets;
  9 | import java.util.Arrays;
 10 | 
 11 | import static org.junit.jupiter.api.Assertions.assertEquals;
 12 | import static org.junit.jupiter.api.Assertions.assertNotEquals;
 13 | 
 14 | public class RecordTest {
 15 |     private static final String TOPIC = "test-topic";
 16 |     private static final int PARTITION = 42;
 17 |     private static final long OFFSET = 123;
 18 |     private static final TimestampType TIMESTAMP_TYPE = TimestampType.LOG_APPEND_TIME;
 19 |     private static final Long TIMESTAMP = 573831430000L;
 20 |     // encoding here is not really important, we just want some bytes
 21 |     private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8);
 22 |     private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8);
 23 |     // Header fixtures:
 24 |     private static final byte[] HEADER_0_VALUE_BYTES = "header0-value".getBytes(StandardCharsets.UTF_8);
 25 |     private static final byte[] HEADER_1_VALUE_BYTES = "header1-value".getBytes(StandardCharsets.UTF_8);
 26 |     private static final RecordHeaders HEADERS = new RecordHeaders();
 27 | 
 28 |     static {
 29 |         HEADERS.add("", new byte[0]);
 30 |         HEADERS.add("null", null);
 31 |         HEADERS.add("value0", HEADER_0_VALUE_BYTES);
 32 |         HEADERS.add("value1", HEADER_1_VALUE_BYTES);
 33 |     }
 34 | 
 35 |     @Test
 36 |     public void equalsIdentityTrueTest() {
 37 |         // GIVEN
 38 |         Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS);
 39 | 
 40 |         // THEN
 41 |         assertEquals(a, a);
 42 |     }
 43 | 
 44 |     @Test
 45 |     public void equalsValueTrueTest() {
 46 |         // GIVEN
 47 |         Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS);
 48 | 
 49 |         RecordHeaders bHeaders = new RecordHeaders();
 50 |         bHeaders.add("", new byte[0]);
 51 |         bHeaders.add("null", null);
 52 |         bHeaders.add("value0", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length));
 53 |         bHeaders.add("value1", Arrays.copyOf(HEADER_1_VALUE_BYTES, HEADER_1_VALUE_BYTES.length));
 54 |         Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, bHeaders);
 55 | 
 56 |         // THEN
 57 |         assertEquals(a, b);
 58 |         assertEquals(b, a);
 59 |     }
 60 | 
 61 |     @Test
 62 |     public void equalsFalseBecauseStrictSubsetTest() {
 63 |         // GIVEN
 64 |         Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS);
 65 |         Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET);
 66 | 
 67 |         // THEN
 68 |         assertNotEquals(a, b);
 69 |         assertNotEquals(b, a);
 70 |     }
 71 | 
 72 |     @Test
 73 |     public void equalsFalseBecauseHeadersStrictSubsetTest() {
 74 |         // GIVEN
 75 |         RecordHeaders aHeaders = new RecordHeaders();
 76 |         aHeaders.add("header0-key", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length));
 77 |         aHeaders.add("header1-key", Arrays.copyOf(HEADER_1_VALUE_BYTES, HEADER_1_VALUE_BYTES.length));
 78 |         Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, aHeaders);
 79 | 
 80 |         RecordHeaders bHeaders = new RecordHeaders();
 81 |         bHeaders.add("header0-key", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length));
 82 |         Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, bHeaders);
 83 | 
 84 | 
 85 |         RecordHeaders cHeaders = new RecordHeaders();
 86 |         cHeaders.add("header1-key", Arrays.copyOf(HEADER_0_VALUE_BYTES, HEADER_0_VALUE_BYTES.length));
 87 |         cHeaders.add("header1-key", Arrays.copyOf(HEADER_1_VALUE_BYTES, HEADER_1_VALUE_BYTES.length));
 88 |         Record c = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, cHeaders);
 89 | 
 90 |         // THEN
 91 |         assertNotEquals(a, b);
 92 |         assertNotEquals(b, a);
 93 |         assertNotEquals(a, c);
 94 |         assertNotEquals(b, c);
 95 |     }
 96 | 
 97 |     /**
 98 |      * This is not used during normal operations, but we need to verify that this works
 99 |      * correctly as we use the functions for our end to end tests!
100 |      */
101 |     @Test
102 |     public void roundtripSinkRecordTest() {
103 | 
104 |         // given
105 |         Record a = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET, TIMESTAMP, TIMESTAMP_TYPE, HEADERS);
106 |         Record b = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE, HEADERS);
107 |         Record c = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, 0);
108 |         Record d = new Record(TOPIC, PARTITION, null, null, 1);
109 |         Record e = new Record(TOPIC, PARTITION, new byte[0], new byte[0], 2);
110 |         Record f = new Record(TOPIC, PARTITION, KEY_BYTES, VALUE_BYTES, OFFSET);
111 | 
112 |         // transform
113 |         SinkRecord srA = a.toSinkRecord();
114 |         SinkRecord srB = b.toSinkRecord();
115 |         SinkRecord srC = c.toSinkRecord();
116 |         SinkRecord srD = d.toSinkRecord();
117 |         SinkRecord srE = e.toSinkRecord();
118 |         SinkRecord srF = f.toSinkRecord();
119 | 
120 |         // expect
121 |         assertEquals(a, Record.fromSinkRecord(srA));
122 |         assertEquals(b, Record.fromSinkRecord(srB));
123 |         assertEquals(c, Record.fromSinkRecord(srC));
124 |         assertEquals(d, Record.fromSinkRecord(srD));
125 |         assertEquals(e, Record.fromSinkRecord(srE));
126 |         assertEquals(f, Record.fromSinkRecord(srF));
127 | 
128 | 
129 |     }
130 | }
131 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/common/segment/SegmentIndexTest.java:
--------------------------------------------------------------------------------
  1 | package de.azapps.kafkabackup.common.segment;
  2 | 
  3 | import de.azapps.kafkabackup.common.TestUtils;
  4 | import de.azapps.kafkabackup.common.record.Record;
  5 | import org.apache.kafka.common.record.TimestampType;
  6 | import org.junit.jupiter.api.Test;
  7 | 
  8 | import java.nio.charset.StandardCharsets;
  9 | import java.nio.file.Files;
 10 | import java.nio.file.Path;
 11 | import java.nio.file.Paths;
 12 | import java.util.ArrayList;
 13 | import java.util.List;
 14 | import java.util.Optional;
 15 | 
 16 | import static org.junit.jupiter.api.Assertions.*;
 17 | 
 18 | public class SegmentIndexTest {
 19 |     private static final String TOPIC = "test-topic";
 20 |     private static final byte[] KEY_BYTES = "test-key".getBytes(StandardCharsets.UTF_8);
 21 |     private static final byte[] VALUE_BYTES = "test-value".getBytes(StandardCharsets.UTF_8);
 22 |     private static final SegmentIndexEntry ENTRY1 = new SegmentIndexEntry(0, 1, 10);
 23 |     private static final SegmentIndexEntry ENTRY2 = new SegmentIndexEntry(1, 11, 10);
 24 |     private static final SegmentIndexEntry ENTRY3 = new SegmentIndexEntry(5, 21, 15);
 25 |     private static final SegmentIndexEntry ENTRY4 = new SegmentIndexEntry(6, 36, 10);
 26 |     private static final Path TEMP_DIR = TestUtils.getTestDir("SegmentIndexTest");
 27 | 
 28 |     @Test
 29 |     public void simpleRoundtripTest() throws Exception {
 30 |         String indexFile = "simpleRoundtripTestIndex";
 31 |         SegmentIndex index = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile));
 32 |         assertEquals(0L, index.lastValidStartPosition());
 33 |         List<SegmentIndexEntry> entries = new ArrayList<>();
 34 |         entries.add(ENTRY1);
 35 |         index.addEntry(ENTRY1);
 36 |         entries.add(ENTRY2);
 37 |         index.addEntry(ENTRY2);
 38 |         entries.add(ENTRY3);
 39 |         index.addEntry(ENTRY3);
 40 |         entries.add(ENTRY4);
 41 |         index.addEntry(ENTRY4);
 42 | 
 43 |         index.close();
 44 | 
 45 | 
 46 |         SegmentIndex b = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile));
 47 |         assertEquals(entries, b.index());
 48 |         assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findByOffset(5));
 49 |         assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findEarliestWithHigherOrEqualOffset(2));
 50 |         assertEquals(Optional.empty(), b.findEarliestWithHigherOrEqualOffset(11));
 51 |         assertEquals(36, b.lastValidStartPosition());
 52 |     }
 53 | 
 54 |     @Test
 55 |     public void writeRecordThenCheckIndex() throws Exception {
 56 |         int partition = 0;
 57 |         SegmentWriter segmentWriter = new SegmentWriter(TOPIC, partition, 0, TEMP_DIR);
 58 |         segmentWriter.append(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0));
 59 | 
 60 |         SegmentIndex i1 = new SegmentIndex(SegmentUtils.indexFile(TEMP_DIR, partition, 0));
 61 |         assertEquals(1, i1.size());
 62 | 
 63 | 
 64 |         segmentWriter.append(new Record(TOPIC, partition, null, null, 1));
 65 |         segmentWriter.append(new Record(TOPIC, partition, new byte[0], new byte[0], 2));
 66 |         segmentWriter.close();
 67 | 
 68 |         SegmentIndex i2 = new SegmentIndex(SegmentUtils.indexFile(TEMP_DIR, partition, 0));
 69 |         assertEquals(3, i2.size());
 70 |         long fileLength = SegmentUtils.recordsFile(TEMP_DIR, partition, 0).toFile().length();
 71 |         //noinspection OptionalGetWithoutIsPresent
 72 |         SegmentIndexEntry entry = i2.lastIndexEntry().get();
 73 |         assertEquals(fileLength, entry.recordFilePosition() + entry.recordByteLength());
 74 |     }
 75 | 
 76 |     @Test
 77 |     public void restoreTest() throws Exception {
 78 |         int partition = 1;
 79 |         List<Record> records = new ArrayList<>();
 80 |         records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 0));
 81 |         records.add(new Record(TOPIC, partition, null, null, 1));
 82 |         records.add(new Record(TOPIC, partition, new byte[0], new byte[0], 2));
 83 |         records.add(new Record(TOPIC, partition, KEY_BYTES, VALUE_BYTES, 3, null, TimestampType.NO_TIMESTAMP_TYPE));
 84 | 
 85 |         SegmentWriter segmentWriter = new SegmentWriter(TOPIC, partition, 0, TEMP_DIR);
 86 |         for (Record record : records) {
 87 |             segmentWriter.append(record);
 88 |         }
 89 |         segmentWriter.close();
 90 |         Path indexFile = SegmentUtils.indexFile(TEMP_DIR, partition, 0);
 91 |         SegmentIndex a = new SegmentIndex(indexFile);
 92 |         Files.delete(indexFile);
 93 |         SegmentIndexRestore restore = new SegmentIndexRestore(SegmentUtils.recordsFile(TEMP_DIR, partition, 0));
 94 |         restore.restore();
 95 |         SegmentIndex b = new SegmentIndex(indexFile);
 96 |         assertEquals(a.index(), b.index());
 97 |     }
 98 | 
 99 |     @Test
100 |     public void incrementingIndex() throws Exception {
101 |         String indexFile = "incrementingIndex";
102 |         SegmentIndex index = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile));
103 |         index.addEntry(new SegmentIndexEntry(5, 22, 15));
104 |         // Wrong offset
105 |         assertThrows(SegmentIndex.IndexException.class,
106 |                 () -> index.addEntry(new SegmentIndexEntry(0, 37, 10)));
107 |         // Should be ok
108 |         assertDoesNotThrow(() -> index.addEntry(new SegmentIndexEntry(10, 37, 10)));
109 |         index.close();
110 |     }
111 | 
112 |     @Test
113 |     public void emptyIndexTest() throws Exception {
114 |         String indexFile = "emptyIndexTest";
115 |         SegmentIndex index = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile));
116 |         assertEquals(0L, index.lastValidStartPosition());
117 |         index.close();
118 | 
119 | 
120 |         SegmentIndex b = new SegmentIndex(Paths.get(TEMP_DIR.toString(), indexFile));
121 |         assertEquals(0L, b.lastValidStartPosition());
122 |         assertEquals(Optional.empty(), b.findEarliestWithHigherOrEqualOffset(0));
123 |         assertEquals(Optional.empty(), b.findEarliestWithHigherOrEqualOffset(11));
124 |         assertTrue(b.index().isEmpty());
125 |     }
126 | 
127 | 
128 |     @Test
129 |     public void testReadV1Index() throws Exception {
130 |         String indexFile = "testIndex";
131 |         Path directory = Paths.get("src/test/assets/v1/segmentindex");
132 |         List<SegmentIndexEntry> entries = new ArrayList<>();
133 |         entries.add(ENTRY1);
134 |         entries.add(ENTRY2);
135 |         entries.add(ENTRY3);
136 |         entries.add(ENTRY4);
137 | 
138 |         SegmentIndex b = new SegmentIndex(Paths.get(directory.toString(), indexFile));
139 |         assertEquals(entries, b.index());
140 |         assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findByOffset(5));
141 |         assertEquals(Optional.of(ENTRY3.recordFilePosition()), b.findEarliestWithHigherOrEqualOffset(2));
142 |         assertEquals(36, b.lastValidStartPosition());
143 |     }
144 | 
145 | 
146 |     /**
147 |      * Utility function to be run once when the format on disk changes to be able to stay backwards-compatible
148 |      * <p>
149 |      * Call it manually once when the format changes
150 |      */
151 |     private static void writeTestIndexToFile() throws Exception {
152 |         String indexFile = "testIndex";
153 |         Path directory = Paths.get("src/test/assets/v1/segmentindex"); // CHANGEME WHEN CHANGING DATA FORMAT!
154 |         Files.createDirectories(directory);
155 | 
156 |         SegmentIndex index = new SegmentIndex(Paths.get(directory.toString(), indexFile));
157 |         index.addEntry(ENTRY1);
158 |         index.addEntry(ENTRY2);
159 |         index.addEntry(ENTRY3);
160 |         index.addEntry(ENTRY4);
161 |         index.close();
162 |     }
163 | }
164 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/sink/MockEndOffsetReader.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.sink;
 2 | 
 3 | import de.azapps.kafkabackup.common.offset.EndOffsetReader;
 4 | import org.apache.kafka.common.TopicPartition;
 5 | 
 6 | import java.util.Collection;
 7 | import java.util.HashMap;
 8 | import java.util.Map;
 9 | 
10 | public class MockEndOffsetReader extends EndOffsetReader {
11 |   private Map<TopicPartition, Long> offsets;
12 |   public MockEndOffsetReader(Map<TopicPartition, Long> offsets) {
13 |     super(new HashMap<>());
14 |     this.offsets = offsets;
15 |   }
16 |   @Override
17 |   public Map<TopicPartition, Long> getEndOffsets(Collection<TopicPartition> partitions) {
18 |     return offsets;
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/sink/MockOffsetSink.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.sink;
 2 | 
 3 | import de.azapps.kafkabackup.common.offset.OffsetSink;
 4 | import org.apache.kafka.clients.admin.AdminClient;
 5 | 
 6 | import java.io.IOException;
 7 | import java.nio.file.Path;
 8 | 
 9 | public class MockOffsetSink extends OffsetSink {
10 |     public MockOffsetSink(AdminClient adminClient, Path targetDir) {
11 |         super(adminClient, targetDir);
12 |     }
13 | 
14 |     @Override
15 |     public void syncConsumerGroups() {
16 | 
17 |     }
18 | 
19 |     @Override
20 |     public void syncOffsets() throws IOException {
21 |     }
22 | 
23 |     @Override
24 |     public void flush() throws IOException {
25 |     }
26 | 
27 |     @Override
28 |     public void close() throws IOException {
29 |     }
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/src/test/java/de/azapps/kafkabackup/sink/MockSinkTaskContext.java:
--------------------------------------------------------------------------------
 1 | package de.azapps.kafkabackup.sink;
 2 | 
 3 | import org.apache.kafka.common.TopicPartition;
 4 | import org.apache.kafka.connect.sink.SinkTaskContext;
 5 | 
 6 | import java.util.HashMap;
 7 | import java.util.Map;
 8 | import java.util.Set;
 9 | 
10 | public class MockSinkTaskContext implements SinkTaskContext {
11 | 
12 |     @Override
13 |     public Map<String, String> configs() {
14 |         return new HashMap<>();
15 |     }
16 | 
17 |     @Override
18 |     public void offset(Map<TopicPartition, Long> offsets) {
19 | 
20 |     }
21 | 
22 |     @Override
23 |     public void offset(TopicPartition tp, long offset) {
24 | 
25 |     }
26 | 
27 |     @Override
28 |     public void timeout(long timeoutMs) {
29 | 
30 |     }
31 | 
32 |     @Override
33 |     public Set<TopicPartition> assignment() {
34 |         return null;
35 |     }
36 | 
37 |     @Override
38 |     public void pause(TopicPartition... partitions) {
39 | 
40 |     }
41 | 
42 |     @Override
43 |     public void resume(TopicPartition... partitions) {
44 | 
45 |     }
46 | 
47 |     @Override
48 |     public void requestCommit() {
49 | 
50 |     }
51 | }
52 | 


--------------------------------------------------------------------------------
/system_test/.gitignore:
--------------------------------------------------------------------------------
1 | out


--------------------------------------------------------------------------------
/system_test/01_simple_roundtrip_test.yaml:
--------------------------------------------------------------------------------
  1 | # Test description
  2 | #
  3 | # Create two topics:
  4 | # * backup-test-1partition: 1 partition
  5 | # * backup-test-empty-topic: 3 partitions
  6 | #
  7 | # * Fill the 1partition topic with 300 messages á 10KB each in every partition
  8 | # * Consume the messages with three consumer groups: cg-100: consume 100 messages, cg-200 consume 200 messages, cg-300 consume 300 messages
  9 | # * Fill one of the 3 partitions of empty-topic with 300 messages. Leave the other partitions empty
 10 | #
 11 | # * Take a backup
 12 | # * Stop the Cluster
 13 | #
 14 | # * Start the backup
 15 | # * Verify that all messages have been written correctly
 16 | # * Check the consumer offsets
 17 | #
 18 | # * Stop everything
 19 | - name: coyote
 20 |   title: kafka-backup
 21 | 
 22 | - name: Setup Cluster to Backup
 23 |   entries:
 24 |     - name: Docker Compose Up
 25 |       command: docker-compose up -d
 26 |     - name: Clean previous data
 27 |       command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest rm -rf "/kafka-backup/001_simple_1partition_test/"
 28 |     - name: Wait for Kafka to get up
 29 |       command: docker logs to-backup-kafka 2>&1 | grep -q '\[KafkaServer id=1\] started'
 30 |       timeout: 30s
 31 | 
 32 | - name: Create  Topic for tests
 33 |   entries:
 34 |     - command: docker-compose exec -T to-backup-kafka bash -c '
 35 |         utils.py create_topic --topic backup-test-1partition --partitions 1 &&
 36 |         utils.py create_topic --topic backup-test-weird-msgs --partitions 1 &&
 37 |         utils.py create_topic --topic backup-test-empty-topic --partitions 3'
 38 | - name: Produce Messages
 39 |   entries:
 40 |     - name: Produce 300 messages
 41 |       command: docker-compose exec -T to-backup-kafka bash -c '
 42 |         utils.py produce_messages --topic backup-test-1partition --partition 0 --start_num 0 --count 300 &&
 43 |         utils.py produce_messages --topic backup-test-empty-topic --partition 0 --start_num 0 --count 300'
 44 |     - name: Produce 'weird' messages
 45 |       command: docker-compose exec -T to-backup-kafka bash -c '
 46 |         utils.py produce_weird_messages --partition 0 --topic backup-test-weird-msgs'
 47 | - name: Consume messages
 48 |   entries:
 49 |     - name: Consume 100 messages with cg-100
 50 |       command: docker-compose exec -T to-backup-kafka
 51 |         utils.py consume_messages --topic backup-test-1partition --consumer_group cg-100 --count 100
 52 |     - name: Consume 200 messages with cg-200
 53 |       command: docker-compose exec -T to-backup-kafka
 54 |         utils.py consume_messages --topic backup-test-1partition --consumer_group cg-200 --count 200
 55 |     - name: Consume 300 messages with cg-300
 56 |       command: docker-compose exec -T to-backup-kafka
 57 |         utils.py consume_messages --topic backup-test-1partition --consumer_group cg-300 --count 300
 58 | - name: Check Consumer Group Offsets
 59 |   entries:
 60 |     - name: Count Messages
 61 |       command: docker-compose exec -T to-backup-kafka
 62 |         utils.py count_messages
 63 |       stdout_has:
 64 |         - "backup-test-1partition 0: 300"
 65 |         - "backup-test-empty-topic 0: 300"
 66 |         - "backup-test-empty-topic 1: 0"
 67 |         - "backup-test-empty-topic 2: 0"
 68 |     - name: Check Consumer Group cg-100
 69 |       command: docker-compose exec -T to-backup-kafka
 70 |         kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-100
 71 |       stdout_has: [ 'backup-test-1partition 0          100             300             200' ]
 72 |     - name: Check Consumer Group cg-200
 73 |       command: docker-compose exec -T to-backup-kafka
 74 |         kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-200
 75 |       stdout_has: [ 'backup-test-1partition 0          200             300             100' ]
 76 |     - name: Check Consumer Group cg-200
 77 |       command: docker-compose exec -T to-backup-kafka
 78 |         kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-300
 79 |       stdout_has: [ 'backup-test-1partition 0          300             300             0' ]
 80 | 
 81 | - name: Start Kafka Backup
 82 |   entries:
 83 |     - name: Start Kafka Backup
 84 |       command: >
 85 |         docker run -d -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_to-backup -p 18083:8083 --name to-backup --rm
 86 |         kafka-backup-dev:latest backup-standalone.sh --bootstrap-server to-backup-kafka:9092
 87 |         --target-dir /kafka-backup/001_simple_1partition_test/ --topics-regex 'backup-test.*'
 88 |     - command: sleep 30
 89 |       nolog: true
 90 |     - name: Check For errors
 91 |       timeout: 300s
 92 |       command: docker exec to-backup curl -vs "http://localhost:8083/connectors/backup-sink/status"
 93 |       stderr_has: ["200 OK"]
 94 |       stdout_has: ["RUNNING"]
 95 |       stdout_not_has: ["FAILED"]
 96 | 
 97 | - name: Stop Cluster that was backed up
 98 |   entries:
 99 |     - name: Stop Kafka Backup
100 |       command: docker kill to-backup
101 |     - name: Docker Compose Down
102 |       command: docker-compose stop to-backup-kafka
103 | 
104 | - name: Restore
105 |   entries:
106 |     - name: Create Topic
107 |       command: docker-compose exec -T restore-to-kafka bash -c '
108 |         utils.py create_topic --topic backup-test-1partition --partitions 1 &&
109 |         utils.py create_topic --topic backup-test-weird-msgs --partitions 1 &&
110 |         utils.py create_topic --topic backup-test-empty-topic --partitions 3'
111 |     - name: Run Kafka Restore
112 |       command: >
113 |         docker run -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_restore-to --name restore-to --rm
114 |         kafka-backup-dev:latest restore-standalone.sh --bootstrap-server restore-to-kafka:9092
115 |         --source-dir /kafka-backup/001_simple_1partition_test/ --topics 'backup-test-1partition,backup-test-empty-topic,backup-test-weird-msgs'
116 |       timeout: 60s
117 |       stdout_has: ['All records read.']
118 | 
119 | - name: Verify Backup
120 |   entries:
121 |     - name: Verify Records
122 |       timeout: 30s
123 |       command: docker-compose exec -T restore-to-kafka bash -c '
124 |         utils.py consume_verify_messages --topic backup-test-1partition --partition 0 --count 300 &&
125 |         utils.py consume_verify_messages --topic backup-test-empty-topic --partition 0 --count 300'
126 |     - name: Verify Weird Records
127 |       timeout: 15s
128 |       command: docker-compose exec -T restore-to-kafka bash -c '
129 |         utils.py consume_verify_weird_messages --partition 0 --topic backup-test-weird-msgs'
130 |     - name: Count Messages
131 |       timeout: 30s
132 |       command: docker-compose exec -T restore-to-kafka
133 |         utils.py count_messages
134 |       stdout_has:
135 |         - "backup-test-1partition 0: 300"
136 |         - "backup-test-empty-topic 0: 300"
137 |         - "backup-test-empty-topic 1: 0"
138 |         - "backup-test-empty-topic 2: 0"
139 |     - name: Check Consumer Group cg-100
140 |       timeout: 30s
141 |       command: docker-compose exec -T restore-to-kafka
142 |         kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-100
143 |       stdout_has: [ 'backup-test-1partition 0          100' ]
144 |     - name: Check Consumer Group cg-200
145 |       timeout: 30s
146 |       command: docker-compose exec -T restore-to-kafka
147 |         kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-200
148 |       stdout_has: [ 'backup-test-1partition 0          200' ]
149 |     - name: Check Consumer Group cg-200
150 |       timeout: 30s
151 |       command: docker-compose exec -T restore-to-kafka
152 |         kafka-consumer-groups.sh --bootstrap-server localhost:9092 --describe --group cg-300
153 |       stdout_has: [ 'backup-test-1partition 0          300' ]
154 | 
155 | - name: Clean-up Containers
156 |   entries:
157 |     - name: Docker Compose Down
158 |       command: docker-compose down
159 |       timeout: 15s
160 | 


--------------------------------------------------------------------------------
/system_test/04_delete_old_segments.yaml:
--------------------------------------------------------------------------------
 1 | # * Create a backup with multiple segments
 2 | # * Delete some old segments
 3 | # * Delete all indexes
 4 | # * Recreate all indexes
 5 | # * Do a restore
 6 | - name: coyote
 7 |   title: kafka-backup
 8 | 
 9 | - name: Setup Cluster to Backup
10 |   entries:
11 |     - name: Docker Compose Up
12 |       command: docker-compose up -d
13 |     - name: Clean previous data
14 |       command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest rm -rf "/kafka-backup/04_delete_old_segment/"
15 |     - name: Wait for Kafka to get up
16 |       command: docker logs to-backup-kafka 2>&1 | grep -q '\[KafkaServer id=1\] started'
17 |       timeout: 30s
18 | 
19 | - name: Create  Topic for tests
20 |   entries:
21 |     - command: docker-compose exec -T to-backup-kafka
22 |         utils.py create_topic --topic backup-test-1partition --partitions 1
23 | - name: Produce Messages
24 |   entries:
25 |     - name: Produce 3000 messages
26 |       command: docker-compose exec -T to-backup-kafka
27 |         utils.py produce_messages --topic backup-test-1partition --partition 0 --start_num 0 --count 3000
28 |     - name: Count Messages
29 |       command: docker-compose exec -T to-backup-kafka
30 |         utils.py count_messages
31 |       stdout_has:
32 |         - "backup-test-1partition 0: 3000"
33 | 
34 | - name: Start Kafka Backup
35 |   entries:
36 |     - name: Start Kafka Backup
37 |       command: >
38 |         docker run -d -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_to-backup --name to-backup --rm
39 |         kafka-backup-dev:latest backup-standalone.sh --bootstrap-server to-backup-kafka:9092
40 |         --target-dir /kafka-backup/04_delete_old_segment/ --topics-regex 'backup-test.*' --max-segment-size 10485760
41 |     - command: sleep 30
42 |       nolog: true
43 | 
44 | - name: Stop Cluster that was backed up
45 |   entries:
46 |     - name: Stop Kafka Backup
47 |       command: docker kill to-backup
48 |     - name: Docker Compose Down
49 |       command: docker-compose stop to-backup-kafka
50 | 
51 | - name: Delete old segment and restore the index
52 |   entries:
53 |     - name: Delete all indexes
54 |       command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest bash -c  \
55 |         'rm /kafka-backup/04_delete_old_segment/backup-test-1partition/*index*'
56 |     - name: Delete old segment
57 |       command: docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest bash -c  \
58 |         'rm /kafka-backup/04_delete_old_segment/backup-test-1partition/segment_partition_000_from_offset_0000000000_records'
59 |     - name: Restore segment and partition indexes
60 |       command: >
61 |         docker run -v /tmp/kafka-backup/:/kafka-backup/ kafka-backup-dev:latest bash -c  '
62 |         export TOPICDIR="/kafka-backup/04_delete_old_segment/backup-test-1partition/" &&
63 |         export CLASSPATH="/connect-plugins/kafka-backup.jar" &&
64 |         for f in "$TOPICDIR"/segment_partition_*_records ; do
65 |           segment-index.sh --restore-index \
66 |           --segment $f
67 |         done &&
68 |         partition-index.sh --restore --partition 0 --topic-dir "$TOPICDIR"'
69 | 
70 | - name: Restore
71 |   entries:
72 |     - name: Create Topic
73 |       command: docker-compose exec -T restore-to-kafka
74 |         utils.py create_topic --topic backup-test-1partition --partitions 1
75 |     - name: Run Kafka Restore
76 |       command: >
77 |         docker run -v /tmp/kafka-backup/:/kafka-backup/ --net=system_test_restore-to --name restore-to --rm
78 |         kafka-backup-dev:latest restore-standalone.sh --bootstrap-server restore-to-kafka:9092
79 |         --source-dir /kafka-backup/04_delete_old_segment/
80 |         --topics 'backup-test-1partition'
81 |       timeout: 60s
82 |       stdout_has: ['All records read.']
83 | 
84 | - name: Verify Backup
85 |   entries:
86 |     - name: Count Messages
87 |       command: docker-compose exec -T restore-to-kafka
88 |         utils.py count_messages
89 |       stdout_has:
90 |         - "backup-test-1partition 0: 1959"
91 |     - name: Verify Records
92 |       command: docker-compose exec -T restore-to-kafka
93 |         utils.py consume_verify_messages --topic backup-test-1partition --partition 0 --start_num 1041 --count 1959
94 | 
95 | - name: Clean-up Containers
96 |   entries:
97 |     - name: Docker Compose Down
98 |       command: docker-compose down
99 | 


--------------------------------------------------------------------------------
/system_test/README.md:
--------------------------------------------------------------------------------
 1 | ## Usage
 2 | 
 3 | Install coyote 
 4 | 
 5 | ```sh
 6 | go get github.com/landoop/coyote
 7 | ```
 8 | 
 9 | Build Kafka Backup (from the root directory):
10 | 
11 | ```sh
12 | ./gradlew shadowJar
13 | ```
14 |   
15 | Then, just run coyote inside this directory.
16 | 
17 | ```
18 | coyote
19 | ```
20 | 
21 | When finished, open `coyote.html`.
22 | 
23 | ## Software
24 | 
25 | You need these programs to run the test:
26 | - [Coyote](https://github.com/Landoop/coyote/releases)
27 | - [Docker](https://docs.docker.com/engine/installation/)
28 | - [Docker Compose](https://docs.docker.com/engine/installation/)
29 | 
30 | Everything else is set up automatically inside containers.


--------------------------------------------------------------------------------
/system_test/archive/001_simple_1partition_test/connect-backup-sink.properties:
--------------------------------------------------------------------------------
 1 | name=backup-sink
 2 | connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector
 3 | tasks.max=1
 4 | topics.regex=backup-test.*
 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 8 | target.dir=/tmp/kafka-backup/001_simple_1partition_test/
 9 | # 10MiB
10 | max.segment.size.bytes=10485760
11 | cluster.bootstrap.servers=localhost:9092


--------------------------------------------------------------------------------
/system_test/archive/001_simple_1partition_test/connect-backup-source.properties:
--------------------------------------------------------------------------------
 1 | name=backup-source
 2 | connector.class=de.azapps.kafkabackup.source.BackupSourceConnector
 3 | tasks.max=1
 4 | topics=backup-test-1partition,backup-test-empty
 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 8 | source.dir=/tmp/kafka-backup/001_simple_1partition_test/
 9 | batch.size=1000
10 | cluster.bootstrap.servers=localhost:9092
11 | cluster.key.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer
12 | cluster.value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer


--------------------------------------------------------------------------------
/system_test/archive/001_simple_1partition_test/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | SCRIPT_DIR=$(dirname $0)
 3 | DATADIR=/tmp/kafka-backup/001_simple_1partition_test/
 4 | source $SCRIPT_DIR/../utils.sh
 5 | 
 6 | # Uses confluent cli
 7 | #
 8 | # * Stop kafka if its running
 9 | kafka_stop
10 | # * Delete all data
11 | kafka_delete_data
12 | rm -rf $DATADIR
13 | mkdir -p $DATADIR
14 | 
15 | 
16 | ########################## Generate Data
17 | # * Start Kafka
18 | kafka_start
19 | # * Configure following topics:
20 | #   * backup-test-1partition
21 | create_topic backup-test-1partition 1
22 | produce_messages backup-test-1partition 0 0 300
23 | consume_messages backup-test-1partition cg-100 100
24 | consume_messages backup-test-1partition cg-200 200
25 | consume_messages backup-test-1partition cg-300 300
26 | 
27 | # 1 empty partition, one full
28 | create_topic backup-test-empty 3
29 | produce_messages backup-test-empty 0 0 300
30 | 
31 | 
32 | 
33 | ########################## Backup
34 | # * Start Kafka Connect distributed
35 | kafka_connect_start
36 | # * Configure backup-sink:
37 | #   * segment size: 10MiB
38 | #   * topics.regex: backup-test-1partition
39 | kafka_connect_load_connector 001_simple_1partition_test_sink "$SCRIPT_DIR/connect-backup-sink.properties"
40 | # * Wait a few minutes
41 | sleep $((60*5))
42 | 
43 | 
44 | ########################## Destroy & Restore Cluster
45 | # * Stop Kafka
46 | kafka_stop
47 | # * Delete all data
48 | kafka_delete_data
49 | # * Start Kafka
50 | kafka_start
51 | # * Create all 3 topics as above (we are not testing zookeeper backup!)
52 | create_topic backup-test-1partition 1
53 | create_topic backup-test-empty 3
54 | 
55 | 
56 | ########################## Restore topic
57 | # * Start Kafka Connect distributed
58 | kafka_connect_start
59 | # * Configure backup-source
60 | kafka_connect_load_connector 001_simple_1partition_test_source $SCRIPT_DIR/connect-backup-source.properties
61 | kafka_connect_unload_connector 001_simple_1partition_test_source
62 | 
63 | consume_verify_messages backup-test-1partition 0 300
64 | 


--------------------------------------------------------------------------------
/system_test/archive/backup_with_burry/connect-backup-sink.properties:
--------------------------------------------------------------------------------
 1 | name=backup-sink
 2 | connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector
 3 | tasks.max=1
 4 | topics.regex=backup-test.*
 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 8 | target.dir=/tmp/kafka-backup/backup_with_burry/topics
 9 | # 10MiB
10 | max.segment.size.bytes=10485760
11 | cluster.bootstrap.servers=localhost:9092


--------------------------------------------------------------------------------
/system_test/archive/backup_with_burry/connect-backup-source.properties:
--------------------------------------------------------------------------------
 1 | name=backup-source
 2 | connector.class=de.azapps.kafkabackup.source.BackupSourceConnector
 3 | tasks.max=1
 4 | topics=backup-test-1partition
 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 8 | source.dir=/tmp/kafka-backup/backup_with_burry/topics
 9 | batch.size=1000
10 | cluster.bootstrap.servers=localhost:9092
11 | cluster.key.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer
12 | cluster.value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer


--------------------------------------------------------------------------------
/system_test/archive/backup_with_burry/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | SCRIPT_DIR=$(dirname $0)
 3 | DATADIR=/tmp/kafka-backup/backup_with_burry
 4 | source $SCRIPT_DIR/../utils.sh
 5 | 
 6 | # Uses confluent cli
 7 | #
 8 | # * Stop kafka if its running
 9 | kafka_stop
10 | # * Delete all data
11 | kafka_delete_data
12 | rm -rf $DATADIR
13 | mkdir -p $DATADIR/{burry,topics}
14 | 
15 | 
16 | ########################## Generate Data
17 | # * Start Kafka
18 | kafka_start
19 | # * Configure following topics:
20 | #   * backup-test-1partition
21 | create_topic backup-test-1partition 1
22 | produce_messages backup-test-1partition 0 0 300
23 | consume_messages backup-test-1partition cg-100 100
24 | consume_messages backup-test-1partition cg-200 200
25 | consume_messages backup-test-1partition cg-300 300
26 | 
27 | 
28 | ########################## Backup
29 | # * Start Kafka Connect distributed
30 | kafka_connect_start
31 | # * Configure backup-sink:
32 | #   * segment size: 10MiB
33 | #   * topics.regex: backup-test-1partition
34 | kafka_connect_load_connector 001_simple_1partition_test_sink "$SCRIPT_DIR/connect-backup-sink.properties"
35 | # * Wait a few minutes
36 | sleep $((60*5))
37 | 
38 | ########################## Backup Zookeeper
39 | burry_backup $DATADIR/burry
40 | 
41 | ########################## Destroy & Restore Cluster
42 | # * Stop Kafka
43 | kafka_stop
44 | # * Delete all data
45 | kafka_delete_data
46 | # * Start Kafka
47 | kafka_start
48 | 
49 | ########################## Restore Zookeeper
50 | burry_restore $DATADIR/burry
51 | # Restart Kafka
52 | kafka_stop
53 | kafka_start
54 | 
55 | 
56 | ########################## Restore topic
57 | # * Start Kafka Connect distributed
58 | kafka_connect_start
59 | # * Configure backup-source
60 | kafka_connect_load_connector 001_simple_1partition_test_source $SCRIPT_DIR/connect-backup-source.properties
61 | sleep $((60*5))
62 | kafka_connect_unload_connector 001_simple_1partition_test_source
63 | 
64 | consume_verify_messages backup-test-1partition 0 300
65 | 


--------------------------------------------------------------------------------
/system_test/archive/full_test/connect-backup-sink.properties:
--------------------------------------------------------------------------------
 1 | name=backup-sink
 2 | connector.class=de.azapps.kafkabackup.sink.BackupSinkConnector
 3 | tasks.max=1
 4 | topics.regex=backup-test.*
 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 8 | target.dir=/tmp/kafka-backup/full_test
 9 | # 10MiB
10 | max.segment.size.bytes=10485760
11 | cluster.bootstrap.servers=localhost:9092


--------------------------------------------------------------------------------
/system_test/archive/full_test/connect-backup-source.properties:
--------------------------------------------------------------------------------
 1 | name=backup-source
 2 | connector.class=de.azapps.kafkabackup.source.BackupSourceConnector
 3 | tasks.max=1
 4 | topics=backup-test-1partition,backup-test-3partitions,backup-test-10partitions
 5 | key.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 6 | value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 7 | header.converter=org.apache.kafka.connect.converters.ByteArrayConverter
 8 | source.dir=/tmp/kafka-backup/full_test/
 9 | batch.size=1000
10 | cluster.bootstrap.servers=localhost:9092
11 | cluster.key.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer
12 | cluster.value.deserializer=org.apache.kafka.common.serialization.ByteArrayDeserializer


--------------------------------------------------------------------------------
/system_test/archive/full_test/round_trip.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | SCRIPT_DIR=$(dirname $0)
 3 | DATADIR=/tmp/kafka-backup/full_test/
 4 | source $SCRIPT_DIR/../utils.sh
 5 | NUM_MSG=100
 6 | 
 7 | # Uses confluent cli
 8 | #
 9 | # * Stop kafka if its running
10 | kafka_stop
11 | # * Delete all data
12 | kafka_delete_data
13 | rm -rf $DATADIR
14 | mkdir -p $DATADIR
15 | # * Start Kafka
16 | kafka_start
17 | # * Configure following topics:
18 | #   * backup-test-1partition
19 | #   * backup-test-3partitions
20 | create_topic backup-test-1partition 1
21 | create_topic backup-test-3partitions 3
22 | # * Produce 3,00 messages, 10KiB each to each partition
23 | #
24 | # We need to chunk the production of messages as otherwise we cannot
25 | # guarantee that the group consumer will evenly consume the partitions.
26 | produce_messages backup-test-1partition 0 0 $((3 * NUM_MSG))
27 | # backup-test-3partition
28 | produce_messages backup-test-3partitions 0 0 $((3 * NUM_MSG))
29 | produce_messages backup-test-3partitions 1 0 $((3 * NUM_MSG))
30 | produce_messages backup-test-3partitions 2 0 $((3 * NUM_MSG))
31 | # * Consume all messages with consumer-group `cg-3k`
32 | consume_messages backup-test-1partition cg-3k $((3 * NUM_MSG))
33 | consume_messages backup-test-3partitions cg-3k $((9 * NUM_MSG))
34 | kafka_group_describe cg-3k
35 | # * Produce 2 * NUM_MSG messages
36 | produce_messages backup-test-1partition 0 $((3 * NUM_MSG)) $((2 * NUM_MSG))
37 | # backup-test-3partition
38 | produce_messages backup-test-3partitions 0 $((3 * NUM_MSG)) $((2 * NUM_MSG))
39 | produce_messages backup-test-3partitions 1 $((3 * NUM_MSG)) $((2 * NUM_MSG))
40 | produce_messages backup-test-3partitions 2 $((3 * NUM_MSG)) $((2 * NUM_MSG))
41 | # * Consume all messages with consumer-group `cg-5k`
42 | consume_messages backup-test-1partition cg-5k $((5 * NUM_MSG))
43 | consume_messages backup-test-3partitions cg-5k $((15 * NUM_MSG))
44 | # * Produce 100 more messages
45 | produce_messages backup-test-1partition 0 $((5 * NUM_MSG)) $((1 * NUM_MSG))
46 | # backup-test-3partition
47 | produce_messages backup-test-3partitions 0 $((5 * NUM_MSG)) $((1 * NUM_MSG))
48 | produce_messages backup-test-3partitions 1 $((5 * NUM_MSG)) $((1 * NUM_MSG))
49 | produce_messages backup-test-3partitions 2 $((5 * NUM_MSG)) $((1 * NUM_MSG))
50 | # * Start Kafka Connect distributed
51 | kafka_connect_start
52 | # * Configure backup-sink:
53 | #   * segment size: 10MiB
54 | #   * topics.regex: backup-test-*
55 | sleep 10
56 | kafka_connect_load_connector backup-sink "$SCRIPT_DIR/connect-backup-sink.properties"
57 | sleep 10
58 | # * Create another topic:
59 | #   * backup-test-10partitions
60 | 
61 | create_topic backup-test-10partitions 10
62 | # * Produce 1,00 messages as above and consume 500 messages as above
63 | for i in {0..9} ; do
64 |     produce_messages backup-test-10partitions $i 0 $((5 * NUM_MSG))
65 | done
66 | # To force segmentation rolling
67 | produce_messages backup-test-1partition 0 $((6 * NUM_MSG)) $((15 * NUM_MSG))
68 | # Consume some messages
69 | # * Wait a few minutes
70 | sleep $((60*5))
71 | # * Stop Kafka
72 | kafka_stop
73 | # * Delete all data
74 | kafka_delete_data
75 | # * Start Kafka
76 | kafka_start
77 | # * Create all 3 topics as above (we are not testing zookeeper backup!)
78 | create_topic backup-test-1partition 1
79 | create_topic backup-test-3partitions 3
80 | create_topic backup-test-10partitions 10
81 | # * Start Kafka Connect distributed
82 | kafka_connect_start
83 | # * Configure backup-source
84 | kafka_connect_load_connector backup-source $SCRIPT_DIR/connect-backup-source.properties
85 | # * Wait for restore to finish
86 | sleep $((60*15))
87 | kafka_connect_unload_connector backup-source
88 | # * Read all messages and check that they are the same as the ones that were written.
89 | # * Subscribe to Kafka using the consumer groups as above (`cg-5k` and `cg-3k`) and check whether they are at the correct position
90 | 
91 | consume_verify_messages backup-test-1partition 0 $((21 * NUM_MSG))
92 | for i in {0..2} ; do
93 |     consume_verify_messages backup-test-3partitions $i $((6 * NUM_MSG))
94 | done
95 | for i in {0..9} ; do
96 |     consume_verify_messages backup-test-10partitions $i $((5 * NUM_MSG))
97 | done
98 | 


--------------------------------------------------------------------------------
/system_test/archive/utils.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | kafka_start() {
  4 |   confluent local start kafka
  5 | }
  6 | 
  7 | kafka_stop() {
  8 |   confluent local stop
  9 | }
 10 | 
 11 | kafka_delete_data() {
 12 |   rm -r /tmp/confluent.*
 13 | }
 14 | 
 15 | kafka-topics() {
 16 |   "$CONFLUENT_HOME/bin/kafka-topics" $@
 17 | }
 18 | kafka-console-consumer() {
 19 |   "$CONFLUENT_HOME/bin/kafka-console-consumer" $@
 20 | }
 21 | kafka-consumer-groups() {
 22 |   "$CONFLUENT_HOME/bin/kafka-consumer-groups" $@
 23 | }
 24 | 
 25 | create_topic() {
 26 |   TOPIC=$1
 27 |   PARTITIONS=$2
 28 |   if [ -z "$PARTITIONS" ]; then
 29 |     echo "USAGE: $0 [TOPIC] [PARTITIONS]"
 30 |     return 255
 31 |   fi
 32 | 
 33 |   kafka-topics --create --bootstrap-server localhost:9092 --topic "$TOPIC" --partitions "$PARTITIONS" --replication-factor 1
 34 | }
 35 | 
 36 | gen_message() {
 37 |   PARTITION=$1
 38 |   NUM=$2
 39 |   if [ -z "$NUM" ]; then
 40 |     echo "USAGE: $0 [PARTITION] [NUM] (SIZE)"
 41 |     return 255
 42 |   fi
 43 |   SIZE=$3
 44 |   if [ -z "$SIZE" ]; then
 45 |     SIZE=7500 # 10k Bytes base64
 46 |   fi
 47 |   VALUE=$(dd if=/dev/urandom bs=$SIZE count=1 2>/dev/null | base64 -w0)
 48 |   CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1)
 49 |   KEY="part_${PARTITION}_num_${NUM}_${CHECKSUM}"
 50 |   echo "${KEY},${VALUE}"
 51 | }
 52 | 
 53 | gen_messages() {
 54 |   PARTITION=$1
 55 |   START_NUM=$2
 56 |   COUNT=$3
 57 |   if [ -z "$COUNT" ]; then
 58 |     echo "USAGE: $0 [PARTITION] [START_NUM] [COUNT] (SIZE)"
 59 |     return 255
 60 |   fi
 61 |   SIZE=$4
 62 |   for NUM in {$START_NUM..$((START_NUM + COUNT - 1))}; do
 63 |     if [ "0" -eq "$(((NUM - START_NUM) % 100))" ]; then
 64 |       echo -e -n "\rProduced $((NUM - START_NUM))/$COUNT messages" >/dev/stderr
 65 |     fi
 66 |     gen_message "$PARTITION" $NUM "$SIZE"
 67 |   done
 68 |   echo ""
 69 | }
 70 | 
 71 | produce_messages() {
 72 |   TOPIC=$1
 73 |   PARTITION=$2
 74 |   START_NUM=$3
 75 |   COUNT=$4
 76 |   if [ -z "$COUNT" ]; then
 77 |     echo "USAGE: $0 [TOPIC] [PARTITION] [START_NUM] [COUNT] (SIZE)"
 78 |     return 255
 79 |   fi
 80 |   SIZE=$5
 81 | 
 82 |   gen_messages "$PARTITION" "$START_NUM" "$COUNT" "$SIZE" | kafkacat -P -b localhost:9092 -t "$TOPIC" -p "$PARTITION" -K ","
 83 | }
 84 | 
 85 | verify_messages() {
 86 |   PREVIOUS_NUM="-1"
 87 |   while read -r MESSAGE; do
 88 |     if [ "0" -eq "$(((PREVIOUS_NUM + 1) % 10))" ]; then
 89 |       echo -e -n "\rVerified $((PREVIOUS_NUM + 1)) messages" >/dev/stderr
 90 |     fi
 91 |     KEY=$(echo "$MESSAGE" | awk '{print $1}')
 92 |     KEY_MATCH=$(echo "$KEY" | sed 's/part_\([0-9]*\)_num_\([0-9]*\)_\(.*\)$/\1\t\2\t\3/')
 93 |     KEY_PARTITION=$(echo "$KEY_MATCH" | awk '{print $1}')
 94 |     KEY_NUM=$(echo "$KEY_MATCH" | awk '{print $2}')
 95 |     KEY_CHECKSUM=$(echo "$KEY_MATCH" | awk '{print $3}')
 96 | 
 97 |     VALUE=$(echo "$MESSAGE" | awk '{print $2}')
 98 |     VALUE_CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1)
 99 | 
100 |     if [ ! "$KEY_NUM" -eq "$((PREVIOUS_NUM + 1))" ]; then
101 |       echo "Missing message. Previous message has num $PREVIOUS_NUM. This message has num $KEY_NUM"
102 |       return 255
103 |     fi
104 |     PREVIOUS_NUM=$KEY_NUM
105 | 
106 |     if [ "$KEY_CHECKSUM" != "$VALUE_CHECKSUM" ]; then
107 |       echo "Partition $KEY_PARTITION, Key $KEY_NUM, KChk $KEY_CHECKSUM, vlength ${#VALUE}, vchk: $VALUE_CHECKSUM"
108 | 
109 |       echo "Checksum mismatch: Checksum in key ($KEY_CHECKSUM) does not match Checksum of value ($VALUE_CHECKSUM)"
110 |       return 255
111 |     fi
112 |   done
113 |   echo -e "\rVerified $((PREVIOUS_NUM + 1)) messages"
114 | }
115 | 
116 | consume_verify_messages() {
117 |   TOPIC=$1
118 |   PARTITION=$2
119 |   COUNT=$3
120 |   if [ -z "$COUNT" ]; then
121 |     echo "USAGE: $0 [TOPIC] [PARTITION] [COUNT]"
122 |     return 255
123 |   fi
124 | 
125 |   kafka-console-consumer \
126 |     --bootstrap-server localhost:9092 \
127 |     --from-beginning --property print.key=true \
128 |     --topic "$TOPIC" \
129 |     --max-messages="$COUNT" \
130 |     --partition="$PARTITION" 2>&/dev/null |
131 |     verify_messages
132 | }
133 | 
134 | consume_messages() {
135 |   TOPIC=$1
136 |   CONSUMER_GROUP=$2
137 |   COUNT=$3
138 |   if [ -z "$COUNT" ]; then
139 |     echo "USAGE: $0 [TOPIC] [CONSUMER GROUP] [COUNT]"
140 |     return 255
141 |   fi
142 | 
143 |   MESSAGES=$(kafka-console-consumer \
144 |     --bootstrap-server localhost:9092 \
145 |     --from-beginning --property print.key=true \
146 |     --topic "$TOPIC" \
147 |     --max-messages "$COUNT" \
148 |     --group "$CONSUMER_GROUP") # 2>/dev/null)
149 |       echo "Consumed $(echo "$MESSAGES" | wc -l) messages"
150 | }
151 | 
152 | kafka_connect_start() {
153 |   confluent local start connect
154 | }
155 | 
156 | kafka_connect_load_connector() {
157 |   NAME=$1
158 |   PROPS="$2"
159 |   if [ -z "$PROPS" ]; then
160 |     echo "USAGE: $0 [NAME] [PROPS FILE]"
161 |     return 255
162 |   fi
163 |   cp "$PROPS" /tmp/connect.properties
164 |   confluent local load "$NAME" -- -d /tmp/connect.properties
165 |   rm /tmp/connect.properties
166 | }
167 | 
168 | kafka_connect_unload_connector() {
169 |   NAME=$1
170 |   if [ -z "$NAME" ]; then
171 |     echo "USAGE: $0 [NAME]"
172 |     return 255
173 |   fi
174 |   confluent local unload "$NAME"
175 | }
176 | 
177 | kafka_group_describe() {
178 |   GROUP=$1
179 |   if [ -z "$GROUP" ]; then
180 |     echo "USAGE: $0 [GROUP]"
181 |     return 255
182 |   fi
183 |   kafka-consumer-groups --bootstrap-server localhost:9092 --describe --group "$GROUP"
184 | }
185 | 
186 | burry_backup() {
187 |   TARGET_DIR=$1
188 |   if [ -z "$TARGET_DIR" ]; then
189 |     echo "USAGE: $0 [TARGET_DIR]"
190 |     return 255
191 |   fi
192 |   docker run --network=host -v "$TARGET_DIR":/data azapps/burry -e localhost:2181 -t local
193 | }
194 | 
195 | burry_restore() {
196 |   SOURCE_DIR=$1
197 |   if [ -z "$SOURCE_DIR" ]; then
198 |     echo "USAGE: $0 [SOURCE_DIR]"
199 |     return 255
200 |   fi
201 |   SNAPSHOT=$(ls "$DATADIR"/burry | tail -n 1 | sed 's/.zip//')
202 |   docker run --network=host -v "$SOURCE_DIR":/data azapps/burry --operation=restore --snapshot="$SNAPSHOT" -e localhost:2181 -t local
203 | }
204 | 


--------------------------------------------------------------------------------
/system_test/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.3'
 2 | services:
 3 |   # To Backup
 4 |   to-backup-zk:
 5 |     build:
 6 |       context: ./utils
 7 |       dockerfile: Dockerfile
 8 |     container_name: to-backup-zk
 9 |     hostname: to-backup-zk
10 |     ports:
11 |       - 12181:2181
12 |     volumes:
13 |       - "./utils/kafka-configs/zookeeper.properties:/etc/zookeeper.properties"
14 |     command: "zookeeper-server-start.sh /etc/zookeeper.properties"
15 |     networks:
16 |       - to-backup
17 | 
18 |   to-backup-kafka:
19 |     build:
20 |       context: ./utils
21 |       dockerfile: Dockerfile
22 |     container_name: to-backup-kafka
23 |     hostname: to-backup-kafka
24 |     volumes:
25 |       - "./utils/kafka-configs/to-backup-kafka.properties:/etc/kafka.properties"
26 |       - "./utils/utils.py:/usr/bin/utils.py"
27 |     ports:
28 |       - 19092:19092
29 |     networks:
30 |       - to-backup
31 |     depends_on:
32 |       - to-backup-zk
33 |     command: "kafka-server-start.sh /etc/kafka.properties"
34 | 
35 |   ################################################################################################
36 |   # Restore to
37 | 
38 |   restore-to-zk:
39 |     build:
40 |       context: ./utils
41 |       dockerfile: Dockerfile
42 |     container_name: restore-to-zk-1
43 |     hostname: restore-to-zk-1
44 |     ports:
45 |       - 22181:2181
46 |     volumes:
47 |       - "./utils/kafka-configs/zookeeper.properties:/etc/zookeeper.properties"
48 |     command: "zookeeper-server-start.sh /etc/zookeeper.properties"
49 |     networks:
50 |       - restore-to
51 | 
52 |   restore-to-kafka:
53 |     build:
54 |       context: ./utils
55 |       dockerfile: Dockerfile
56 |     container_name: restore-to-kafka
57 |     hostname: restore-to-kafka
58 |     volumes:
59 |       - "./utils/kafka-configs/restore-to-kafka.properties:/etc/kafka.properties"
60 |       - "./utils/utils.py:/usr/bin/utils.py"
61 |     ports:
62 |       - 29092:29092
63 |     networks:
64 |       - restore-to
65 |     depends_on:
66 |       - restore-to-zk
67 |     command: "kafka-server-start.sh /etc/kafka.properties"
68 | 
69 | networks:
70 |   to-backup:
71 |   restore-to:


--------------------------------------------------------------------------------
/system_test/utils/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM kafka-backup-dev:latest
 2 | 
 3 | RUN apk add --no-cache make gcc g++ cmake curl pkgconfig perl bsd-compat-headers zlib-dev lz4-dev openssl-dev \
 4 |  curl-dev libcurl lz4-libs ca-certificates python3 bash python3-dev
 5 | 
 6 | # Build librdkafka
 7 | RUN mkdir /usr/src && cd /usr/src/ && \
 8 |     curl https://codeload.github.com/edenhill/librdkafka/tar.gz/master | tar xzf - && \
 9 |     cd librdkafka-master && \
10 |     ./configure && \
11 |     make && make install && \
12 |     cd / && rm -rf /usr/src/
13 | 
14 | # Install confluent-kafka python
15 | 
16 | RUN pip3 install confluent-kafka==1.3.0 pykafka==2.8.0dev1
17 | COPY utils.py /usr/bin/utils.py


--------------------------------------------------------------------------------
/system_test/utils/kafka-configs/restore-to-kafka.properties:
--------------------------------------------------------------------------------
 1 | broker.id=0
 2 | log.dirs=/tmp/kafka-logs
 3 | zookeeper.connect=restore-to-zk:2181
 4 | advertised.listeners=INDOCKER://restore-to-kafka:9092,OUTDOCKER://localhost:29092
 5 | listeners=INDOCKER://:9092,OUTDOCKER://:29092
 6 | listener.security.protocol.map=INDOCKER:PLAINTEXT,OUTDOCKER:PLAINTEXT
 7 | inter.broker.listener.name=INDOCKER
 8 | offsets.topic.replication.factor=1
 9 | offsets.topic.num.partitions=1
10 | auto.create.topics.enable=false


--------------------------------------------------------------------------------
/system_test/utils/kafka-configs/to-backup-kafka.properties:
--------------------------------------------------------------------------------
 1 | broker.id=0
 2 | log.dirs=/tmp/kafka-logs
 3 | zookeeper.connect=to-backup-zk:2181
 4 | advertised.listeners=INDOCKER://to-backup-kafka:9092,OUTDOCKER://localhost:19092
 5 | listeners=INDOCKER://:9092,OUTDOCKER://:19092
 6 | listener.security.protocol.map=INDOCKER:PLAINTEXT,OUTDOCKER:PLAINTEXT
 7 | inter.broker.listener.name=INDOCKER
 8 | offsets.topic.replication.factor=1
 9 | offsets.topic.num.partitions=1
10 | auto.create.topics.enable=false


--------------------------------------------------------------------------------
/system_test/utils/kafka-configs/zookeeper.properties:
--------------------------------------------------------------------------------
1 | dataDir=/tmp/zookeeper
2 | clientPort=2181
3 | maxClientCnxns=0


--------------------------------------------------------------------------------
/system_test/utils/runutil:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | source /root/utils.sh
3 | "$@"


--------------------------------------------------------------------------------
/system_test/utils/utils.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | create_topic() {
  4 |   TOPIC=$1
  5 |   PARTITIONS=$2
  6 |   if [ -z "$PARTITIONS" ] || [ -n "$3" ]; then
  7 |     echo "USAGE: $0 [TOPIC] [PARTITIONS]"
  8 |     return 255
  9 |   fi
 10 | 
 11 |   kafka-topics --create --bootstrap-server localhost:9092 --topic "$TOPIC" --partitions "$PARTITIONS" --replication-factor 1
 12 | }
 13 | export -f create_topic
 14 | 
 15 | gen_message() {
 16 |   PARTITION=$1
 17 |   NUM=$2
 18 |   SIZE=$3
 19 |   if [ -z "$NUM" ] || [ -n "$4" ]; then
 20 |     echo "USAGE: $0 [PARTITION] [NUM] (SIZE)"
 21 |     return 255
 22 |   fi
 23 |   if [ -z "$SIZE" ]; then
 24 |     SIZE=7500 # 10k Bytes base64
 25 |   fi
 26 |   VALUE=$(dd if=/dev/urandom bs=$SIZE count=1 2>/dev/null | base64 -w0)
 27 |   CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1)
 28 |   KEY="part_${PARTITION}_num_${NUM}_${CHECKSUM}"
 29 |   echo "${KEY},${VALUE}"
 30 | }
 31 | export -f gen_message
 32 | 
 33 | gen_messages() {
 34 |   PARTITION=$1
 35 |   START_NUM=$2
 36 |   COUNT=$3
 37 |   if [ -z "$COUNT" ] || [ -n "$4" ]; then
 38 |     echo "USAGE: $0 [PARTITION] [START_NUM] [COUNT] (SIZE)"
 39 |     return 255
 40 |   fi
 41 |   SIZE=$4
 42 |   for NUM in $(seq $START_NUM $((START_NUM + COUNT - 1))); do
 43 |     if [ "0" -eq "$(((NUM - START_NUM) % 100))" ]; then
 44 |       echo -e -n "\rProduced $((NUM - START_NUM))/$COUNT messages" >/dev/stderr
 45 |     fi
 46 |     gen_message "$PARTITION" $NUM "$SIZE"
 47 |   done
 48 |   echo ""
 49 | }
 50 | export -f gen_messages
 51 | 
 52 | produce_messages() {
 53 |   TOPIC=$1
 54 |   PARTITION=$2
 55 |   START_NUM=$3
 56 |   COUNT=$4
 57 |   SIZE=$5
 58 |   if [ -z "$COUNT" ] || [ -n "$6" ]; then
 59 |     echo "USAGE: $0 [TOPIC] [PARTITION] [START_NUM] [COUNT] (SIZE)"
 60 |     return 255
 61 |   fi
 62 | 
 63 |   gen_messages "$PARTITION" "$START_NUM" "$COUNT" "$SIZE" | kafkacat -P -b localhost:9092 -t "$TOPIC" -p "$PARTITION" -K ","
 64 | }
 65 | export -f produce_messages
 66 | 
 67 | verify_messages() {
 68 |   PREVIOUS_NUM="$1"
 69 |   if [ -z "$PREVIOUS_NUM" ]; then
 70 |     PREVIOUS_NUM="-1"
 71 |   fi
 72 |   while read -r MESSAGE; do
 73 |     if [ "0" -eq "$(((PREVIOUS_NUM + 1) % 100))" ]; then
 74 |       echo -e -n "\rVerified $((PREVIOUS_NUM + 1)) messages" >/dev/stderr
 75 |     fi
 76 |     KEY=$(echo "$MESSAGE" | awk '{print $1}')
 77 |     KEY_MATCH=$(echo "$KEY" | sed 's/part_\([0-9]*\)_num_\([0-9]*\)_\(.*\)$/\1\t\2\t\3/')
 78 |     KEY_PARTITION=$(echo "$KEY_MATCH" | awk '{print $1}')
 79 |     KEY_NUM=$(echo "$KEY_MATCH" | awk '{print $2}')
 80 |     KEY_CHECKSUM=$(echo "$KEY_MATCH" | awk '{print $3}')
 81 | 
 82 |     VALUE=$(echo "$MESSAGE" | awk '{print $2}')
 83 |     VALUE_CHECKSUM=$(echo "$VALUE" | md5sum | cut -d' ' -f1)
 84 | 
 85 |     if [ ! "$KEY_NUM" -eq "$((PREVIOUS_NUM + 1))" ]; then
 86 |       echo "Missing message. Previous message has num $PREVIOUS_NUM. This message has num $KEY_NUM"
 87 |       return 255
 88 |     fi
 89 |     PREVIOUS_NUM=$KEY_NUM
 90 | 
 91 |     if [ "$KEY_CHECKSUM" != "$VALUE_CHECKSUM" ]; then
 92 |       echo "Partition $KEY_PARTITION, Key $KEY_NUM, KChk $KEY_CHECKSUM, vlength ${#VALUE}, vchk: $VALUE_CHECKSUM"
 93 | 
 94 |       echo "Checksum mismatch: Checksum in key ($KEY_CHECKSUM) does not match Checksum of value ($VALUE_CHECKSUM)"
 95 |       return 255
 96 |     fi
 97 |   done
 98 |   echo -e "\rVerified $((PREVIOUS_NUM + 1)) messages"
 99 | }
100 | export -f verify_messages
101 | 
102 | consume_verify_messages() {
103 |   TOPIC=$1
104 |   PARTITION=$2
105 |   START_NUM=$3
106 |   COUNT=$4
107 |   if [ -z "$COUNT" ]; then
108 |     COUNT="$START_NUM"
109 |     START_NUM="0"
110 |   fi
111 | 
112 |   if [ -z "$COUNT" ] || [ -n "$5" ]; then
113 |     echo "USAGE: $0 [TOPIC] [PARTITION] ([START_NUM]) [COUNT]"
114 |     return 255
115 |   fi
116 | 
117 |   kafka-console-consumer \
118 |     --bootstrap-server localhost:9092 \
119 |     --from-beginning --property print.key=true \
120 |     --topic "$TOPIC" \
121 |     --max-messages="$COUNT" \
122 |     --partition="$PARTITION" 2>/dev/null |
123 |     verify_messages $((START_NUM - 1))
124 | }
125 | export -f consume_verify_messages
126 | 
127 | consume_messages() {
128 |   TOPIC=$1
129 |   CONSUMER_GROUP=$2
130 |   COUNT=$3
131 |   if [ -z "$COUNT" ] || [ -n "$4" ]; then
132 |     echo "USAGE: $0 [TOPIC] [CONSUMER GROUP] [COUNT]"
133 |     return 255
134 |   fi
135 | 
136 |   MESSAGES=$(kafka-console-consumer \
137 |     --bootstrap-server localhost:9092 \
138 |     --from-beginning --property print.key=true \
139 |     --topic "$TOPIC" \
140 |     --max-messages "$COUNT" \
141 |     --group "$CONSUMER_GROUP") # 2>/dev/null)
142 |   echo "Consumed $(echo "$MESSAGES" | wc -l) messages"
143 | }
144 | export -f consume_messages
145 | 
146 | kafka_group_describe() {
147 |   GROUP=$1
148 |   if [ -z "$GROUP" ] || [ -n "$2" ]; then
149 |     echo "USAGE: $0 [GROUP]"
150 |     return 255
151 |   fi
152 |   kafka-consumer-groups --bootstrap-server localhost:9092 --describe --group "$GROUP"
153 | }
154 | export -f kafka_group_describe
155 | 
156 | burry_backup() {
157 |   TARGET_DIR=$1
158 |   if [ -z "$TARGET_DIR" ] || [ -n "$2" ]; then
159 |     echo "USAGE: $0 [TARGET_DIR]"
160 |     return 255
161 |   fi
162 |   docker run --network=host -v "$TARGET_DIR":/data azapps/burry -e localhost:2181 -t local
163 | }
164 | export -f burry_backup
165 | 
166 | burry_restore() {
167 |   SOURCE_DIR=$1
168 |   if [ -z "$SOURCE_DIR" ] || [ -n "$2" ]; then
169 |     echo "USAGE: $0 [SOURCE_DIR]"
170 |     return 255
171 |   fi
172 |   SNAPSHOT=$(ls "$DATADIR"/burry | tail -n 1 | sed 's/.zip//')
173 |   docker run --network=host -v "$SOURCE_DIR":/data azapps/burry --operation=restore --snapshot="$SNAPSHOT" -e localhost:2181 -t local
174 | }
175 | export -f burry_restore
176 | 


--------------------------------------------------------------------------------