├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── codeql-analysis.yml │ ├── dependency-review.yml │ ├── go.yml │ └── golangci-lint.yml ├── .gitignore ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── cmd └── zstdseek │ ├── go.mod │ ├── go.sum │ └── main.go └── pkg ├── decoder.go ├── decoder_test.go ├── doc.go ├── doc_test.go ├── encoder.go ├── encoder_test.go ├── env ├── environments.go └── frame_offset.go ├── example_test.go ├── go.mod ├── go.sum ├── reader.go ├── reader_fuzz_test.go ├── reader_options.go ├── reader_test.go ├── seekable.go ├── seekable_fuzz_test.go ├── seekable_test.go ├── testdata ├── fuzz │ ├── FuzzReader │ │ ├── 031af68894ffe89e8860b612d9341b8f4270ec992df264751325ec4828ff8ad4 │ │ ├── 0bed2d41cfe3d295a3e183a3a0e658c19f9fd792f9b79310f698a4d6f517a7a6 │ │ ├── 2731075ae7d169285366e009d02826aba105e7470063f29bcf059171d9e76074 │ │ ├── 302d18d199dcc9f0f854687128e6e51f686d130ab36266e2f9cfc98a0bb0d105 │ │ ├── 3f17aa2f634b16b95c0cfbcad9894f23255ad4b0e1fc78f54146ac260330d86d │ │ ├── 55a6d9e48f240cf1cd588b5bf9aac882dbcfb765f4a0f2f0796bdb7271d6c1d2 │ │ ├── 6ac3e90e33489f6374af1d4a80567a49a4e33229f6944ff7a4a1ece24ee1a0ec │ │ ├── 724dfd4c280cae6fdb4a26c64c6084e0afeea42ba22a012a47eae7f132927a83 │ │ ├── 785bc3dd4151628f17e2150b2e453f7e5e5d30b386810e6e2018229dd571a250 │ │ └── f42940a36e752debf0e50f41e949bc36d55a128b9331723b1c91da5248308762 │ └── FuzzRoundTrip │ │ └── 4c7e5250c36d4db0 ├── intercompat-t2sz.zst └── intercompat-zstdseek_v0.zst ├── writer.go ├── writer_options.go └── writer_test.go /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "gomod" 8 | directories: 9 | - "/pkg" 10 | - "/cmd/zstdseek" 11 | schedule: 12 | interval: "weekly" 13 | -------------------------------------------------------------------------------- 
/.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | types: [opened, synchronize, closed] 7 | 8 | jobs: 9 | coverage: 10 | name: coverage 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 100 16 | - uses: actions/setup-go@v5 17 | with: 18 | go-version: '1.24.4' 19 | check-latest: true 20 | - name: Run coverage 21 | working-directory: ./pkg 22 | run: go test -coverprofile=coverage.out -covermode=count 23 | - name: Upload coverage to Codecov 24 | uses: codecov/codecov-action@v5 25 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ main ] 9 | schedule: 10 | - cron: '40 8 * * 3' 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ubuntu-latest 16 | permissions: 17 | actions: read 18 | contents: read 19 | security-events: write 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | language: [ 'go' ] 25 | 26 | steps: 27 | - name: Checkout repository 28 | uses: actions/checkout@v4 29 | 30 | # Initializes the CodeQL tools for scanning. 
31 | - name: Initialize CodeQL 32 | uses: github/codeql-action/init@v3 33 | with: 34 | languages: ${{ matrix.language }} 35 | 36 | - name: Autobuild 37 | uses: github/codeql-action/autobuild@v3 38 | 39 | - name: Perform CodeQL Analysis 40 | uses: github/codeql-action/analyze@v3 41 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | # Dependency Review Action 2 | # 3 | # This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging. 4 | # 5 | # Source repository: https://github.com/actions/dependency-review-action 6 | # Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement 7 | name: 'Dependency Review' 8 | on: [pull_request] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | dependency-review: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: 'Checkout Repository' 18 | uses: actions/checkout@v4 19 | - name: 'Dependency Review' 20 | uses: actions/dependency-review-action@v4 21 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: go 2 | 3 | on: 4 | push: 5 | branches: ['*'] 6 | tags: ['v*'] 7 | pull_request: 8 | branches: ['*'] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | go-version: ['1.24.4'] 16 | dir: ['pkg', 'cmd/zstdseek'] 17 | steps: 18 | - uses: dcarbone/install-jq-action@v3.1.1 19 | - uses: actions/checkout@v4 20 | - name: Setup Go ${{ 
matrix.go-version }} 21 | uses: actions/setup-go@v5 22 | with: 23 | go-version: ${{ matrix.go-version }} 24 | cache-dependency-path: ${{ matrix.dir }}/go.sum 25 | - name: Display Go version 26 | run: go version 27 | - name: Install dependencies (${{ matrix.dir }}) 28 | working-directory: ./${{ matrix.dir }} 29 | run: | 30 | go get . 31 | - name: go work 32 | run: | 33 | if [ "${{ matrix.dir }}" == "pkg" ]; then 34 | echo "Skipping go workspace for ${{ matrix.dir }}" 35 | rm -f go.work* 36 | exit 0 37 | fi 38 | go work init 39 | go work use pkg 40 | go work use ${{ matrix.dir }} 41 | - name: Build (${{ matrix.dir }}) 42 | working-directory: ./${{ matrix.dir }} 43 | run: | 44 | for OSARCH in $(go tool dist list -json | jq -r '.[] | select(.FirstClass) | [.GOOS , .GOARCH] | join("/")'); do 45 | IFS="/" read -r OS ARCH <<< "$OSARCH" 46 | echo "Building for $OS $ARCH" 47 | GOOS=$OS GOARCH=$ARCH go build ./... 48 | done 49 | - name: Test (${{ matrix.dir }}) 50 | working-directory: ./${{ matrix.dir }} 51 | run: go test -v ./... 
52 | -------------------------------------------------------------------------------- /.github/workflows/golangci-lint.yml: -------------------------------------------------------------------------------- 1 | name: golangci-lint 2 | on: 3 | push: 4 | branches: ['*'] 5 | tags: ['v*'] 6 | pull_request: 7 | branches: ['*'] 8 | jobs: 9 | golangci: 10 | name: lint 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-go@v5 15 | with: 16 | go-version: '1.24.4' 17 | check-latest: true 18 | - name: golangci-lint 19 | uses: golangci/golangci-lint-action@v8.0.0 20 | with: 21 | version: latest 22 | working-directory: ./pkg 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | go.work 3 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "go.enableCodeLens": { 3 | "runtest": false 4 | }, 5 | "gopls": { 6 | "formatting.gofumpt": true, 7 | "formatting.local": "github.com/SaveTheRbtz/zstd-seekable-format-go", 8 | "ui.completion.usePlaceholders": true, 9 | "ui.semanticTokens": true, 10 | "ui.codelenses": { 11 | "gc_details": false, 12 | "regenerate_cgo": false, 13 | "generate": false, 14 | "test": false, 15 | "tidy": false, 16 | "upgrade_dependency": false, 17 | "vendor": false 18 | }, 19 | }, 20 | "go.useLanguageServer": true, 21 | "go.buildOnSave": "off", 22 | "go.lintOnSave": "off", 23 | "go.vetOnSave": "off" 24 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Alexey Ivanov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License][license-img]][license] [![GoDoc][doc-img]][doc] [![Build Status][ci-img]][ci] [![Go Report][report-img]][report] 2 | # ZSTD seekable compression format implementation in Go 3 | [Seekable ZSTD compression format](https://github.com/facebook/zstd/blob/dev/contrib/seekable_format/zstd_seekable_compression_format.md) implemented in Golang. 4 | 5 | This library provides a random access reader (using uncompressed file offsets) for ZSTD-compressed streams. This can be used for creating transparent compression layers. Coupled with Content Defined Chunking (CDC) it can also be used as a robust de-duplication layer. 
6 | ## Installation 7 | 8 | `go get -u github.com/SaveTheRbtz/zstd-seekable-format-go/pkg` 9 | 10 | ## Using the seekable format 11 | 12 | Writing is done through the `Writer` interface: 13 | ```go 14 | import ( 15 | "github.com/klauspost/compress/zstd" 16 | seekable "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg" 17 | ) 18 | 19 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer enc.Close() 24 | 25 | w, err := seekable.NewWriter(f, enc) 26 | if err != nil { 27 | log.Fatal(err) 28 | } 29 | 30 | // Write data in chunks. 31 | for _, b := range [][]byte{[]byte("Hello"), []byte(" "), []byte("World!")} { 32 | _, err = w.Write(b) 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | } 37 | 38 | // Close and flush seek table. 39 | err = w.Close() 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | ``` 44 | NB! Do not forget to call `Close` since it is responsible for flushing the seek table. 45 | 46 | Reading can either be done through `ReaderAt` interface: 47 | 48 | ```go 49 | dec, err := zstd.NewReader(nil) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | defer dec.Close() 54 | 55 | r, err := seekable.NewReader(f, dec) 56 | if err != nil { 57 | log.Fatal(err) 58 | } 59 | defer r.Close() 60 | 61 | ello := make([]byte, 4) 62 | // ReaderAt 63 | r.ReadAt(ello, 1) 64 | if !bytes.Equal(ello, []byte("ello")) { 65 | log.Fatalf("%+v != ello", ello) 66 | } 67 | ``` 68 | 69 | Or through the `ReadSeeker`: 70 | ```go 71 | world := make([]byte, 5) 72 | // Seeker 73 | r.Seek(-6, io.SeekEnd) 74 | // Reader 75 | r.Read(world) 76 | if !bytes.Equal(world, []byte("World")) { 77 | log.Fatalf("%+v != World", world) 78 | } 79 | ``` 80 | 81 | Seekable format utilizes [ZSTD skippable frames](https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#skippable-frames) so it is a valid ZSTD stream: 82 | 83 | ```go 84 | // Standard ZSTD Reader 85 | f.Seek(0, io.SeekStart) 86 | dec, err := 
zstd.NewReader(f) 87 | if err != nil { 88 | log.Fatal(err) 89 | } 90 | defer dec.Close() 91 | 92 | all, err := io.ReadAll(dec) 93 | if err != nil { 94 | log.Fatal(err) 95 | } 96 | if !bytes.Equal(all, []byte("Hello World!")) { 97 | log.Fatalf("%+v != Hello World!", all) 98 | } 99 | ``` 100 | 101 | [doc-img]: https://pkg.go.dev/badge/github.com/SaveTheRbtz/zstd-seekable-format-go 102 | [doc]: https://pkg.go.dev/github.com/SaveTheRbtz/zstd-seekable-format-go/pkg 103 | [ci-img]: https://github.com/SaveTheRbtz/zstd-seekable-format-go/actions/workflows/go.yml/badge.svg 104 | [ci]: https://github.com/SaveTheRbtz/zstd-seekable-format-go/actions/workflows/go.yml 105 | [report-img]: https://goreportcard.com/badge/SaveTheRbtz/zstd-seekable-format-go 106 | [report]: https://goreportcard.com/report/SaveTheRbtz/zstd-seekable-format-go 107 | [license-img]: https://img.shields.io/badge/License-MIT-blue.svg 108 | [license]: https://opensource.org/licenses/MIT 109 | -------------------------------------------------------------------------------- /cmd/zstdseek/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/SaveTheRbtz/zstd-seekable-format-go/cmd/zstdseek 2 | 3 | go 1.24.4 4 | 5 | require ( 6 | github.com/SaveTheRbtz/fastcdc-go v0.3.0 7 | github.com/SaveTheRbtz/zstd-seekable-format-go/pkg v0.7.3 8 | github.com/klauspost/compress v1.18.0 9 | github.com/schollz/progressbar/v3 v3.18.0 10 | go.uber.org/zap v1.27.0 11 | golang.org/x/term v0.32.0 12 | ) 13 | 14 | require ( 15 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 16 | github.com/google/btree v1.1.3 // indirect 17 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect 18 | github.com/rivo/uniseg v0.4.7 // indirect 19 | go.uber.org/atomic v1.11.0 // indirect 20 | go.uber.org/multierr v1.11.0 // indirect 21 | golang.org/x/sync v0.15.0 // indirect 22 | golang.org/x/sys v0.33.0 // indirect 23 | ) 24 | 
-------------------------------------------------------------------------------- /cmd/zstdseek/go.sum: -------------------------------------------------------------------------------- 1 | github.com/SaveTheRbtz/fastcdc-go v0.3.0 h1:JdHvLlnijDuisYIwpRDcHZEjbxvCqtEmJ3gf35VJBgA= 2 | github.com/SaveTheRbtz/fastcdc-go v0.3.0/go.mod h1:2kMKqvBv1h9wCaUfETqsVkSESsCiFhp4YyEHyz7/SfE= 3 | github.com/SaveTheRbtz/zstd-seekable-format-go/pkg v0.7.3 h1:BP0HiyNT3AQEYi+if3wkRcIdQFHtsw6xX3Kx0glckgA= 4 | github.com/SaveTheRbtz/zstd-seekable-format-go/pkg v0.7.3/go.mod h1:hMNtySovKkn2gdDuLqnqveP+mfhUSaBdoBcr2I7Zt0E= 5 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 6 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 7 | github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM= 8 | github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY= 9 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 10 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= 12 | github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= 13 | github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 14 | github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 15 | github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= 16 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 17 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= 18 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= 19 | github.com/pmezard/go-difflib 
v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 20 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 21 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 22 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 23 | github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA= 24 | github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec= 25 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 26 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 27 | go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= 28 | go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= 29 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 30 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 31 | go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= 32 | go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= 33 | go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= 34 | go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= 35 | golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= 36 | golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 37 | golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= 38 | golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 39 | golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= 40 | golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= 41 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 42 | gopkg.in/yaml.v3 v3.0.1/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 43 | -------------------------------------------------------------------------------- /cmd/zstdseek/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/sha512" 7 | "errors" 8 | "flag" 9 | "io" 10 | "log" 11 | "os" 12 | "strconv" 13 | "strings" 14 | 15 | "github.com/SaveTheRbtz/fastcdc-go" 16 | "github.com/klauspost/compress/zstd" 17 | "github.com/schollz/progressbar/v3" 18 | "go.uber.org/zap" 19 | "golang.org/x/term" 20 | 21 | seekable "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg" 22 | ) 23 | 24 | type readCloser struct { 25 | io.Reader 26 | io.Closer 27 | } 28 | 29 | func main() { 30 | ctx := context.Background() 31 | 32 | var ( 33 | inputFlag, chunkingFlag, outputFlag string 34 | qualityFlag int 35 | verifyFlag, verboseFlag bool 36 | ) 37 | 38 | flag.StringVar(&inputFlag, "f", "", "input filename") 39 | flag.StringVar(&outputFlag, "o", "", "output filename") 40 | flag.StringVar(&chunkingFlag, "c", "128:1024:8192", "min:avg:max chunking block size (in kb)") 41 | flag.BoolVar(&verifyFlag, "t", false, "test reading after the write") 42 | flag.IntVar(&qualityFlag, "q", 1, "compression quality (lower == faster)") 43 | flag.BoolVar(&verboseFlag, "v", false, "be verbose") 44 | 45 | flag.Parse() 46 | 47 | var err error 48 | var logger *zap.Logger 49 | if verboseFlag { 50 | logger, err = zap.NewDevelopment() 51 | } else { 52 | logger, err = zap.NewProduction() 53 | } 54 | if err != nil { 55 | log.Fatal("failed to initialize logger", err) 56 | } 57 | defer func() { 58 | _ = logger.Sync() 59 | }() 60 | 61 | if inputFlag == "" || outputFlag == "" { 62 | logger.Fatal("both input and output files need to be defined") 63 | } 64 | if verifyFlag && outputFlag == "-" { 65 | logger.Fatal("verify can't be used with stdout output") 66 | } 67 | 68 | bar := progressbar.DefaultSilent(0, "") 69 | 70 | inputFile := os.Stdin 71 
| if inputFlag != "-" { 72 | if inputFile, err = os.Open(inputFlag); err != nil { 73 | logger.Fatal("failed to open input", zap.Error(err)) 74 | } 75 | 76 | if term.IsTerminal(int(os.Stdout.Fd())) { 77 | size := int64(-1) 78 | stat, err := inputFile.Stat() 79 | if err == nil { 80 | size = stat.Size() 81 | } 82 | 83 | bar = progressbar.DefaultBytes( 84 | size, 85 | "compressing", 86 | ) 87 | } 88 | } 89 | 90 | var input io.ReadCloser = inputFile 91 | 92 | expected := sha512.New512_256() 93 | origDone := make(chan struct{}) 94 | if verifyFlag { 95 | pr, pw := io.Pipe() 96 | 97 | tee := io.TeeReader(inputFile, pw) 98 | input = readCloser{tee, pw} 99 | 100 | go func() { 101 | defer close(origDone) 102 | 103 | m, err := io.CopyBuffer(expected, pr, make([]byte, 128<<10)) 104 | if err != nil { 105 | logger.Fatal("failed to compute expected csum", zap.Int64("processed", m), zap.Error(err)) 106 | } 107 | }() 108 | } 109 | 110 | output := os.Stdout 111 | if outputFlag != "-" { 112 | output, err = os.OpenFile(outputFlag, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0o644) 113 | if err != nil { 114 | logger.Fatal("failed to open output", zap.Error(err)) 115 | } 116 | defer output.Close() 117 | } 118 | 119 | chunkParams := strings.Split(chunkingFlag, ":") 120 | if len(chunkParams) != 3 { 121 | logger.Fatal("failed parse chunker params. len() != 3", zap.Int("actual", len(chunkParams))) 122 | } 123 | mustConv := func(s string) int { 124 | n, err := strconv.Atoi(s) 125 | if err != nil { 126 | logger.Fatal("failed to parse int", zap.String("string", s), zap.Error(err)) 127 | } 128 | return n 129 | } 130 | minChunkSize := mustConv(chunkParams[0]) * 1024 131 | avgChunkSize := mustConv(chunkParams[1]) * 1024 132 | maxChunkSize := mustConv(chunkParams[2]) * 1024 133 | 134 | var zstdOpts []zstd.EOption = []zstd.EOption{ 135 | zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(qualityFlag)), 136 | } 137 | enc, err := zstd.NewWriter(nil, zstdOpts...) 
138 | if err != nil { 139 | logger.Fatal("failed to create zstd encoder", zap.Error(err)) 140 | } 141 | 142 | w, err := seekable.NewWriter(output, enc, seekable.WithWLogger(logger)) 143 | if err != nil { 144 | logger.Fatal("failed to create compressed writer", zap.Error(err)) 145 | } 146 | defer w.Close() 147 | 148 | // convert average chunk size to a number of bits 149 | logger.Debug("setting chunker params", zap.Int("min", minChunkSize), zap.Int("max", maxChunkSize)) 150 | chunker, err := fastcdc.NewChunker( 151 | input, 152 | fastcdc.Options{ 153 | MinSize: minChunkSize, 154 | AverageSize: avgChunkSize, 155 | MaxSize: maxChunkSize, 156 | }, 157 | ) 158 | if err != nil { 159 | logger.Fatal("failed to create chunker", zap.Error(err)) 160 | } 161 | 162 | frameSource := func() ([]byte, error) { 163 | chunk, err := chunker.Next() 164 | if err != nil { 165 | if errors.Is(err, io.EOF) { 166 | return nil, nil 167 | } 168 | return nil, err 169 | } 170 | // Chunker invalidates the data after calling Next, so we need to clone it 171 | return bytes.Clone(chunk.Data), nil 172 | } 173 | 174 | err = w.WriteMany(ctx, frameSource, seekable.WithWriteCallback(func(size uint32) { 175 | _ = bar.Add(int(size)) 176 | })) 177 | if err != nil { 178 | logger.Fatal("failed to write data", zap.Error(err)) 179 | } 180 | 181 | _ = bar.Finish() 182 | input.Close() 183 | w.Close() 184 | 185 | if verifyFlag { 186 | logger.Info("verifying checksum") 187 | 188 | verify, err := os.Open(outputFlag) 189 | if err != nil { 190 | logger.Fatal("failed to open file for verification", zap.Error(err)) 191 | } 192 | defer verify.Close() 193 | 194 | dec, err := zstd.NewReader(nil) 195 | if err != nil { 196 | logger.Fatal("failed to create zstd decompressor", zap.Error(err)) 197 | } 198 | defer dec.Close() 199 | 200 | reader, err := seekable.NewReader(verify, dec, seekable.WithRLogger(logger)) 201 | if err != nil { 202 | logger.Fatal("failed to create new seekable reader", zap.Error(err)) 203 | } 204 | 205 | 
actual := sha512.New512_256() 206 | m, err := io.CopyBuffer(actual, reader, make([]byte, 128<<10)) 207 | if err != nil { 208 | logger.Fatal("failed to compute actual csum", zap.Int64("processed", m), zap.Error(err)) 209 | } 210 | <-origDone 211 | 212 | if !bytes.Equal(actual.Sum(nil), expected.Sum(nil)) { 213 | logger.Fatal("checksum verification failed", 214 | zap.Binary("actual", actual.Sum(nil)), zap.Binary("expected", expected.Sum(nil))) 215 | } else { 216 | logger.Info("checksum verification succeeded", zap.Binary("actual", actual.Sum(nil))) 217 | } 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /pkg/decoder.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg/env" 5 | ) 6 | 7 | // Decoder is a byte-oriented API that is useful for cases where wrapping io.ReadSeeker is not desirable. 8 | type Decoder interface { 9 | // GetIndexByDecompOffset returns FrameOffsetEntry for an offset in the decompressed stream. 10 | // Will return nil if offset is greater or equal than Size(). 11 | GetIndexByDecompOffset(off uint64) *env.FrameOffsetEntry 12 | 13 | // GetIndexByID returns FrameOffsetEntry for a given frame id. 14 | // Will return nil if offset is greater or equal than NumFrames() or less than 0. 15 | GetIndexByID(id int64) *env.FrameOffsetEntry 16 | 17 | // Size returns the size of the uncompressed stream. 18 | Size() int64 19 | 20 | // NumFrames returns number of frames in the compressed stream. 21 | NumFrames() int64 22 | 23 | // Close closes the decoder feeing up any resources. 24 | Close() error 25 | } 26 | 27 | // NewDecoder creates a byte-oriented Decode interface from a given seektable index. 28 | // This index can either be produced by either Writer's WriteSeekTable or Encoder's EndStream. 29 | // Decoder can be used concurrently. 
30 | func NewDecoder(seekTable []byte, decoder ZSTDDecoder, opts ...rOption) (Decoder, error) { 31 | opts = append(opts, WithREnvironment(&decoderEnv{seekTable: seekTable})) 32 | 33 | sr, err := NewReader(nil, decoder, opts...) 34 | if err != nil { 35 | return nil, err 36 | } 37 | 38 | // Release seekTable reference to not leak memory. 39 | sr.(*readerImpl).env = nil 40 | 41 | return sr.(*readerImpl), err 42 | } 43 | 44 | type decoderEnv struct { 45 | seekTable []byte 46 | } 47 | 48 | func (d *decoderEnv) GetFrameByIndex(index env.FrameOffsetEntry) (p []byte, err error) { 49 | panic("should not be used") 50 | } 51 | 52 | func (d *decoderEnv) ReadFooter() ([]byte, error) { 53 | return d.seekTable, nil 54 | } 55 | 56 | func (d *decoderEnv) ReadSkipFrame(skippableFrameOffset int64) ([]byte, error) { 57 | return d.seekTable, nil 58 | } 59 | 60 | func (r *readerImpl) Size() int64 { 61 | return r.endOffset 62 | } 63 | 64 | func (r *readerImpl) NumFrames() int64 { 65 | return r.numFrames 66 | } 67 | 68 | func (r *readerImpl) GetIndexByDecompOffset(off uint64) (found *env.FrameOffsetEntry) { 69 | if off >= uint64(r.endOffset) { 70 | return nil 71 | } 72 | 73 | r.index.DescendLessOrEqual(&env.FrameOffsetEntry{DecompOffset: off}, func(index *env.FrameOffsetEntry) bool { 74 | found = index 75 | return false 76 | }) 77 | return 78 | } 79 | 80 | func (r *readerImpl) GetIndexByID(id int64) (found *env.FrameOffsetEntry) { 81 | if id < 0 { 82 | return nil 83 | } 84 | 85 | r.index.Descend(func(index *env.FrameOffsetEntry) bool { 86 | if index.ID == id { 87 | found = index 88 | return false 89 | } 90 | return true 91 | }) 92 | return 93 | } 94 | -------------------------------------------------------------------------------- /pkg/decoder_test.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/klauspost/compress/zstd" 7 | "github.com/stretchr/testify/assert" 8 | 
"github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestDecoder(t *testing.T) { 12 | t.Parallel() 13 | 14 | dec, err := zstd.NewReader(nil) 15 | require.NoError(t, err) 16 | defer dec.Close() 17 | 18 | d, err := NewDecoder(checksum[17+18:], dec) 19 | require.NoError(t, err) 20 | defer func() { require.NoError(t, d.Close()) }() 21 | 22 | assert.Equal(t, int64(len(sourceString)), d.Size()) 23 | assert.Equal(t, int64(2), d.NumFrames()) 24 | 25 | // First frame. 26 | 27 | bytes1 := []byte("test") 28 | for _, off := range []uint64{0, 1, 3} { 29 | indexOff0 := d.GetIndexByDecompOffset(off) 30 | indexID0 := d.GetIndexByID(0) 31 | assert.Equal(t, indexOff0, indexID0) 32 | assert.NotNil(t, indexOff0) 33 | assert.Equal(t, int64(0), indexOff0.ID) 34 | assert.Equal(t, uint32(len(bytes1)), indexOff0.DecompSize) 35 | assert.NotEqual(t, uint32(0), indexOff0.Checksum) 36 | 37 | decomp, err := dec.DecodeAll( 38 | checksum[indexOff0.CompOffset:indexOff0.CompOffset+uint64(indexOff0.CompSize)], nil) 39 | require.NoError(t, err) 40 | assert.Equal(t, decomp, bytes1) 41 | } 42 | 43 | // Second frame. 44 | 45 | bytes2 := []byte("test2") 46 | for _, off := range []uint64{4, 5, 8} { 47 | indexOff1 := d.GetIndexByDecompOffset(off) 48 | indexID1 := d.GetIndexByID(1) 49 | assert.Equal(t, indexOff1, indexID1) 50 | assert.NotNil(t, indexOff1) 51 | assert.Equal(t, int64(1), indexOff1.ID) 52 | assert.Equal(t, uint32(len(bytes2)), indexOff1.DecompSize) 53 | assert.NotEqual(t, uint32(0), indexOff1.Checksum) 54 | 55 | decomp, err := dec.DecodeAll( 56 | checksum[indexOff1.CompOffset:indexOff1.CompOffset+uint64(indexOff1.CompSize)], nil) 57 | require.NoError(t, err) 58 | assert.Equal(t, decomp, bytes2) 59 | } 60 | 61 | // Out of bounds. 
62 | 63 | for _, off := range []uint64{9, 99} { 64 | assert.Nil(t, d.GetIndexByDecompOffset(off)) 65 | } 66 | 67 | for _, id := range []int64{-1, 2, 99} { 68 | assert.Nil(t, d.GetIndexByID(id)) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /pkg/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022, Alexey Ivanov 2 | // All rights reserved. 3 | 4 | // Package seekable adds the ability to create ZSTD files in the seekable format 5 | // and to randomly access them using uncompressed offsets. 6 | package seekable 7 | -------------------------------------------------------------------------------- /pkg/doc_test.go: -------------------------------------------------------------------------------- 1 | package seekable_test 2 | -------------------------------------------------------------------------------- /pkg/encoder.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/cespare/xxhash/v2" 7 | "go.uber.org/zap" 8 | ) 9 | 10 | // Encoder is a byte-oriented API that is useful where wrapping io.Writer is not desirable. 11 | type Encoder interface { 12 | // Encode returns compressed data and appends a frame to in-memory seek table. 13 | Encode(src []byte) ([]byte, error) 14 | 15 | // EndStream returns in-memory seek table as a ZSTD's skippable frame. 16 | EndStream() ([]byte, error) 17 | } 18 | 19 | func NewEncoder(encoder ZSTDEncoder, opts ...wOption) (Encoder, error) { 20 | sw, err := NewWriter(nil, encoder, opts...) 
21 | if err != nil { 22 | return nil, err 23 | } 24 | 25 | return sw.(*writerImpl), err 26 | } 27 | 28 | func (s *writerImpl) encodeOne(src []byte) ([]byte, seekTableEntry, error) { 29 | if int64(len(src)) > maxChunkSize { 30 | return nil, seekTableEntry{}, 31 | fmt.Errorf("chunk size too big for seekable format: %d > %d", 32 | len(src), maxChunkSize) 33 | } 34 | 35 | if len(src) == 0 { 36 | return nil, seekTableEntry{}, nil 37 | } 38 | 39 | dst := s.enc.EncodeAll(src, nil) 40 | 41 | if int64(len(dst)) > maxChunkSize { 42 | return nil, seekTableEntry{}, 43 | fmt.Errorf("result size too big for seekable format: %d > %d", 44 | len(dst), maxChunkSize) 45 | } 46 | 47 | return dst, seekTableEntry{ 48 | CompressedSize: uint32(len(dst)), 49 | DecompressedSize: uint32(len(src)), 50 | Checksum: uint32((xxhash.Sum64(src) << 32) >> 32), 51 | }, nil 52 | } 53 | 54 | func (s *writerImpl) Encode(src []byte) ([]byte, error) { 55 | dst, entry, err := s.encodeOne(src) 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | s.logger.Debug("appending frame", zap.Object("frame", &entry)) 61 | s.frameEntries = append(s.frameEntries, entry) 62 | return dst, nil 63 | } 64 | 65 | func (s *writerImpl) EndStream() ([]byte, error) { 66 | if int64(len(s.frameEntries)) > maxNumberOfFrames { 67 | return nil, fmt.Errorf("number of frames for seekable format: %d > %d", 68 | len(s.frameEntries), maxNumberOfFrames) 69 | } 70 | 71 | seekTable := make([]byte, len(s.frameEntries)*12+9) 72 | for i, e := range s.frameEntries { 73 | e.marshalBinaryInline(seekTable[i*12 : (i+1)*12]) 74 | } 75 | 76 | footer := seekTableFooter{ 77 | NumberOfFrames: uint32(len(s.frameEntries)), 78 | SeekTableDescriptor: seekTableDescriptor{ 79 | ChecksumFlag: true, 80 | }, 81 | SeekableMagicNumber: seekableMagicNumber, 82 | } 83 | 84 | footer.marshalBinaryInline(seekTable[len(s.frameEntries)*12 : len(s.frameEntries)*12+9]) 85 | return createSkippableFrame(seekableTag, seekTable) 86 | } 87 | 
-------------------------------------------------------------------------------- /pkg/encoder_test.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/klauspost/compress/zstd" 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestEncoder(t *testing.T) { 12 | t.Parallel() 13 | 14 | enc, err := zstd.NewWriter(nil) 15 | require.NoError(t, err) 16 | 17 | e, err := NewEncoder(enc) 18 | require.NoError(t, err) 19 | 20 | decBytes1 := sourceString[:4] 21 | encBytes1, err := e.Encode([]byte(decBytes1)) 22 | require.NoError(t, err) 23 | 24 | decBytes2 := sourceString[4:] 25 | encBytes2, err := e.Encode([]byte(decBytes2)) 26 | require.NoError(t, err) 27 | 28 | footer, err := e.EndStream() 29 | require.NoError(t, err) 30 | 31 | // Standard Reader. 32 | dec, err := zstd.NewReader(nil) 33 | require.NoError(t, err) 34 | 35 | combined := append(append([]byte{}, encBytes1...), encBytes2...) 36 | decompressed, err := dec.DecodeAll(combined, nil) 37 | require.NoError(t, err) 38 | assert.Equal(t, sourceString, string(decompressed)) 39 | 40 | // Seekable Decoder. 41 | d, err := NewDecoder(footer, dec) 42 | require.NoError(t, err) 43 | 44 | assert.Equal(t, int64(len(sourceString)), d.Size()) 45 | assert.Equal(t, int64(2), d.NumFrames()) 46 | } 47 | -------------------------------------------------------------------------------- /pkg/env/environments.go: -------------------------------------------------------------------------------- 1 | package env 2 | 3 | // WEnvironment can be used to inject a custom file writer that is different from normal WriteCloser. 4 | // This is useful when, for example there is a custom chunking code. 5 | type WEnvironment interface { 6 | // WriteFrame is called each time frame is encoded and needs to be written upstream. 
7 | WriteFrame(p []byte) (n int, err error) 8 | // WriteSeekTable is called on Close to flush the seek table. 9 | WriteSeekTable(p []byte) (n int, err error) 10 | } 11 | 12 | // REnvironment can be used to inject a custom file reader that is different from normal ReadSeeker. 13 | // This is useful when, for example there is a custom chunking code. 14 | type REnvironment interface { 15 | // GetFrameByIndex returns the compressed frame by its index. 16 | GetFrameByIndex(index FrameOffsetEntry) ([]byte, error) 17 | // ReadFooter returns buffer whose last 9 bytes are interpreted as a `Seek_Table_Footer`. 18 | ReadFooter() ([]byte, error) 19 | // ReadSkipFrame returns the full Seek Table Skippable frame 20 | // including the `Skippable_Magic_Number` and `Frame_Size`. 21 | ReadSkipFrame(skippableFrameOffset int64) ([]byte, error) 22 | } 23 | -------------------------------------------------------------------------------- /pkg/env/frame_offset.go: -------------------------------------------------------------------------------- 1 | package env 2 | 3 | import ( 4 | "go.uber.org/zap/zapcore" 5 | ) 6 | 7 | // FrameOffsetEntry is the post-processed view of the Seek_Table_Entries suitable for indexing. 8 | type FrameOffsetEntry struct { 9 | // ID is the is the sequence number of the frame in the index. 10 | ID int64 11 | 12 | // CompOffset is the offset within compressed stream. 13 | CompOffset uint64 14 | // DecompOffset is the offset within decompressed stream. 15 | DecompOffset uint64 16 | // CompSize is the size of the compressed frame. 17 | CompSize uint32 18 | // DecompSize is the size of the original data. 19 | DecompSize uint32 20 | 21 | // Checksum is the lower 32 bits of the XXH64 hash of the uncompressed data. 
22 | Checksum uint32 23 | } 24 | 25 | func (o *FrameOffsetEntry) MarshalLogObject(enc zapcore.ObjectEncoder) error { 26 | enc.AddInt64("ID", o.ID) 27 | enc.AddUint64("CompOffset", o.CompOffset) 28 | enc.AddUint64("DecompOffset", o.DecompOffset) 29 | enc.AddUint32("CompSize", o.CompSize) 30 | enc.AddUint32("DecompSize", o.DecompSize) 31 | enc.AddUint32("Checksum", o.Checksum) 32 | 33 | return nil 34 | } 35 | 36 | func Less(a, b *FrameOffsetEntry) bool { 37 | return a.DecompOffset < b.DecompOffset 38 | } 39 | -------------------------------------------------------------------------------- /pkg/example_test.go: -------------------------------------------------------------------------------- 1 | package seekable_test 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "os" 8 | 9 | "github.com/klauspost/compress/zstd" 10 | 11 | seekable "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg" 12 | ) 13 | 14 | func Example() { 15 | f, err := os.CreateTemp("", "example") 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | defer func() { 20 | _ = os.Remove(f.Name()) 21 | }() 22 | 23 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | defer func() { 28 | _ = enc.Close() 29 | }() 30 | 31 | w, err := seekable.NewWriter(f, enc) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | 36 | // Write data in chunks. 37 | for _, b := range [][]byte{[]byte("Hello"), []byte(" "), []byte("World!")} { 38 | _, err = w.Write(b) 39 | if err != nil { 40 | log.Fatal(err) 41 | } 42 | } 43 | 44 | // Close and flush seek table. 
45 | err = w.Close() 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | 50 | dec, err := zstd.NewReader(nil) 51 | if err != nil { 52 | log.Fatal(err) 53 | } 54 | defer dec.Close() 55 | 56 | r, err := seekable.NewReader(f, dec) 57 | if err != nil { 58 | log.Fatal(err) 59 | } 60 | defer func() { 61 | _ = r.Close() 62 | }() 63 | 64 | ello := make([]byte, 4) 65 | // ReaderAt 66 | _, err = r.ReadAt(ello, 1) 67 | if err != nil { 68 | log.Fatal(err) 69 | } 70 | fmt.Printf("Offset: 1 from the start: %s\n", string(ello)) 71 | 72 | world := make([]byte, 5) 73 | // Seeker 74 | _, err = r.Seek(-6, io.SeekEnd) 75 | if err != nil { 76 | log.Fatal(err) 77 | } 78 | // Reader 79 | _, err = r.Read(world) 80 | if err != nil { 81 | log.Fatal(err) 82 | } 83 | fmt.Printf("Offset: -6 from the end: %s\n", string(world)) 84 | 85 | _, _ = f.Seek(0, io.SeekStart) 86 | 87 | // Standard ZSTD Reader. 88 | dec, err = zstd.NewReader(f) 89 | if err != nil { 90 | log.Fatal(err) 91 | } 92 | defer dec.Close() 93 | 94 | all, err := io.ReadAll(dec) 95 | if err != nil { 96 | log.Fatal(err) 97 | } 98 | 99 | fmt.Printf("Whole string: %s\n", string(all)) 100 | 101 | // Output: 102 | // Offset: 1 from the start: ello 103 | // Offset: -6 from the end: World 104 | // Whole string: Hello World! 
105 | } 106 | -------------------------------------------------------------------------------- /pkg/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/SaveTheRbtz/zstd-seekable-format-go/pkg 2 | 3 | go 1.24.4 4 | 5 | require ( 6 | github.com/cespare/xxhash/v2 v2.3.0 7 | github.com/google/btree v1.1.3 8 | github.com/klauspost/compress v1.18.0 9 | github.com/stretchr/testify v1.10.0 10 | go.uber.org/atomic v1.11.0 11 | go.uber.org/multierr v1.11.0 12 | go.uber.org/zap v1.27.0 13 | golang.org/x/sync v0.15.0 14 | ) 15 | 16 | require ( 17 | github.com/davecgh/go-spew v1.1.1 // indirect 18 | github.com/pmezard/go-difflib v1.0.0 // indirect 19 | gopkg.in/yaml.v3 v3.0.1 // indirect 20 | ) 21 | -------------------------------------------------------------------------------- /pkg/go.sum: -------------------------------------------------------------------------------- 1 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 2 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= 6 | github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= 7 | github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 8 | github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 9 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 10 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 11 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 12 | github.com/stretchr/testify v1.10.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 13 | go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= 14 | go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= 15 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 16 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 17 | go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= 18 | go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= 19 | go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= 20 | go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= 21 | golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= 22 | golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 23 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 24 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 25 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 26 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 27 | -------------------------------------------------------------------------------- /pkg/reader.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "math" 9 | "sync" 10 | 11 | "github.com/cespare/xxhash/v2" 12 | "github.com/google/btree" 13 | "go.uber.org/atomic" 14 | "go.uber.org/zap" 15 | 16 | "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg/env" 17 | ) 18 | 19 | type cachedFrame struct { 20 | m sync.Mutex 21 | 22 | offset uint64 23 | data []byte 24 | } 25 | 26 | func (f *cachedFrame) replace(offset uint64, data []byte) { 27 | f.m.Lock() 28 | defer f.m.Unlock() 29 | 30 | f.offset = offset 31 | 
f.data = data 32 | } 33 | 34 | func (f *cachedFrame) get() (uint64, []byte) { 35 | f.m.Lock() 36 | defer f.m.Unlock() 37 | 38 | return f.offset, f.data 39 | } 40 | 41 | // readSeekerEnvImpl is the environment implementation for the io.ReadSeeker. 42 | type readSeekerEnvImpl struct { 43 | rs io.ReadSeeker 44 | } 45 | 46 | func (rs *readSeekerEnvImpl) GetFrameByIndex(index env.FrameOffsetEntry) (p []byte, err error) { 47 | p = make([]byte, index.CompSize) 48 | off := int64(index.CompOffset) 49 | 50 | switch v := rs.rs.(type) { 51 | case io.ReaderAt: 52 | _, err = v.ReadAt(p, off) 53 | if errors.Is(err, io.EOF) { 54 | err = nil 55 | } 56 | default: 57 | _, err = v.Seek(off, io.SeekStart) 58 | if err != nil { 59 | return nil, err 60 | } 61 | _, err = io.ReadFull(rs.rs, p) 62 | } 63 | 64 | return 65 | } 66 | 67 | func (rs *readSeekerEnvImpl) ReadFooter() ([]byte, error) { 68 | n, err := rs.rs.Seek(-seekTableFooterOffset, io.SeekEnd) 69 | if err != nil { 70 | return nil, fmt.Errorf("failed to seek to: %d: %w", -seekTableFooterOffset, err) 71 | } 72 | 73 | buf := make([]byte, seekTableFooterOffset) 74 | _, err = io.ReadFull(rs.rs, buf) 75 | if err != nil { 76 | return nil, fmt.Errorf("failed to read footer at: %d: %w", n, err) 77 | } 78 | 79 | return buf, nil 80 | } 81 | 82 | func (rs *readSeekerEnvImpl) ReadSkipFrame(skippableFrameOffset int64) ([]byte, error) { 83 | n, err := rs.rs.Seek(-skippableFrameOffset, io.SeekEnd) 84 | if err != nil { 85 | return nil, fmt.Errorf("failed to seek to: %d: %w", -skippableFrameOffset, err) 86 | } 87 | 88 | buf := make([]byte, skippableFrameOffset) 89 | _, err = io.ReadFull(rs.rs, buf) 90 | if err != nil { 91 | return nil, fmt.Errorf("failed to read skippable frame header at: %d: %w", n, err) 92 | } 93 | return buf, nil 94 | } 95 | 96 | type readerImpl struct { 97 | dec ZSTDDecoder 98 | index *btree.BTreeG[*env.FrameOffsetEntry] 99 | 100 | checksums bool 101 | 102 | offset int64 103 | 104 | numFrames int64 105 | endOffset int64 106 | 
107 | logger *zap.Logger 108 | env env.REnvironment 109 | 110 | closed atomic.Bool 111 | 112 | // TODO: Add simple LRU cache. 113 | cachedFrame cachedFrame 114 | } 115 | 116 | var ( 117 | _ io.Seeker = (*readerImpl)(nil) 118 | _ io.Reader = (*readerImpl)(nil) 119 | _ io.ReaderAt = (*readerImpl)(nil) 120 | _ io.Closer = (*readerImpl)(nil) 121 | ) 122 | 123 | type Reader interface { 124 | // Seek implements io.Seeker interface to randomly access data. 125 | // This method is NOT goroutine-safe and CAN NOT be called 126 | // concurrently since it modifies the underlying offset. 127 | Seek(offset int64, whence int) (int64, error) 128 | 129 | // Read implements io.Reader interface to sequentially access data. 130 | // This method is NOT goroutine-safe and CAN NOT be called 131 | // concurrently since it modifies the underlying offset. 132 | Read(p []byte) (n int, err error) 133 | 134 | // ReadAt implements io.ReaderAt interface to randomly access data. 135 | // This method is goroutine-safe and can be called concurrently ONLY if 136 | // the underlying reader supports io.ReaderAt interface. 137 | ReadAt(p []byte, off int64) (n int, err error) 138 | 139 | // Close implements io.Closer interface free up any resources. 140 | Close() error 141 | } 142 | 143 | // ZSTDDecoder is the decompressor. Tested with github.com/klauspost/compress/zstd. 144 | type ZSTDDecoder interface { 145 | DecodeAll(input, dst []byte) ([]byte, error) 146 | } 147 | 148 | // NewReader returns ZSTD stream reader that can be randomly accessed using uncompressed data offset. 149 | // Ideally, passed io.ReadSeeker should implement io.ReaderAt interface. 
150 | func NewReader(rs io.ReadSeeker, decoder ZSTDDecoder, opts ...rOption) (Reader, error) { 151 | sr := readerImpl{ 152 | dec: decoder, 153 | } 154 | 155 | sr.logger = zap.NewNop() 156 | for _, o := range opts { 157 | err := o(&sr) 158 | if err != nil { 159 | return nil, err 160 | } 161 | } 162 | 163 | if sr.env == nil { 164 | if rs == nil { 165 | return nil, fmt.Errorf("nil ReadSeeker and no custom environment supplied") 166 | } 167 | sr.env = &readSeekerEnvImpl{ 168 | rs: rs, 169 | } 170 | } 171 | 172 | tree, last, err := sr.indexFooter() 173 | if err != nil { 174 | return nil, err 175 | } 176 | 177 | sr.index = tree 178 | if last != nil { 179 | sr.endOffset = int64(last.DecompOffset) + int64(last.DecompSize) 180 | sr.numFrames = last.ID + 1 181 | } else { 182 | sr.endOffset = 0 183 | sr.numFrames = 0 184 | } 185 | 186 | return &sr, nil 187 | } 188 | 189 | func (r *readerImpl) ReadAt(p []byte, off int64) (n int, err error) { 190 | for m := 0; n < len(p) && err == nil; n += m { 191 | _, m, err = r.read(p[n:], off+int64(n)) 192 | } 193 | return 194 | } 195 | 196 | func (r *readerImpl) Read(p []byte) (n int, err error) { 197 | offset, n, err := r.read(p, r.offset) 198 | if err != nil { 199 | if errors.Is(err, io.EOF) { 200 | r.offset = r.endOffset 201 | } 202 | return 203 | } 204 | r.offset = offset 205 | return 206 | } 207 | 208 | func (r *readerImpl) Close() error { 209 | if r.closed.CompareAndSwap(false, true) { 210 | r.cachedFrame.replace(math.MaxUint64, nil) 211 | r.index = nil 212 | } 213 | return nil 214 | } 215 | 216 | func (r *readerImpl) read(dst []byte, off int64) (int64, int, error) { 217 | if r.closed.Load() { 218 | return 0, 0, fmt.Errorf("reader is closed") 219 | } 220 | 221 | if off >= r.endOffset { 222 | return 0, 0, io.EOF 223 | } 224 | if off < 0 { 225 | return 0, 0, fmt.Errorf("offset before the start of the file: %d", off) 226 | } 227 | 228 | index := r.GetIndexByDecompOffset(uint64(off)) 229 | if index == nil { 230 | return 0, 0, 
fmt.Errorf("failed to get index by offset: %d", off) 231 | } 232 | if off < int64(index.DecompOffset) || off > int64(index.DecompOffset)+int64(index.DecompSize) { 233 | return 0, 0, fmt.Errorf("offset outside of index bounds: %d: min: %d, max: %d", 234 | off, int64(index.DecompOffset), int64(index.DecompOffset)+int64(index.DecompSize)) 235 | } 236 | 237 | var decompressed []byte 238 | 239 | cachedOffset, cachedData := r.cachedFrame.get() 240 | if cachedOffset == index.DecompOffset && cachedData != nil { 241 | // fastpath 242 | decompressed = cachedData 243 | } else { 244 | // slowpath 245 | if index.CompSize > maxDecoderFrameSize { 246 | return 0, 0, fmt.Errorf("index.CompSize is too big: %d > %d", 247 | index.CompSize, maxDecoderFrameSize) 248 | } 249 | 250 | src, err := r.env.GetFrameByIndex(*index) 251 | if err != nil { 252 | return 0, 0, fmt.Errorf("failed to read compressed data at: %d, %w", index.CompOffset, err) 253 | } 254 | 255 | if len(src) != int(index.CompSize) { 256 | return 0, 0, fmt.Errorf("compressed size does not match index at: %d: expected: %d, index: %+v", 257 | off, len(src), index) 258 | } 259 | 260 | decompressed, err = r.dec.DecodeAll(src, nil) 261 | if err != nil { 262 | return 0, 0, fmt.Errorf("failed to decompress data data at: %d, %w", index.CompOffset, err) 263 | } 264 | 265 | if r.checksums { 266 | checksum := uint32((xxhash.Sum64(decompressed) << 32) >> 32) 267 | if index.Checksum != checksum { 268 | return 0, 0, fmt.Errorf("checksum verification failed at: %d: expected: %d, actual: %d", 269 | index.CompOffset, index.Checksum, checksum) 270 | } 271 | } 272 | r.cachedFrame.replace(index.DecompOffset, decompressed) 273 | } 274 | 275 | if len(decompressed) != int(index.DecompSize) { 276 | return 0, 0, fmt.Errorf("index corruption: len: %d, expected: %d", len(decompressed), int(index.DecompSize)) 277 | } 278 | 279 | offsetWithinFrame := uint64(off) - index.DecompOffset 280 | 281 | size := uint64(len(decompressed)) - offsetWithinFrame 282 
| if size > uint64(len(dst)) { 283 | size = uint64(len(dst)) 284 | } 285 | 286 | r.logger.Debug("decompressed", zap.Uint64("offsetWithinFrame", offsetWithinFrame), zap.Uint64("end", offsetWithinFrame+size), 287 | zap.Uint64("size", size), zap.Int("lenDecompressed", len(decompressed)), zap.Int("lenDst", len(dst)), zap.Object("index", index)) 288 | copy(dst, decompressed[offsetWithinFrame:offsetWithinFrame+size]) 289 | 290 | return off + int64(size), int(size), nil 291 | } 292 | 293 | func (r *readerImpl) Seek(offset int64, whence int) (int64, error) { 294 | newOffset := r.offset 295 | switch whence { 296 | case io.SeekCurrent: 297 | newOffset += offset 298 | case io.SeekStart: 299 | newOffset = offset 300 | case io.SeekEnd: 301 | newOffset = r.endOffset + offset 302 | default: 303 | return 0, fmt.Errorf("unknown whence: %d", whence) 304 | } 305 | 306 | if newOffset < 0 { 307 | return 0, fmt.Errorf("offset before the start of the file: %d (%d + %d)", 308 | newOffset, r.offset, offset) 309 | } 310 | 311 | r.offset = newOffset 312 | return r.offset, nil 313 | } 314 | 315 | func (r *readerImpl) indexFooter() (*btree.BTreeG[*env.FrameOffsetEntry], *env.FrameOffsetEntry, error) { 316 | // read seekTableFooter 317 | buf, err := r.env.ReadFooter() 318 | if err != nil { 319 | return nil, nil, fmt.Errorf("failed to read footer: %w", err) 320 | } 321 | if len(buf) < seekTableFooterOffset { 322 | return nil, nil, fmt.Errorf("footer is too small: %d", len(buf)) 323 | } 324 | 325 | // parse seekTableFooter 326 | footer := seekTableFooter{} 327 | err = footer.UnmarshalBinary(buf[len(buf)-seekTableFooterOffset:]) 328 | if err != nil { 329 | return nil, nil, fmt.Errorf("failed to parse footer %+v: %w", buf, err) 330 | } 331 | r.logger.Debug("loaded", zap.Object("footer", &footer)) 332 | 333 | r.checksums = footer.SeekTableDescriptor.ChecksumFlag 334 | 335 | // read SeekTableEntries 336 | seekTableEntrySize := int64(8) 337 | if footer.SeekTableDescriptor.ChecksumFlag { 338 | 
seekTableEntrySize += 4 339 | } 340 | 341 | skippableFrameOffset := seekTableFooterOffset + seekTableEntrySize*int64(footer.NumberOfFrames) 342 | skippableFrameOffset += frameSizeFieldSize 343 | skippableFrameOffset += skippableMagicNumberFieldSize 344 | 345 | if skippableFrameOffset > maxDecoderFrameSize { 346 | return nil, nil, fmt.Errorf("frame offset is too big: %d > %d", 347 | skippableFrameOffset, maxDecoderFrameSize) 348 | } 349 | 350 | buf, err = r.env.ReadSkipFrame(skippableFrameOffset) 351 | if err != nil { 352 | return nil, nil, fmt.Errorf("failed to read footer: %w", err) 353 | } 354 | 355 | if len(buf) < frameSizeFieldSize+skippableMagicNumberFieldSize+seekTableFooterOffset { 356 | return nil, nil, fmt.Errorf("skip frame is too small: %d", len(buf)) 357 | } 358 | 359 | // parse SeekTableEntries 360 | magic := binary.LittleEndian.Uint32(buf[0:4]) 361 | if magic != skippableFrameMagic+seekableTag { 362 | return nil, nil, fmt.Errorf("skippable frame magic mismatch %d vs %d", 363 | magic, skippableFrameMagic+seekableTag) 364 | } 365 | 366 | expectedFrameSize := int64(len(buf)) - frameSizeFieldSize - skippableMagicNumberFieldSize 367 | frameSize := int64(binary.LittleEndian.Uint32(buf[4:8])) 368 | if frameSize != expectedFrameSize { 369 | return nil, nil, fmt.Errorf("skippable frame size mismatch: expected: %d, actual: %d", 370 | expectedFrameSize, frameSize) 371 | } 372 | 373 | if frameSize > maxDecoderFrameSize { 374 | return nil, nil, fmt.Errorf("frame is too big: %d > %d", frameSize, maxDecoderFrameSize) 375 | } 376 | 377 | return r.indexSeekTableEntries(buf[8:len(buf)-seekTableFooterOffset], uint64(seekTableEntrySize)) 378 | } 379 | 380 | func (r *readerImpl) indexSeekTableEntries(p []byte, entrySize uint64) ( 381 | *btree.BTreeG[*env.FrameOffsetEntry], *env.FrameOffsetEntry, error, 382 | ) { 383 | if uint64(len(p))%entrySize != 0 { 384 | return nil, nil, fmt.Errorf("seek table size is not multiple of %d", entrySize) 385 | } 386 | 387 | // TODO: make 
fan-out tunable? 388 | t := btree.NewG(8, env.Less) 389 | entry := seekTableEntry{} 390 | var compOffset, decompOffset uint64 391 | 392 | var last *env.FrameOffsetEntry 393 | var i int64 394 | for indexOffset := uint64(0); indexOffset < uint64(len(p)); indexOffset += entrySize { 395 | err := entry.UnmarshalBinary(p[indexOffset : indexOffset+entrySize]) 396 | if err != nil { 397 | return nil, nil, fmt.Errorf("failed to parse entry %+v at: %d: %w", 398 | p[indexOffset:indexOffset+entrySize], indexOffset, err) 399 | } 400 | 401 | last = &env.FrameOffsetEntry{ 402 | ID: i, 403 | CompOffset: compOffset, 404 | DecompOffset: decompOffset, 405 | CompSize: entry.CompressedSize, 406 | DecompSize: entry.DecompressedSize, 407 | Checksum: entry.Checksum, 408 | } 409 | t.ReplaceOrInsert(last) 410 | compOffset += uint64(entry.CompressedSize) 411 | decompOffset += uint64(entry.DecompressedSize) 412 | i++ 413 | } 414 | 415 | return t, last, nil 416 | } 417 | -------------------------------------------------------------------------------- /pkg/reader_fuzz_test.go: -------------------------------------------------------------------------------- 1 | //go:build go1.18 2 | // +build go1.18 3 | 4 | package seekable 5 | 6 | import ( 7 | "errors" 8 | "io" 9 | "testing" 10 | 11 | "github.com/klauspost/compress/zstd" 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func FuzzReader(f *testing.F) { 17 | dec, err := zstd.NewReader(nil, zstd.WithDecoderMaxMemory(1<<24)) 18 | require.NoError(f, err) 19 | defer dec.Close() 20 | 21 | f.Add(noChecksum, int64(0), uint8(1), io.SeekStart) 22 | f.Add(checksum, int64(-1), uint8(2), io.SeekEnd) 23 | f.Add(checksum, int64(1), uint8(0), io.SeekCurrent) 24 | 25 | f.Fuzz(func(t *testing.T, in []byte, off int64, l uint8, whence int) { 26 | sr := &seekableBufferReaderAt{buf: in} 27 | r, err := NewReader(sr, dec) 28 | if err != nil { 29 | return 30 | } 31 | defer func() { require.NoError(t, r.Close()) }() 32 | 
33 | i, err := r.Seek(off, whence) 34 | if err != nil { 35 | return 36 | } 37 | 38 | buf1 := make([]byte, l) 39 | n, err := r.Read(buf1) 40 | if err != nil && !errors.Is(err, io.EOF) { 41 | return 42 | } 43 | 44 | buf2 := make([]byte, n) 45 | m, err := r.ReadAt(buf2, i) 46 | // t.Logf("off: %d, l: %d, whence: %d, i: %d, n: %d, m: %d", off, l, whence, i, n, m) 47 | 48 | if !errors.Is(err, io.EOF) { 49 | require.NoError(t, err) 50 | } 51 | 52 | assert.Equal(t, m, n) 53 | assert.Equal(t, buf1[:n], buf2) 54 | }) 55 | } 56 | 57 | func FuzzReaderConst(f *testing.F) { 58 | f.Add(int64(0), uint8(1), int8(io.SeekStart)) 59 | dec, err := zstd.NewReader(nil) 60 | require.NoError(f, err) 61 | defer dec.Close() 62 | 63 | sr := &seekableBufferReaderAt{buf: checksum} 64 | r, err := NewReader(sr, dec) 65 | require.NoError(f, err) 66 | defer func() { require.NoError(f, r.Close()) }() 67 | 68 | f.Fuzz(func(t *testing.T, off int64, l uint8, whence int8) { 69 | i, err := r.Seek(off, int(whence)) 70 | if err != nil { 71 | return 72 | } 73 | 74 | buf1 := make([]byte, l) 75 | n, err := r.Read(buf1) 76 | if err != nil && !errors.Is(err, io.EOF) { 77 | return 78 | } 79 | 80 | buf2 := make([]byte, n) 81 | m, err := r.ReadAt(buf2, i) 82 | // t.Logf("off: %d, l: %d, whence: %d, i: %d, n: %d, m: %d", off, l, whence, i, n, m) 83 | 84 | if !errors.Is(err, io.EOF) { 85 | require.NoError(t, err) 86 | } 87 | 88 | assert.Equal(t, m, n) 89 | assert.Equal(t, buf1[:n], buf2) 90 | 91 | if n > 0 { 92 | assert.Equal(t, string(buf2), sourceString[i:i+int64(n)]) 93 | } 94 | }) 95 | } 96 | -------------------------------------------------------------------------------- /pkg/reader_options.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "go.uber.org/zap" 5 | 6 | "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg/env" 7 | ) 8 | 9 | type rOption func(*readerImpl) error 10 | 11 | func WithRLogger(l *zap.Logger) rOption { 12 | return 
func(r *readerImpl) error { r.logger = l; return nil } 13 | } 14 | 15 | func WithREnvironment(e env.REnvironment) rOption { 16 | return func(r *readerImpl) error { r.env = e; return nil } 17 | } 18 | -------------------------------------------------------------------------------- /pkg/reader_test.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "strconv" 8 | "testing" 9 | 10 | "github.com/klauspost/compress/zstd" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | 14 | "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg/env" 15 | ) 16 | 17 | const sourceString = "testtest2" 18 | 19 | var checksum = []byte{ 20 | // frame 1 21 | 0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0x21, 0x00, 0x00, 22 | // "test" 23 | 0x74, 0x65, 0x73, 0x74, 24 | 0x39, 0x81, 0x67, 0xdb, 25 | // frame 2 26 | 0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0x29, 0x00, 0x00, 27 | // "test2" 28 | 0x74, 0x65, 0x73, 0x74, 0x32, 29 | 0x87, 0xeb, 0x11, 0x71, 30 | // skippable frame 31 | 0x5e, 0x2a, 0x4d, 0x18, 32 | 0x21, 0x00, 0x00, 0x00, 33 | // index 34 | 0x11, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x39, 0x81, 0x67, 0xdb, 35 | 0x12, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x87, 0xeb, 0x11, 0x71, 36 | // footer 37 | 0x02, 0x00, 0x00, 0x00, 38 | 0x80, 39 | 0xb1, 0xea, 0x92, 0x8f, 40 | } 41 | 42 | var noChecksum = []byte{ 43 | // frame 1 44 | 0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0x21, 0x00, 0x00, 45 | // "test" 46 | 0x74, 0x65, 0x73, 0x74, 47 | 0x39, 0x81, 0x67, 0xdb, 48 | // frame 2 49 | 0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0x29, 0x00, 0x00, 50 | // "test2" 51 | 0x74, 0x65, 0x73, 0x74, 0x32, 52 | 0x87, 0xeb, 0x11, 0x71, 53 | // skippable frame 54 | 0x5e, 0x2a, 0x4d, 0x18, 55 | 0x19, 0x00, 0x00, 0x00, 56 | // index 57 | 0x11, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 58 | 0x12, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 59 | // footer 60 | 0x02, 0x00, 0x00, 0x00, 61 | 0x00, 
// seekableBufferReaderAt is an in-memory test double that implements
// io.Reader, io.Seeker and io.ReaderAt over buf.
type seekableBufferReaderAt struct {
	buf    []byte
	offset int64
}

// ReadAt implements io.ReaderAt. It follows the interface contract: a read
// that delivers fewer than len(p) bytes, including one that starts at or past
// the end of the buffer, returns io.EOF.
func (s *seekableBufferReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
	if off < 0 {
		return 0, fmt.Errorf("offset before the start of the file: %d", off)
	}
	// Check bounds before slicing so the size arithmetic cannot wrap around.
	if off >= int64(len(s.buf)) {
		return 0, io.EOF
	}

	n = copy(p, s.buf[off:])
	if n < len(p) {
		return n, io.EOF
	}

	return n, nil
}

// Read implements io.Reader and advances the seek offset by the number of
// bytes copied. At or past the end of the buffer it returns io.EOF rather than
// (0, nil), so callers such as io.ReadFull always terminate.
func (s *seekableBufferReaderAt) Read(p []byte) (n int, err error) {
	if s.offset >= int64(len(s.buf)) {
		return 0, io.EOF
	}

	n = copy(p, s.buf[s.offset:])
	s.offset += int64(n)

	return n, nil
}

// Seek implements io.Seeker. Seeking past the end is allowed; seeking before
// the start or using an unknown whence is an error and leaves the offset
// unchanged.
func (s *seekableBufferReaderAt) Seek(offset int64, whence int) (int64, error) {
	newOffset := s.offset
	switch whence {
	case io.SeekCurrent:
		newOffset += offset
	case io.SeekStart:
		newOffset = offset
	case io.SeekEnd:
		newOffset = int64(len(s.buf)) + offset
	default:
		return 0, fmt.Errorf("unknown whence: %d", whence)
	}

	if newOffset < 0 {
		return 0, fmt.Errorf("offset before the start of the file: %d (%d + %d)",
			newOffset, s.offset, offset)
	}

	s.offset = newOffset
	return s.offset, nil
}

// seekableBufferReader wraps seekableBufferReaderAt while hiding its ReadAt
// method, yielding a reader that is only an io.ReadSeeker.
type seekableBufferReader struct {
	sra seekableBufferReaderAt
}

// Read forwards to the wrapped buffer.
func (s *seekableBufferReader) Read(p []byte) (n int, err error) {
	return s.sra.Read(p)
}

// Seek forwards to the wrapped buffer.
func (s *seekableBufferReader) Seek(offset int64, whence int) (int64, error) {
	return s.sra.Seek(offset, whence)
}
dec.Close() 143 | 144 | for _, b := range [][]byte{checksum, noChecksum} { 145 | br := &seekableBufferReaderAt{buf: b} 146 | r, err := NewReader(br, dec) 147 | require.NoError(t, err) 148 | 149 | sr := r.(*readerImpl) 150 | assert.Equal(t, int64(9), sr.endOffset) 151 | assert.Equal(t, 2, sr.index.Len()) 152 | assert.Equal(t, int64(0), sr.offset) 153 | 154 | bytes1 := []byte("test") 155 | bytes2 := []byte("test2") 156 | 157 | tmp := make([]byte, 4096) 158 | n, err := r.Read(tmp) 159 | require.NoError(t, err) 160 | assert.Equal(t, len(bytes1), n) 161 | assert.Equal(t, bytes1, tmp[:n]) 162 | 163 | assert.Equal(t, int64(n), sr.offset) 164 | 165 | offset1, data1 := sr.cachedFrame.get() 166 | assert.Equal(t, uint64(0), offset1) 167 | assert.Equal(t, bytes1, data1) 168 | 169 | m, err := r.Read(tmp) 170 | require.NoError(t, err) 171 | assert.Equal(t, len(bytes2), m) 172 | assert.Equal(t, bytes2, tmp[:m]) 173 | 174 | assert.Equal(t, int64(n)+int64(m), sr.offset) 175 | offset2, data2 := sr.cachedFrame.get() 176 | assert.Equal(t, uint64(len(bytes1)), offset2) 177 | assert.Equal(t, bytes2, data2) 178 | 179 | _, err = r.Read(tmp) 180 | require.ErrorIs(t, err, io.EOF) 181 | 182 | err = r.Close() 183 | require.NoError(t, err) 184 | 185 | // read after close 186 | _, err = r.Read(tmp) 187 | require.ErrorContains(t, err, "reader is closed") 188 | 189 | // double close 190 | err = r.Close() 191 | require.NoError(t, err) 192 | } 193 | } 194 | 195 | func TestReaderEdges(t *testing.T) { 196 | dec, err := zstd.NewReader(nil) 197 | require.NoError(t, err) 198 | 199 | source := []byte(sourceString) 200 | for i, b := range [][]byte{checksum, noChecksum} { 201 | i := i 202 | b := b 203 | t.Run(strconv.Itoa(i), func(t *testing.T) { 204 | t.Parallel() 205 | 206 | sr := &seekableBufferReaderAt{buf: b} 207 | r, err := NewReader(sr, dec) 208 | require.NoError(t, err) 209 | defer func() { require.NoError(t, r.Close()) }() 210 | 211 | for _, whence := range []int{io.SeekStart, io.SeekEnd} { 212 | 
for n := int64(-1); n <= int64(len(source))+1; n++ { 213 | for m := int64(0); m <= int64(len(source))+1; m++ { 214 | var j int64 215 | switch whence { 216 | case io.SeekStart: 217 | j, err = r.Seek(n, whence) 218 | case io.SeekEnd: 219 | j, err = r.Seek(int64(-len(source))+n, whence) 220 | } 221 | if n < 0 { 222 | require.Error(t, err) 223 | continue 224 | } 225 | require.NoError(t, err) 226 | assert.Equal(t, n, j) 227 | 228 | tmp := make([]byte, m) 229 | k, err := r.Read(tmp) 230 | if n >= int64(len(source)) { 231 | require.ErrorIsf(t, err, io.EOF, 232 | "%d: should return EOF at %d, len(source): %d, len(tmp): %d, k: %d, whence: %d", 233 | i, n, len(source), m, k, whence) 234 | continue 235 | } 236 | require.NoErrorf(t, err, 237 | "%d: should NOT return EOF at %d, len(source): %d, len(tmp): %d, k: %d, whence: %d", 238 | i, n, len(source), m, k, whence) 239 | 240 | assert.Equal(t, source[n:n+int64(k)], tmp[:k]) 241 | } 242 | } 243 | } 244 | }) 245 | } 246 | } 247 | 248 | // TestReaderAt verified the following ReaderAt asssumption: 249 | // 250 | // When ReadAt returns n < len(p), it returns a non-nil error explaining why more bytes were not returned. 251 | // In this respect, ReadAt is stricter than Read. 
252 | func TestReaderAt(t *testing.T) { 253 | t.Parallel() 254 | 255 | dec, err := zstd.NewReader(nil) 256 | require.NoError(t, err) 257 | defer dec.Close() 258 | 259 | for _, sr := range []io.ReadSeeker{ 260 | &seekableBufferReader{seekableBufferReaderAt{buf: noChecksum}}, 261 | &seekableBufferReaderAt{buf: noChecksum}, 262 | } { 263 | sr := sr 264 | t.Run(fmt.Sprintf("%T", sr), func(t *testing.T) { 265 | r, err := NewReader(sr, dec) 266 | require.NoError(t, err) 267 | defer func() { require.NoError(t, r.Close()) }() 268 | 269 | oldOffset, err := r.Seek(0, io.SeekCurrent) 270 | require.NoError(t, err) 271 | assert.Equal(t, int64(0), oldOffset) 272 | 273 | tmp1 := make([]byte, 3) 274 | k1, err := r.ReadAt(tmp1, 3) 275 | require.NoError(t, err) 276 | assert.Equal(t, 3, k1) 277 | assert.Equal(t, []byte("tte"), tmp1) 278 | 279 | // If ReadAt is reading from an input source with a seek offset, 280 | // ReadAt should not affect nor be affected by the underlying seek offset. 281 | newOffset, err := r.Seek(0, io.SeekCurrent) 282 | require.NoError(t, err) 283 | assert.Equal(t, newOffset, oldOffset) 284 | 285 | tmp2 := make([]byte, 100) 286 | k2, err := r.ReadAt(tmp2, 3) 287 | require.ErrorIs(t, err, io.EOF) 288 | 289 | tmpLast := make([]byte, 1) 290 | kLast, err := r.ReadAt(tmpLast, 8) 291 | assert.Equal(t, 1, kLast) 292 | assert.Equal(t, []byte("2"), tmpLast) 293 | require.NoError(t, err) 294 | 295 | tmpOOB := make([]byte, 1) 296 | _, err = r.ReadAt(tmpOOB, 9) 297 | require.ErrorIs(t, err, io.EOF) 298 | 299 | assert.Equal(t, 6, k2) 300 | assert.Equal(t, []byte("ttest2"), tmp2[:k2]) 301 | 302 | sectionReader := io.NewSectionReader(r, 3, 4) 303 | tmp3, err := io.ReadAll(sectionReader) 304 | require.NoError(t, err) 305 | assert.Len(t, tmp3, 4) 306 | assert.Equal(t, []byte("ttes"), tmp3) 307 | }) 308 | } 309 | } 310 | 311 | func TestReaderEdgesParallel(t *testing.T) { 312 | dec, err := zstd.NewReader(nil) 313 | require.NoError(t, err) 314 | 315 | source := 
[]byte(sourceString) 316 | for i, b := range [][]byte{checksum, noChecksum} { 317 | i := i 318 | b := b 319 | 320 | sr := &seekableBufferReaderAt{buf: b} 321 | r, err := NewReader(sr, dec) 322 | require.NoError(t, err) 323 | 324 | for n := int64(-1); n <= int64(len(source)); n++ { 325 | for m := int64(0); m <= int64(len(source)); m++ { 326 | n := n 327 | m := m 328 | t.Run(fmt.Sprintf("%d/len:%d/buf:%d", i, n, m), func(t *testing.T) { 329 | t.Parallel() 330 | 331 | tmp := make([]byte, m) 332 | k, err := r.ReadAt(tmp, n) 333 | if n < 0 && m != 0 { 334 | assert.Error(t, err, 335 | "%d: should return Error at %d: ret: %d, bytes: %+v", 336 | i, n, k, tmp) 337 | return 338 | } 339 | 340 | if m == 0 { 341 | require.NoError(t, err) 342 | assert.Equal(t, 0, k) 343 | assert.Equal(t, make([]byte, m), tmp) 344 | return 345 | } 346 | 347 | if n >= int64(len(source)) { 348 | require.ErrorIsf(t, err, io.EOF, 349 | "%d: should return EOF at %d, len(source): %d, len(tmp): %d, k: %d", 350 | i, n, len(source), m, k) 351 | assert.Equal(t, 0, k, "should not read anything at the end") 352 | return 353 | } 354 | if n+m <= int64(len(source)) { 355 | require.NoErrorf(t, err, 356 | "%d: should NOT return Err at %d, len(source): %d, len(tmp): %d, k: %d", 357 | i, n, len(source), m, k) 358 | } else { 359 | require.ErrorIsf(t, err, io.EOF, 360 | "%d: should return EOF at %d, len(source): %d, len(tmp): %d, k: %d", 361 | i, n, len(source), m, k) 362 | } 363 | assert.Equal(t, source[n:n+int64(k)], tmp[:k]) 364 | }) 365 | } 366 | } 367 | } 368 | } 369 | 370 | type fakeReadEnvironment struct{} 371 | 372 | func (s *fakeReadEnvironment) GetFrameByIndex(index env.FrameOffsetEntry) ([]byte, error) { 373 | switch index.ID { 374 | case 0: 375 | return checksum[:17], nil 376 | case 1: 377 | return checksum[17 : 17+18], nil 378 | default: 379 | return nil, fmt.Errorf("unknown index: %d, %+v", index.ID, index) 380 | } 381 | } 382 | 383 | func (s *fakeReadEnvironment) ReadFooter() ([]byte, error) { 384 | 
return checksum[len(checksum)-10:], nil 385 | } 386 | 387 | func (s *fakeReadEnvironment) ReadSkipFrame(skippableFrameOffset int64) ([]byte, error) { 388 | return checksum[len(checksum)-41:], nil 389 | } 390 | 391 | func TestReadEnvironment(t *testing.T) { 392 | t.Parallel() 393 | dec, err := zstd.NewReader(nil) 394 | require.NoError(t, err) 395 | defer dec.Close() 396 | 397 | r, err := NewReader(nil, dec, WithREnvironment(&fakeReadEnvironment{})) 398 | require.NoError(t, err) 399 | defer func() { require.NoError(t, r.Close()) }() 400 | 401 | bytes1 := []byte("test") 402 | bytes2 := []byte("test2") 403 | 404 | tmp := make([]byte, 4096) 405 | n, err := r.Read(tmp) 406 | require.NoError(t, err) 407 | assert.Equal(t, len(bytes1), n) 408 | assert.Equal(t, bytes1, tmp[:n]) 409 | 410 | m, err := r.Read(tmp) 411 | require.NoError(t, err) 412 | assert.Equal(t, len(bytes2), m) 413 | assert.Equal(t, bytes2, tmp[:m]) 414 | 415 | _, err = r.Read(tmp) 416 | require.ErrorIs(t, err, io.EOF) 417 | } 418 | 419 | func TestNoReaderAt(t *testing.T) { 420 | t.Parallel() 421 | 422 | dec, err := zstd.NewReader(nil) 423 | require.NoError(t, err) 424 | defer dec.Close() 425 | 426 | for _, sr := range []io.ReadSeeker{ 427 | &seekableBufferReader{seekableBufferReaderAt{buf: checksum}}, 428 | &seekableBufferReaderAt{buf: checksum}, 429 | } { 430 | sr := sr 431 | t.Run(fmt.Sprintf("%T", sr), func(t *testing.T) { 432 | r, err := NewReader(sr, dec) 433 | require.NoError(t, err) 434 | defer func() { require.NoError(t, r.Close()) }() 435 | 436 | tmp := make([]byte, 3) 437 | n, err := r.ReadAt(tmp, 5) 438 | require.NoError(t, err) 439 | assert.Equal(t, 3, n) 440 | assert.Equal(t, tmp[:n], []byte("est")) 441 | 442 | // If ReadAt is reading from an input source with a seek offset, 443 | // ReadAt should not affect nor be affected by the underlying seek offset. 
444 | m, err := r.Seek(0, io.SeekCurrent) 445 | require.NoError(t, err) 446 | assert.Equal(t, int64(0), m) 447 | 448 | tmp = make([]byte, 4096) 449 | n, err = r.Read(tmp) 450 | require.NoError(t, err) 451 | assert.Equal(t, 4, n) 452 | assert.Equal(t, tmp[:n], []byte("test")) 453 | 454 | m, err = r.Seek(1, io.SeekCurrent) 455 | require.NoError(t, err) 456 | assert.Equal(t, int64(5), m) 457 | 458 | n, err = r.Read(tmp) 459 | require.NoError(t, err) 460 | assert.Equal(t, 4, n) 461 | assert.Equal(t, tmp[:n], []byte("est2")) 462 | 463 | _, err = r.Seek(-1, io.SeekStart) 464 | require.ErrorContains(t, err, "offset before the start of the file") 465 | 466 | _, err = r.Seek(0, 9999) 467 | assert.Errorf(t, err, "unknown whence: %d", 9999) 468 | 469 | _, err = r.Seek(999, io.SeekStart) 470 | require.NoError(t, err) 471 | 472 | _, err = r.Read(tmp) 473 | require.ErrorIs(t, err, io.EOF) 474 | }) 475 | } 476 | } 477 | 478 | func TestEmptyWriteRead(t *testing.T) { 479 | t.Parallel() 480 | 481 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 482 | require.NoError(t, err) 483 | 484 | var b bytes.Buffer 485 | bw := io.Writer(&b) 486 | w, err := NewWriter(bw, enc) 487 | require.NoError(t, err) 488 | 489 | bytes1 := []byte("") 490 | bytesWritten1, err := w.Write(bytes1) 491 | require.NoError(t, err) 492 | assert.Equal(t, 0, bytesWritten1) 493 | 494 | err = w.Close() 495 | require.NoError(t, err) 496 | 497 | dec1, err := zstd.NewReader(nil) 498 | require.NoError(t, err) 499 | 500 | // test seekable decompression 501 | compressed := b.Bytes() 502 | 503 | sr := &seekableBufferReaderAt{buf: compressed} 504 | r, err := NewReader(sr, dec1) 505 | require.NoError(t, err) 506 | defer func() { require.NoError(t, r.Close()) }() 507 | 508 | tmp1 := make([]byte, 1) 509 | n, err := r.Read(tmp1) 510 | require.ErrorIs(t, err, io.EOF) 511 | assert.Equal(t, 0, n) 512 | 513 | // test native decompression 514 | dec2, err := zstd.NewReader(bytes.NewReader(compressed)) 515 | 
require.NoError(t, err) 516 | defer dec2.Close() 517 | 518 | tmp2 := make([]byte, 1) 519 | n, err = dec2.Read(tmp2) 520 | require.ErrorIs(t, err, io.EOF) 521 | assert.Equal(t, 0, n) 522 | } 523 | 524 | func TestSeekTableParsing(t *testing.T) { 525 | var err error 526 | var stf seekTableFooter 527 | 528 | t.Parallel() 529 | 530 | // Checksum. 531 | err = stf.UnmarshalBinary([]byte{ 532 | 0x00, 0x00, 0x00, 0x00, 533 | 1 << 7, 534 | 0xb1, 0xea, 0x92, 0x8f, 535 | }) 536 | require.NoError(t, err) 537 | 538 | // No checksum. 539 | err = stf.UnmarshalBinary([]byte{ 540 | 0x00, 0x00, 0x00, 0x00, 541 | 0x00, 542 | 0xb1, 0xea, 0x92, 0x8f, 543 | }) 544 | require.NoError(t, err) 545 | 546 | // Unused bits. 547 | require.NoError(t, err) 548 | err = stf.UnmarshalBinary([]byte{ 549 | 0x00, 0x00, 0x00, 0x00, 550 | (1 << 7) + 0x01 + 0x2, 551 | 0xb1, 0xea, 0x92, 0x8f, 552 | }) 553 | require.NoError(t, err) 554 | 555 | // Reserved bits. 556 | err = stf.UnmarshalBinary([]byte{ 557 | 0x00, 0x00, 0x00, 0x00, 558 | 0x84, 559 | 0xb1, 0xea, 0x92, 0x8f, 560 | }) 561 | require.ErrorContains(t, err, "footer reserved bits") 562 | err = stf.UnmarshalBinary([]byte{ 563 | 0x00, 0x00, 0x00, 0x00, 564 | 0x80 + 0x40, 565 | 0xb1, 0xea, 0x92, 0x8f, 566 | }) 567 | require.ErrorContains(t, err, "footer reserved bits") 568 | 569 | // Size. 570 | err = stf.UnmarshalBinary([]byte{ 571 | 0xb1, 0xea, 0x92, 0x8f, 572 | }) 573 | require.ErrorContains(t, err, "footer length mismatch") 574 | 575 | // Magic. 
const (
	/*
		The format consists of a number of frames (Zstandard compressed frames and skippable frames), followed by a final skippable frame at the end containing the seek table.

		Seek Table Format

		The structure of the seek table frame is as follows:

			|`Skippable_Magic_Number`|`Frame_Size`|`[Seek_Table_Entries]`|`Seek_Table_Footer`|
			|------------------------|------------|----------------------|-------------------|
			| 4 bytes                | 4 bytes    | 8-12 bytes each      | 9 bytes           |

		Skippable_Magic_Number

		Value: 0x184D2A5E.
		This is for compatibility with Zstandard skippable frames: https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#skippable-frames.

		Since it is legal for other Zstandard skippable frames to use the same
		magic number, it is not recommended for a decoder to recognize frames
		solely on this.

		Frame_Size

		The total size of the skippable frame, not including the `Skippable_Magic_Number` or `Frame_Size`.
		This is for compatibility with Zstandard skippable frames: https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#skippable-frames.

		https://github.com/facebook/zstd/blob/dev/contrib/seekable_format/zstd_seekable_compression_format.md
	*/
	// skippableFrameMagic is the base magic number of a skippable frame; a
	// tag in the range 0x0-0xF is added to it to form the final magic number.
	skippableFrameMagic uint32 = 0x184D2A50

	// seekableMagicNumber is the `Seekable_Magic_Number` stored in the last
	// 4 bytes of the seek table footer.
	seekableMagicNumber uint32 = 0x8F92EAB1

	// seekTableFooterOffset is the size in bytes of the seek table footer
	// (4 bytes frame count + 1 byte descriptor + 4 bytes magic number).
	seekTableFooterOffset = 9

	// Sizes in bytes of the `Frame_Size` and `Skippable_Magic_Number` fields.
	frameSizeFieldSize            = 4
	skippableMagicNumberFieldSize = 4

	// maxDecoderFrameSize is the maximum frame size (128 MiB) supported by the decoder.
	// This is to prevent OOMs due to untrusted input.
	maxDecoderFrameSize = 128 << 20

	// seekableTag is the skippable-frame tag of the seek table frame:
	// skippableFrameMagic + seekableTag == 0x184D2A5E.
	seekableTag = 0xE

	// maximum size of a single frame
	maxChunkSize int64 = math.MaxUint32

	// maximum number of frames in a seekable stream
	maxNumberOfFrames int64 = math.MaxUint32
)
93 | 94 | The seek table footer format is as follows: 95 | 96 | |`Number_Of_Frames`|`Seek_Table_Descriptor`|`Seekable_Magic_Number`| 97 | |------------------|-----------------------|-----------------------| 98 | | 4 bytes | 1 byte | 4 bytes | 99 | 100 | https://github.com/facebook/zstd/blob/dev/contrib/seekable_format/zstd_seekable_compression_format.md#seek_table_footer 101 | */ 102 | type seekTableFooter struct { 103 | // The number of stored frames in the data. 104 | NumberOfFrames uint32 105 | // A bitfield describing the format of the seek table. 106 | SeekTableDescriptor seekTableDescriptor 107 | // Value : 0x8F92EAB1. 108 | SeekableMagicNumber uint32 109 | } 110 | 111 | func (f *seekTableFooter) marshalBinaryInline(dst []byte) { 112 | binary.LittleEndian.PutUint32(dst[0:], f.NumberOfFrames) 113 | if f.SeekTableDescriptor.ChecksumFlag { 114 | dst[4] |= 1 << 7 115 | } 116 | binary.LittleEndian.PutUint32(dst[5:], seekableMagicNumber) 117 | } 118 | 119 | func (f *seekTableFooter) MarshalBinary() ([]byte, error) { 120 | dst := make([]byte, seekTableFooterOffset) 121 | f.marshalBinaryInline(dst) 122 | return dst, nil 123 | } 124 | 125 | func (f *seekTableFooter) MarshalLogObject(enc zapcore.ObjectEncoder) error { 126 | enc.AddUint32("NumberOfFrames", f.NumberOfFrames) 127 | if err := enc.AddObject("SeekTableDescriptor", &f.SeekTableDescriptor); err != nil { 128 | return err 129 | } 130 | enc.AddUint32("SeekableMagicNumber", f.SeekableMagicNumber) 131 | return nil 132 | } 133 | 134 | func (f *seekTableFooter) UnmarshalBinary(p []byte) error { 135 | if len(p) != seekTableFooterOffset { 136 | return fmt.Errorf("footer length mismatch %d vs %d", len(p), seekTableFooterOffset) 137 | } 138 | // Check that reserved bits are set to 0. 
139 | reservedBits := (p[4] << 1) >> 3 140 | if reservedBits != 0 { 141 | return fmt.Errorf("footer reserved bits %d != 0", reservedBits) 142 | } 143 | f.NumberOfFrames = binary.LittleEndian.Uint32(p[0:]) 144 | f.SeekTableDescriptor.ChecksumFlag = (p[4] & (1 << 7)) > 0 145 | f.SeekableMagicNumber = binary.LittleEndian.Uint32(p[5:]) 146 | if f.SeekableMagicNumber != seekableMagicNumber { 147 | return fmt.Errorf("footer magic mismatch %d vs %d", f.SeekableMagicNumber, seekableMagicNumber) 148 | } 149 | return nil 150 | } 151 | 152 | /* 153 | seekTableEntry is an element of the Seek Table describing each of the ZSTD-compressed frames in the stream. 154 | 155 | `Seek_Table_Entries` consists of `Number_Of_Frames` (one for each frame in the data, not including the seek table frame) entries of the following form, in sequence: 156 | 157 | |`Compressed_Size`|`Decompressed_Size`|`[Checksum]`| 158 | |-----------------|-------------------|------------| 159 | | 4 bytes | 4 bytes | 4 bytes | 160 | 161 | https://github.com/facebook/zstd/blob/dev/contrib/seekable_format/zstd_seekable_compression_format.md#seek_table_entries 162 | */ 163 | type seekTableEntry struct { 164 | // The compressed size of the frame. 165 | // The cumulative sum of the `Compressed_Size` fields of frames `0` to `i` gives the offset in the compressed file of frame `i+1`. 166 | CompressedSize uint32 167 | // The size of the decompressed data contained in the frame. For skippable or otherwise empty frames, this value is 0. 168 | DecompressedSize uint32 169 | // Only present if `Checksum_Flag` is set in the `Seek_Table_Descriptor`. Value : the least significant 32 bits of the XXH64 digest of the uncompressed data, stored in little-endian format. 
170 | Checksum uint32 171 | } 172 | 173 | func (e *seekTableEntry) marshalBinaryInline(dst []byte) { 174 | binary.LittleEndian.PutUint32(dst[0:], e.CompressedSize) 175 | binary.LittleEndian.PutUint32(dst[4:], e.DecompressedSize) 176 | binary.LittleEndian.PutUint32(dst[8:], e.Checksum) 177 | } 178 | 179 | func (e *seekTableEntry) MarshalBinary() ([]byte, error) { 180 | dst := make([]byte, 12) 181 | e.marshalBinaryInline(dst) 182 | return dst, nil 183 | } 184 | 185 | func (e *seekTableEntry) MarshalLogObject(enc zapcore.ObjectEncoder) error { 186 | enc.AddUint32("CompressedSize", e.CompressedSize) 187 | enc.AddUint32("DecompressedSize", e.DecompressedSize) 188 | enc.AddUint32("Checksum", e.Checksum) 189 | return nil 190 | } 191 | 192 | func (e *seekTableEntry) UnmarshalBinary(p []byte) error { 193 | if len(p) < 8 { 194 | return fmt.Errorf("entry length mismatch %d vs %d", len(p), 8) 195 | } 196 | e.CompressedSize = binary.LittleEndian.Uint32(p[0:]) 197 | e.DecompressedSize = binary.LittleEndian.Uint32(p[4:]) 198 | if len(p) >= 12 { 199 | e.Checksum = binary.LittleEndian.Uint32(p[8:]) 200 | } 201 | return nil 202 | } 203 | 204 | /* 205 | createSkippableFrame returns a payload formatted as a ZSDT skippable frame. 206 | 207 | | `Magic_Number` | `Frame_Size` | `User_Data` | 208 | |:--------------:|:------------:|:-----------:| 209 | | 4 bytes | 4 bytes | n bytes | 210 | 211 | Skippable frames allow the insertion of user-defined metadata 212 | into a flow of concatenated frames. 213 | 214 | Magic_Number 215 | 216 | 4 Bytes, __little-endian__ format. 217 | Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F. 218 | All 16 values are valid to identify a skippable frame. 219 | This specification doesn't detail any specific tagging for skippable frames. 220 | 221 | Frame_Size 222 | 223 | This is the size, in bytes, of the following `User_Data` 224 | (without including the magic number nor the size field itself). 
225 | This field is represented using 4 Bytes, __little-endian__ format, unsigned 32-bits. 226 | This means `User_Data` can’t be bigger than (2^32-1) bytes. 227 | 228 | User_Data 229 | 230 | The `User_Data` can be anything. Data will just be skipped by the decoder. 231 | 232 | https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#skippable-frames 233 | */ 234 | func createSkippableFrame(tag uint32, payload []byte) ([]byte, error) { 235 | if len(payload) == 0 { 236 | return nil, nil 237 | } 238 | 239 | if tag > 0xf { 240 | return nil, fmt.Errorf("requested tag (%d) > 0xf", tag) 241 | } 242 | 243 | if int64(len(payload)) > maxChunkSize { 244 | return nil, fmt.Errorf("requested skippable frame size (%d) > max uint32", len(payload)) 245 | } 246 | 247 | dst := make([]byte, 8, len(payload)+8) 248 | binary.LittleEndian.PutUint32(dst[0:], skippableFrameMagic+tag) 249 | binary.LittleEndian.PutUint32(dst[4:], uint32(len(payload))) 250 | return append(dst, payload...), nil 251 | } 252 | -------------------------------------------------------------------------------- /pkg/seekable_fuzz_test.go: -------------------------------------------------------------------------------- 1 | //go:build go1.18 2 | // +build go1.18 3 | 4 | package seekable 5 | 6 | import ( 7 | "bufio" 8 | "bytes" 9 | "errors" 10 | "io" 11 | "math/rand" 12 | "testing" 13 | 14 | "github.com/klauspost/compress/zstd" 15 | "github.com/stretchr/testify/assert" 16 | "github.com/stretchr/testify/require" 17 | ) 18 | 19 | func FuzzRoundTrip(f *testing.F) { 20 | dec, err := zstd.NewReader(nil) 21 | require.NoError(f, err) 22 | defer dec.Close() 23 | 24 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedDefault)) 25 | require.NoError(f, err) 26 | defer func() { require.NoError(f, enc.Close()) }() 27 | 28 | f.Add(int64(1), uint8(0), int16(1), int8(io.SeekStart)) 29 | f.Add(int64(10), uint8(1), int16(2), int8(io.SeekEnd)) 30 | f.Add(int64(111), uint8(2), int16(3), int8(io.SeekCurrent)) 31 
| 32 | f.Fuzz(func(t *testing.T, seed int64, frames uint8, l int16, whence int8) { 33 | var b bytes.Buffer 34 | bufWriter := bufio.NewWriter(&b) 35 | 36 | w, err := NewWriter(bufWriter, enc) 37 | require.NoError(t, err) 38 | 39 | total := int16(0) 40 | rng := rand.New(rand.NewSource(seed)) 41 | for i := 0; i < int(frames); i++ { 42 | sz := rng.Int63n(100) 43 | total += int16(sz) 44 | 45 | rndBuf := make([]byte, sz) 46 | 47 | _, err := rng.Read(rndBuf) 48 | require.NoError(t, err) 49 | 50 | _, err = w.Write(rndBuf) 51 | require.NoError(t, err) 52 | } 53 | err = w.Close() 54 | require.NoError(t, err) 55 | 56 | err = bufWriter.Flush() 57 | require.NoError(t, err) 58 | 59 | r, err := NewReader(bytes.NewReader(b.Bytes()), dec) 60 | require.NoError(t, err) 61 | defer func() { require.NoError(t, r.Close()) }() 62 | 63 | off := rng.Int63n(1+4*int64(total)) - 2*int64(total) 64 | i, err := r.Seek(off, int(whence)) 65 | if err != nil { 66 | return 67 | } 68 | 69 | if l > total || l < 0 { 70 | l = total 71 | } 72 | buf1 := make([]byte, l) 73 | 74 | n, err := r.Read(buf1) 75 | if err != nil && !errors.Is(err, io.EOF) { 76 | return 77 | } 78 | 79 | buf2 := make([]byte, n) 80 | m, err := r.ReadAt(buf2, i) 81 | // t.Logf("off: %d, l: %d, whence: %d, i: %d, n: %d, m: %d", off, l, whence, i, n, m) 82 | 83 | if !errors.Is(err, io.EOF) { 84 | require.NoError(t, err) 85 | } 86 | 87 | assert.Equal(t, m, n) 88 | assert.Equal(t, buf1[:n], buf2) 89 | }) 90 | } 91 | -------------------------------------------------------------------------------- /pkg/seekable_test.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "strconv" 8 | "testing" 9 | 10 | "github.com/klauspost/compress/zstd" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | type bytesErr struct { 16 | tag uint32 17 | input []byte 18 | expectedBytes []byte 19 | expectedErr error 
20 | } 21 | 22 | func TestCreateSkippableFrame(t *testing.T) { 23 | t.Parallel() 24 | 25 | dec, err := zstd.NewReader(nil) 26 | require.NoError(t, err) 27 | 28 | for i, tab := range []bytesErr{ 29 | { 30 | tag: 0x00, 31 | input: []byte{}, 32 | expectedBytes: nil, 33 | expectedErr: nil, 34 | }, { 35 | tag: 0x01, 36 | input: []byte{'T'}, 37 | expectedBytes: []byte{0x51, 0x2a, 0x4d, 0x18, 0x01, 0x00, 0x00, 0x00, 'T'}, 38 | expectedErr: nil, 39 | }, { 40 | tag: 0xff, 41 | input: []byte{'T'}, 42 | expectedBytes: nil, 43 | expectedErr: fmt.Errorf("requested tag (255) > 0xf"), 44 | }, 45 | } { 46 | tab := tab 47 | t.Run(strconv.Itoa(i), func(t *testing.T) { 48 | t.Parallel() 49 | actualBytes, err := createSkippableFrame(tab.tag, tab.input) 50 | assert.Equal(t, tab.expectedErr, err, "createSkippableFrame err does not match expected") 51 | if tab.expectedErr == nil && err == nil { 52 | assert.Equal(t, tab.expectedBytes, actualBytes, "createSkippableFrame output does not match expected") 53 | decodedBytes, err := dec.DecodeAll(actualBytes, nil) 54 | require.NoError(t, err) 55 | assert.Equal(t, []byte(nil), decodedBytes) 56 | } 57 | }) 58 | } 59 | } 60 | 61 | func TestIntercompat(t *testing.T) { 62 | t.Parallel() 63 | 64 | dec, err := zstd.NewReader(nil) 65 | require.NoError(t, err) 66 | 67 | for _, fn := range []string{ 68 | // t2sz README.md -l 22 -s 1024 -o intercompat-t2sz.zst 69 | "intercompat-t2sz.zst", 70 | // go run ./cmd/zstdseek -- \ 71 | // -f $(realpath README.md) -o $(realpath intercompat-zstdseek_v0.zst) \ 72 | // -c 1:1 -t -q 13 73 | "intercompat-zstdseek_v0.zst", 74 | } { 75 | fn := fn 76 | t.Run(fn, func(t *testing.T) { 77 | t.Parallel() 78 | 79 | f, err := os.Open(fmt.Sprintf("./testdata/%s", fn)) 80 | require.NoError(t, err) 81 | defer func() { require.NoError(t, f.Close()) }() 82 | 83 | r, err := NewReader(f, dec) 84 | require.NoError(t, err) 85 | defer func() { require.NoError(t, r.Close()) }() 86 | 87 | buf := make([]byte, 4000) 88 | n, err := 
r.Read(buf) 89 | require.NoError(t, err) 90 | assert.Equal(t, 1024, n) 91 | assert.Equal(t, []byte(" [![License]"), buf[:13]) 92 | 93 | all, err := io.ReadAll(r) 94 | require.NoError(t, err) 95 | assert.Greater(t, len(all), 1024) 96 | 97 | i, err := r.Seek(-47, io.SeekEnd) 98 | require.NoError(t, err) 99 | assert.Greater(t, i, int64(1024)) 100 | 101 | n, err = r.ReadAt(buf, i) 102 | require.ErrorIs(t, err, io.EOF) 103 | assert.Equal(t, 47, n) 104 | assert.Equal(t, []byte("[license]: https://opensource.org/licenses/MIT\n"), buf[:n]) 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/031af68894ffe89e8860b612d9341b8f4270ec992df264751325ec4828ff8ad4: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("000000000") 3 | int64(96) 4 | byte(' ') 5 | int(1) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/0bed2d41cfe3d295a3e183a3a0e658c19f9fd792f9b79310f698a4d6f517a7a6: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("^*M\x18!\x00\x00\x00\x00\x00\x00\x0000000000000000000000\x02\x00\x00\x00\x80\xb1\ua48f") 3 | int64(0) 4 | byte('[') 5 | int(1) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/2731075ae7d169285366e009d02826aba105e7470063f29bcf059171d9e76074: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("^*M\x180000000000000000000000000000\x02\x00\x00\x00\x80\xb1\ua48f") 3 | int64(28) 4 | byte(',') 5 | int(1) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/302d18d199dcc9f0f854687128e6e51f686d130ab36266e2f9cfc98a0bb0d105: -------------------------------------------------------------------------------- 1 | 
go test fuzz v1 2 | []byte("0") 3 | int64(-93) 4 | byte('\x01') 5 | int(-18) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/3f17aa2f634b16b95c0cfbcad9894f23255ad4b0e1fc78f54146ac260330d86d: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("(\xb5/\xfdD\b\x00\x03\xa5\x11\x00\x12df#\x10o\u074c]/4\xa6\xb5\x1a\xdd^\xb6\xdf\a\u007f\x1dZc\xbb\x83\x1c\xc6\x060j}\xb9ڪ\xaa\x86\x06\n\a\xeaI\xd0\xc8ń\tƔD\x9d\xa1\x83qQ\xb3U\xd2\xc7.\x98ҭ\tl\xdf\x0e\x98\x89!\x00\xb3H\xb81\t\x02\xa4e\x8a\xf2\xae\xef\xed\xe0\xa9Z\x90\x04\xaa\x84*\xb4>vр\xfa}\x18\x80\x1b\x93\t]`\x92\xa0\x91ݚ\x03.*c\nc0\x00\x18\xa3yg˯\x06#\f\x00\x96d\xb5\x8c'\b\x82X\xa3\xf5\xe3&\xd4\nQ\xd3\xc5M}j䗾\xdb\x0f\x8c)\xf5\x0e\f\xe2iY\x96\x8a\x8f\x18\x94jP\x89\x06\xc9k\xc9\xd7)y\x82\x18\x83\xb0\xbdDŽ\x14\x10`\x8c[h]oї\xefWč\x1d\xb6\xdfT\x1c\x0fG\x12\x89\x17\xe5ȧq%}|dGR\xbf\x13\xf9\x14Z\x17;Z)\x1a\xfc\xa8\xdd,\x8f\xad\xff\x9b\xb6\xbbR\xa3\x8c\xcde2^5\xed\xf7E\xf1W\x97$N\x85+\xe5\xd3x\xe5\x14ׯ$[_\xd3Nwu\xaee\xc5/\xb6Ζ\xaeo%\xb8\xad\xa4\xe8\xd6\xd1\xc2\xf4\xa5\x1a\x18c\xa6\xd8\xe0S\xe5ULDD\x13m\u007fd\xd7H\xe8k\"\x86Q\xf3\xde+\x9a\bs\xa4\xaeIMŎ\x89.\xeawj\x9a\x86[\xffT\xf2@\x1c\xc6\b\x19\xf7\xdbG\xad\x14Z\xd5淪Y\xca\xc8\xeb\xaemU\xecRa\xd4\"\xaa\x9e\x8f\x06\bD\n\xb0\xa7\x9d\xe7\x9b-e\xc8u\xeb\x9cǯo\x0f\xc9\a\xf7*p\xfc\x1c?\xe7q\xfc`\x84癪]\xa0\xf3\xa8Gu\xab\xe0\x048\x99\x01? 
\x00B\xc2 \xb4\x0e\x17e\xcd\xdfu\x1e\xba\xf3\x90\xcb`\x17\xca\\]\x1dM\x1a\xd0\xf3C\x13\f\x1d*\x19Ih\x0f\xbc.H\xa9y\xee\x97\x1f\xd6Gvf\xea\vե\xc7\xccr$Nw=\xd2\xd9\xd7/\xcf4@vX\xb94\x11\xa7\x84us\x8a\xfb7w\t[5Y\xe7\xb5Be#\xf8\xc1\xe1\\,\xb1sႬ\xecjT\x02?\x19X\xc4&r\x86\xb3\x06кč\xde\xe3R\x02\x024\x1e\x83\x1b\xa8=\x87\xfc\x02\x10\x87\xa0\tFCZ\x87}\xb9ʼn\x01L4\xb77J\xd18\x80:F\x82\x92X(\xb5/\xfdD\b\x00\x03]\x0f\x00B\xdaM%\x00\x91V\x00\xfc\x06\xb1\xf7\x83,\xa6\n5\xba\xb4\xbb\x02ř'X\x0e;\xe2\x8fK\x81\x90M\xef\x10\xf9\xff\x9fA \x80\x11\t\xbd\xd4J\xb1\n\xbd\xc5\xed\xe5\x02+j\x05\x89\b\n\x8a\xe93`\x84\x15\xe3\xea,\xce\x0e{raB;\x06\x02\xec\xd9U\x90\x91\xb4}\xf5G\xec\xc3r.\x02\x00 Lj\x1a\b\xc6bᡏ\x01;\xac\x1e<\xbb\x1a\x02;앳\xec\x9f\x1d\xe9tW\xf4\xe7\xbd\xf4\x17\xa1d\xaf\xd2\fO$\xd1ߚSkm-\xc9#'\xed\xab\x84n\xf5`\xdfLM\x98\xb9T\xf5\xe5|>\xe4=U\xe1-]\xf4\xdc!֩\xeaI\xc2G\bR9/\xf4k\n]\xa1\x1fIHݚct\x91gu\xfd\xb4Sr\x9d\xd2S\xc1\x0f\x04\x87\xd5Zc\xab\xba\xfa\xa9)\x8a\x89\x92\x9ft>\x9d1a\xc3\xe1X\rĞ\x8c'N\xc5F\x03y\xbc\xb8s\xc2v\x1aȌ\xf1\r\x10\xa6\xf1\x8a\x01U\x00\x80\x8a?I\xb7\x9eH\f٩\xf0Ni=\xb4\x92\x18*\xb2\x84v\xd5=;\x12Ykc5\xbbg\xd1U\xc7\xf0L3:\x96\x86#\x94\x9e<\xbc{Kp5\x91\xbc\x1d\x89\xed+܉\xcc\x1d\x9eG\xc8U\x0fH \x00@`\fs\xd9\x03\x8c\x8caF)\xb8\x16\x98g\x00-\xa3h\b\xe5l\x00(\rj\vH88\xb6c\xee\x82s\xe28\x88!a\x1c\x1e\xf8\xba\x96N<\n\xbbYp,\x06\tI\x04\xe0\x852\xcdH \xee\x02\x1d\xf0\xadś\x1b5\x01t$\x01w \xbc\xa9\xcb\x18\xbbv0֙\xe5罭\xe5\"\xfb\xa0f\xb7\x180\v\x9b\xfb\xba7\x04\xfcTs\x0e0\xb4\xb1\xa5\x82+(\xce\xf0\x84&\x11dۚ]:\x83\x13\x91\xb9\xe0A>\x9bxB\x921\x86\xbdr\xfcD\x0e\x05lҕ\xff6\xd3\x19\x8eF\x82\xe8\xa9\xe5\x854p\x98\x17\xeb\xa0\xf8\xa5\"\x9eb\xca\xe3\v\xef\x01\x15SO\xf8(\xb5/\xfdD\b\x00\x03\xcd\r\x00\x92\x9bQ$ 
\x8f\xf3\x8c\xfaGL\xb4\xa5\xed\xed\x06adS\xe5.\xce\xdb\xc0\xf8=D\"\x01N\x12/g3*\x84\x00:P\x10/\xf1\x9b\xdd$uƏV\xfc%\x9e\xbc\xd2Α084\xf8d\x0f\x9b'噌&\xbf\xc4o\xf6DA\xba\xcf+\xf5\x89\xe9DI˳\x8e,X\xf3ź\xf6\x17\xef\bQ\xbf\xb4\x97|ַm>\x97\x0e\xc4\xfc\xceEu\x10L\x91\xb0e#,\xe1\xbafa\xc8\\2\tM\xadA~\xaa\x95\xb2 \x132\x9a\xces8Q\xeay\x9e\xee]\x0fˀ\x80\x83\x89M\xe3h\xc0\xa6\x1e\xb1\xbc\x9b\x17\x1c\xccĄ\xe9\xadT$\xb0\xe7j\xe1\x1a+\xceǐ\xe96`>\xbbW\xa9\x86\xe1\xf7>`\x8f\xe1\xa2\x1c\x19\xaa\f\xd5P\xcaLo]\x05&\xbd\x01\x14\xc6\xea)\xad\x82F\x9c\xaa\x89̅\x1f\xd8*\x80,Jy=\x1eC\xd5T\xba5\xe2s=\b\x18\x889¢\xa2\f\x97!\xe8Bikd\xb5\xa6\\>ϣ\x14$Z\xa6\xf5\x17\x82$\"\xb1\xe0쵬]?W\x9d\xcb\u007f5\xcf\xd1\xd2\xe8p\xbb\x9d\xa6\t}\x00\x8cX݈z\xa4\x90|\xfe3\x9d\"\u007f\x14\xcfa\xe3_n\xa6\xcdy\xbb\x06\xeb_4\xc8\xdd\xf1\xdb\xd8\"\u007f\xccU(u\xca\x16*\x00@\x18 \"\x03\x9e\x95ͩ|\x19\x90L\x8d\xac\xbb>C@\x06\x03\b\x19PH\x00\x18\t\xa0\x11\x02\x157\xd2\xc1\xee\xf1\x11\x163&\xa8a\t^\x1b|\"L\xe0\xb1\x0ec\"\n)\x14Xqú\r`.\xea\xe2m=\x84\x97\v\x17\xacOz\u058b\xa0$\xc2-\x9b\x81\xb9k\xa5\x90L\x84k\x84\x85\x15\xfel\xc5*\x92'c\xa1K\xb3\xd8P.*\x85Y\x06$\xd9f\xcd(\xb5/\xfd\x04\x009\x00\x00es/MIT\n\x01\xf0\xca\xd5^*M\x189\x00\x00\x00C\x02\x00\x00\x00\x04\x00\x00F\x82\x92X\xfa\x01\x00\x00\x00\x04\x00\x00\x15SO\xf8\xc8\x01\x00\x00\x00\x04\xde\x00$\xd9f\xcd\x14\x00\x00\x00\a\x00\x00\x00\x01\xf0\xca\xd5\x04\x00\x00\x00\x80\xb1\ua48f") 3 | int64(-120) 4 | byte('\u0083') 5 | int(2) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/55a6d9e48f240cf1cd588b5bf9aac882dbcfb765f4a0f2f0796bdb7271d6c1d2: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("(\xb5/\xfd\x04\x00!\x00\x00test9\x81g\xdb(\xb5/\xfd\x04\x00)\x00\x00test2\x87\xeb\x11q^*\x00\x00\x00\x04\x00\x00\x11M\x18!\x00\x00\x00\x009\x81g\xdb\x12\x00\x00\x00\x05\x00\x00\x00\x87\xeb\x11q\x02\x00\x00\x00\x80\xb1\ua48f") 3 | int64(-1) 4 | byte('\x02') 5 
| int(2) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/6ac3e90e33489f6374af1d4a80567a49a4e33229f6944ff7a4a1ece24ee1a0ec: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("(\xb5/\xfdd\x00\x03\xc5\x11\x00\x96\xa5k# \x8ft\f]IJ\x91l\x8b\x17Z\x93\xbbԉi\xb7c)0\xff\x19o\x88\xca\tt\x83\xfe\x80\x10x\xa5&d\x00[\x00b\x00\xef\x15N\x85\xf9Q\x96\x9031k\x9eJ\xfa\x8d\x1cg\xc0\xa9\u007fi\x99D$\x8b\x11\xf0\xed\xe7?\x9d\xd4Q\xd3\xe2\xe7\xd4\f\xe1\xe3uצ&f\xa8\xeeɊ\xaa#\xa4!\x02\x81\x8d5i\xc9\xe4\x9b)e\xc0e\xcb$\x13\xaf>?\x82\xcdmU\xc0\xb81n\x92\tc\b#L&Ѵ\x0e'\x99\xa8?e\xab \t\x90 \x06\xee\x9fvR\xfe\xb2\x1f\xa3\xb7\x9fڬ\xa9W\xa8\x0f\xbed\x88\x8dO\xa9\xfd\xeeh\x86\x90\xf1W\x87 Fu\va\xd3Wa\xf4֫\x05[[j\xa3\xbb2\x968q{٘\xe8\xdaV\x82\x9bB\x8a\xce~֡\xaf\u0530V\x13\xbdtM\x9965\x11\x11γ\xfd\x8f\x99#\xa1\xcdy\xd8=N\v\xff\x16)\xad\x14)\xaa\xb8\xa5G}\xdc\xd0w\xfba-\x95\xdc\x01\x1b\x8d\xb5(\x1c9\x1c\x8e\x8a\u007f\x18TjP\x89\x8c`\xe5\xc0\xd7\b9\xa2\xac\xe5\\\xbe\xbf\x04l4V\xf3\xc9\xd6QUsЃ\xb2\xd0\xc0\xe0\x91\xf9\x8b\xcc\x03\xf2X0z[y\x95\xf4\xc0\xa6o!\x85\xfc\xe3\aJ\xbf\x13\xd8\xd4Q\x15?\x10\x1bo\xa5\xc8\xea\x13k1\x8b:\xbbGÞb+\xa4m],\xa6[\x17\x97\x9f\aM\xc4\x0e`Y\xa0ۇ\x04\x01\xd0.\x86y\xd7\xf7\nsu^G\xc0\x90\b\x14\xf5H\xf9\x95\x93\x9e_\xae\xe5\xc6'\xb1Ȱ\x1d\xc0Z\xfe\xca<\xe1\xd9\xc5\x18\xb7q\x1b}\xfb\x93\xd0Dt\x92%\xce\x12\xb0/\xa90\x80K\tr\xd0+\x86\b*\x94C\n\x00a\xb8\x81\x89HOL\x9c\xc5\xe9\x00\x12\x8d\xba1\xad1tuF\x82\x92X(\xb5/\xfdd\x00\x03\x8d\x0f\x00\x06\x9cT&\x00\x8fX\a\f\xb6\x13\x12\xa9\xb1\x11\xbd@/\x84\rk\x1bF\xc5#S\xfc\x0f\xa6'\xe2\xf5pi\xc1Q\xfe\xffo\x15\x0f!I\x00H\x00E\x00r\x1b\x1c\xed\xb2}!I\"\x92ȷ\x9a\x8f\x1fA\x97\xeeP\x9c)Şz͊5}ƿ~\xf0\u007f<\x18\xcbQ\r\xf36\xf9\xfa1<\x15\x95\rv\xb9\x8c\xf3\x11s\xe6=\x15Y3ͽC5ގ,aYȳRz\xd7\x06R\x8auRJ]~\x8d\xef\xf8\x9df\xacw\xeeR\xbe\xe9\xb4\xdf[1\xa5\xdet\x9e*\x9cA\x800\xe7\\c\xfdk\xb7ު\x15I\x1d\xce\xf7\xadT&d4\x1a\xac\x91\xdacy\
xe4P4\xd8\xe5\x19\x8d\x83\x81\x92\xa9\xc4\xef4\x0e*\x03m\xaf7y\x04\x9dJ\xb9\xecP[\x99M\v\xcfK-\x94q6\xcbB\x1e\xd8\x15\xe3\x114-d\xb4r\xce1\x06\xed\xf4(ޞ\xf1\xae\x0fxݭ\xaaB\xd9\xea\xef\xa4\xf0+\x9d\xd7긟o\x82o\x1f\xd8Z\xfdj\xc6\r\x0f\xa8R\xba\xad&\xa9;yL\xa1ī^!\x02b\"\xf2\xba\x18QƮ\xbc\xdf\xe4\xddip.\xd0x\xc7@\xa0=\xcc\n\xa0dj\x81\x1c\xef\xb5\v\xb1V\xb3\x03s\x1e1\xb7\b\x00p4\xaa\x91\xafR\xb1\xf0Tf\xe0N냇\xd9\x1f\x0f\xd6\x10\xdc\t? B\x84\x1c\xc3\x1e\x8c\x8caF)\xf8\x16\x98\x14Ƚ(\x1cBI\x1b\x00n\x83\xda\x022\x0e\xce호\xe0\xac8^14\x8c\xeb\x03\\\xb7\x85\xa9\xb3\xea\xb8X\xf1\xf0j\x99\xe6\x12\xe8wC-\x12p\xeeʎБ\x05\xcc6\xe7\xbald@ǰa\x9cK\xb8}\x8f\xdf\xdex\x17\xb0P\x8bZ\x9a\x00#\x03[*\xb8\x82\xe2\fC\xc8,\xf6\xbb|\x06Ǒ\xf9\xe0At6\xf1\x84\xe4c\xccx\xe5x\x88\x1c\bئ+\xdfm&e8\x1a\t\"L-+D\x03W\x05\xe6\x82D<\xf5)\x8f\xaf\xb0\a\x15SO\xf8(\xb5/\xfdd\x00\x03\xf5\r\x00\xf6\\W%\x10\xaf\xb6\x01\x186>\x89\xa6\xb3{\x9b\x90:\x89\xba=\x15^\xafӻ\xe1L/НcMr\x87\xd8m\xd4&wqJ\x00L\x00Q\x00ϳ\x16dZ\x96\xf3\xf5\x802\x81\xd0\x17{1\xba\xfaQgT\u007fu\x9e\xa7\x95\xb0\xc7\u007f\x96\xd5\xe4N\xc2\xf9\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\
xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\
xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\
xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\
xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0у\x1bڪ\x8a\xd2\a\xbc\x89͝\xa4\"v\x13U\x8d\xcd\xf7G\x12\x94\xc7\u007f\x96\xd5\xe4N\xc2\xf9k\xfc\xcbͲA\xdeP{\xf335\xff\xa9\xad\xaajl\xc0\f\xbaXf\x84-\xb1{\xd7r\x1a~o\x03\xe6\xf8\xa9\xe4\x88ߺX\x8e\x94\xdc\xf4\xceM`\xd0\x1b`YhGJ#\x8c(.'j(\u0
601\xbd\xf9\x19\x8d@TJV\x87㷜)wN<\xaa\x83\xa0\x91\x98',Z\x92߲D\xee\xa3vN\xb4\xb5\xe5\xf0\t\x90\x84-\x14\xc6\xec\x84\xe1[\xa8a\xf81\x87\fJUk\x90\xaf\xe6\xb4($\x02N\xf1,\xa7{ա\x11\x86b\x06\x04\x1aM\xe48\r\xc6k\xe9\x0f\xeaݬ\xd0h\"*M\xefe\x02\x81=\xcf\n\x88\xa1Qt\xfc\x9a\ue0b7,a\x1eO\xb1\x8bݠt\xc6N^\xfc)\xae\xfc\xd2h\xc3\xf7\xea\xe1dw\xfdk\x8e\x96gp\x8a\xfc\x14\xbbؓ;\xf6\xe4:\x9f\xd2+\x96\x95\x15]eA̳\x9e*Xsf\xa1\xf6\x14ޑ$\x9d\xa2=\xe5q\xbam\xd19|H\xcc\xff\xc8\xcc$ \x10\x14\xa64;\x00\x03\x04\v\xf0\xf0\x9a+_!ejdg\xa3\x1f\xc8\x03@\x19P\b\b\x97\x80\x861\xab\av\x84\x00\x10;#ކ\x840Z\x1e\xe5\x96\x11\x99\xc4\a\xeb6\xacB\x80\x99\xa6_\x11|\b\x8f\x83\x89\x03%\xbce\x04\x16\xbd\x96\x14D\x85\xb5\xc2\u007f+ۇ\xa3\xd1,6rQ)\xdc2$\xd9f\xcd(\xb5/\xfd$\a9\x00\x00es/MIT\n\x01\xf0\xca\xd5^*M\x18)\x00\x00\x00F\x02\x00\x00\x00\x04\x00\x00\xff\x01\x00\x00\x00\x04\x00\x00\xcc\x01\x00\x00\x00\x04\x00\x00\x14\x00\x00\x00\a\x00\x00\x00\x04\x00\x00\x00\x00\xb1\ua48f") 3 | int64(-98) 4 | byte('\b') 5 | int(2) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/724dfd4c280cae6fdb4a26c64c6084e0afeea42ba22a012a47eae7f132927a83: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("^*M\x18!\x00\x00\x00000000000000000000000000\x02\x00\x00\x00\x80\xb1\ua48f") 3 | int64(0) 4 | byte('[') 5 | int(1) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzReader/785bc3dd4151628f17e2150b2e453f7e5e5d30b386810e6e2018229dd571a250: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("^*M\x18!\x00\x00\x000000\x00\x00\x00\x000000000\x0000000000\x02\x00\x00\x00\x80\xb1\ua48f") 3 | int64(56) 4 | byte('[') 5 | int(1) 6 | -------------------------------------------------------------------------------- 
/pkg/testdata/fuzz/FuzzReader/f42940a36e752debf0e50f41e949bc36d55a128b9331723b1c91da5248308762: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("(\xb5/\xfd\x04\x00!\x00\x00test9\x81g\xdb(\xb5/\xfd\x04\x00)\x00\x00test2\x87\xeb\x11q^*M\x18!\x00\x00\x00\x11\x00\x00\x00\x04\x00\x00\x009\x81g\xdb\x12\x00\x00\x00\x05\x00\x00\x00\x87\xeb\x11q\x02\x00s\x00\x80\xb1\ua48f") 3 | int64(-1) 4 | byte(',') 5 | int(2) 6 | -------------------------------------------------------------------------------- /pkg/testdata/fuzz/FuzzRoundTrip/4c7e5250c36d4db0: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | int64(1) 3 | byte(',') 4 | int16(1) 5 | int8(0) 6 | -------------------------------------------------------------------------------- /pkg/testdata/intercompat-t2sz.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/zstd-seekable-format-go/a50a9c6033718166a4c65d723d6c4ec16d5c61dd/pkg/testdata/intercompat-t2sz.zst -------------------------------------------------------------------------------- /pkg/testdata/intercompat-zstdseek_v0.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SaveTheRbtz/zstd-seekable-format-go/a50a9c6033718166a4c65d723d6c4ec16d5c61dd/pkg/testdata/intercompat-zstdseek_v0.zst -------------------------------------------------------------------------------- /pkg/writer.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "runtime" 8 | "sync" 9 | 10 | "golang.org/x/sync/errgroup" 11 | 12 | "go.uber.org/multierr" 13 | "go.uber.org/zap" 14 | 15 | "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg/env" 16 | ) 17 | 18 | // writerEnvImpl is the environment implementation of for the underlying 
WriteCloser. 19 | type writerEnvImpl struct { 20 | w io.Writer 21 | } 22 | 23 | func (w *writerEnvImpl) WriteFrame(p []byte) (n int, err error) { 24 | return w.w.Write(p) 25 | } 26 | 27 | func (w *writerEnvImpl) WriteSeekTable(p []byte) (n int, err error) { 28 | return w.w.Write(p) 29 | } 30 | 31 | type writerImpl struct { 32 | enc ZSTDEncoder 33 | frameEntries []seekTableEntry // one entry per frame written so far 34 | 35 | logger *zap.Logger 36 | env env.WEnvironment 37 | 38 | once *sync.Once // ensures Close writes the seek table only once 39 | } 40 | 41 | var ( 42 | _ io.Writer = (*writerImpl)(nil) 43 | _ io.Closer = (*writerImpl)(nil) 44 | ) 45 | 46 | type Writer interface { 47 | // Write writes a chunk of data as a separate frame into the datastream. 48 | // 49 | // Note that Write does not do any coalescing nor splitting of data, 50 | // so each write will map to a separate ZSTD Frame. 51 | Write(src []byte) (int, error) 52 | 53 | // Close implements the io.Closer interface. It writes the seek table footer 54 | // and releases occupied memory. 55 | // 56 | // The caller is still responsible for closing the underlying writer. 57 | Close() (err error) 58 | } 59 | 60 | // FrameSource returns one frame of data at a time. 61 | // When there are no more frames, it returns a nil slice and a nil error. 62 | type FrameSource func() ([]byte, error) 63 | 64 | // ConcurrentWriter allows writing many frames concurrently. 65 | type ConcurrentWriter interface { 66 | Writer 67 | 68 | // WriteMany pulls frames from frameSource, encodes them concurrently, and writes them out in source order. 69 | WriteMany(ctx context.Context, frameSource FrameSource, options ...WriteManyOption) error 70 | } 71 | 72 | // ZSTDEncoder is the compressor. Tested with github.com/klauspost/compress/zstd. 73 | type ZSTDEncoder interface { 74 | EncodeAll(src, dst []byte) []byte 75 | } 76 | 77 | // NewWriter wraps the passed io.Writer and Encoder into an indexed ZSTD stream. 78 | // The resulting stream can then be randomly accessed through the Reader and Decoder interfaces. 
79 | func NewWriter(w io.Writer, encoder ZSTDEncoder, opts ...wOption) (ConcurrentWriter, error) { 80 | sw := writerImpl{ 81 | once: &sync.Once{}, 82 | enc: encoder, 83 | } 84 | 85 | sw.logger = zap.NewNop() // default logger; an option applied below may replace it 86 | for _, o := range opts { 87 | err := o(&sw) 88 | if err != nil { 89 | return nil, err 90 | } 91 | } 92 | 93 | if sw.env == nil { // no environment was set by an option, so write frames and the seek table directly to w 94 | sw.env = &writerEnvImpl{ 95 | w: w, 96 | } 97 | } 98 | 99 | return &sw, nil 100 | } 101 | 102 | func (s *writerImpl) Write(src []byte) (int, error) { 103 | dst, err := s.Encode(src) 104 | if err != nil { 105 | return 0, err 106 | } 107 | 108 | n, err := s.env.WriteFrame(dst) 109 | if err != nil { 110 | return 0, err 111 | } 112 | if n != len(dst) { 113 | return 0, fmt.Errorf("partial write: %d out of %d", n, len(dst)) 114 | } 115 | 116 | return len(src), nil // report the length of src, not the number of compressed bytes written 117 | } 118 | 119 | func (s *writerImpl) Close() (err error) { 120 | s.once.Do(func() { // only the first Close writes the seek table; later calls return nil 121 | err = multierr.Append(err, s.writeSeekTable()) 122 | }) 123 | return 124 | } 125 | 126 | type encodeResult struct { 127 | buf []byte 128 | entry seekTableEntry 129 | } 130 | 131 | func (s *writerImpl) writeManyEncoder(ctx context.Context, ch chan<- encodeResult, frame []byte) func() error { 132 | return func() error { 133 | dst, entry, err := s.encodeOne(frame) 134 | if err != nil { 135 | return fmt.Errorf("failed to encode frame: %w", err) 136 | } 137 | 138 | select { 139 | case <-ctx.Done(): 140 | // Fulfill our promise: hand the encoded frame to whoever holds the channel, then close it 141 | case ch <- encodeResult{dst, entry}: 142 | close(ch) 143 | } 144 | 145 | return nil 146 | } 147 | } 148 | 149 | func (s *writerImpl) writeManyProducer(ctx context.Context, frameSource FrameSource, g *errgroup.Group, queue chan<- chan encodeResult) func() error { 150 | return func() error { 151 | for { 152 | frame, err := frameSource() 153 | if err != nil { 154 | return fmt.Errorf("frame source failed: %w", err) 155 | } 156 | if frame == nil { 157 | close(queue) // a nil frame marks the end of input; closing the queue tells the reader of queue to stop 158 | return nil 159 | } 160 | 161 | // Put a channel on the queue as a sort of promise. 
162 | // This is a nice trick to keep our results ordered, even when compression 163 | // completes out-of-order. 164 | ch := make(chan encodeResult, 1) 165 | select { 166 | case <-ctx.Done(): 167 | return ctx.Err() // report the interruption instead of pretending the source was drained 168 | case queue <- ch: 169 | } 170 | 171 | g.Go(s.writeManyEncoder(ctx, ch, frame)) 172 | } 173 | } 174 | } 175 | // writeManyConsumer returns the goroutine body that takes promise channels off queue in order, waits for each encoded frame, writes it through the environment and records its seek table entry. It returns nil once queue is closed, the write error on failure, and ctx.Err() if ctx is done first. 176 | func (s *writerImpl) writeManyConsumer(ctx context.Context, callback func(uint32), queue <-chan chan encodeResult) func() error { 177 | return func() error { 178 | for { 179 | var ch <-chan encodeResult 180 | select { 181 | case <-ctx.Done(): 182 | return ctx.Err() // cancelled before every frame was written: this must not look like success 183 | case ch = <-queue: 184 | } 185 | if ch == nil { 186 | return nil // queue was closed: every queued frame has been written 187 | } 188 | 189 | // Wait for the block to be complete 190 | var result encodeResult 191 | select { 192 | case <-ctx.Done(): 193 | return ctx.Err() // the encoder may never deliver once ctx is done, so stop waiting and report it 194 | case result = <-ch: 195 | } 196 | 197 | n, err := s.env.WriteFrame(result.buf) 198 | if err != nil { 199 | return fmt.Errorf("failed to write compressed data: %w", err) 200 | } 201 | if n != len(result.buf) { 202 | return fmt.Errorf("partial write: %d out of %d", n, len(result.buf)) 203 | } 204 | s.frameEntries = append(s.frameEntries, result.entry) // appended in write order, only after the frame was fully written 205 | 206 | if callback != nil { 207 | callback(result.entry.DecompressedSize) 208 | } 209 | } 210 | } 211 | } 212 | 213 | func (s *writerImpl) WriteMany(ctx context.Context, frameSource FrameSource, options ...WriteManyOption) error { 214 | opts := writeManyOptions{concurrency: runtime.GOMAXPROCS(0)} 215 | for _, o := range options { 216 | if err := o(&opts); err != nil { 217 | return err // no wrap, these should be user-comprehensible 218 | } 219 | } 220 | 221 | g, gCtx := errgroup.WithContext(ctx) 222 | g.SetLimit(opts.concurrency + 2) // reader and writer 223 | // Add extra room in the queue, so we can keep throughput high even if blocks finish out of order 224 | queue := make(chan chan encodeResult, opts.concurrency*2) 225 | g.Go(s.writeManyProducer(gCtx, frameSource, g, queue)) 226 | g.Go(s.writeManyConsumer(gCtx, 
opts.writeCallback, queue)) 227 | return g.Wait() // blocks until every goroutine started on g has returned; yields the first non-nil error 228 | } 229 | 230 | func (s *writerImpl) writeSeekTable() error { 231 | seekTableBytes, err := s.EndStream() // NOTE(review): EndStream is defined outside this view; presumably it serializes the collected frame entries - confirm 232 | if err != nil { 233 | return err 234 | } 235 | 236 | n, err := s.env.WriteSeekTable(seekTableBytes) 237 | if err != nil { 238 | return err 239 | } 240 | if n != len(seekTableBytes) { // a short write is an error even when the environment reported none 241 | return fmt.Errorf("partial write: %d out of %d", n, len(seekTableBytes)) 242 | } 243 | return nil 244 | } 245 | -------------------------------------------------------------------------------- /pkg/writer_options.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "fmt" 5 | 6 | "go.uber.org/zap" 7 | 8 | "github.com/SaveTheRbtz/zstd-seekable-format-go/pkg/env" 9 | ) 10 | 11 | type wOption func(*writerImpl) error 12 | 13 | func WithWLogger(l *zap.Logger) wOption { 14 | return func(w *writerImpl) error { w.logger = l; return nil } // stores l as the writer's logger 15 | } 16 | 17 | func WithWEnvironment(e env.WEnvironment) wOption { 18 | return func(w *writerImpl) error { w.env = e; return nil } // stores e as the environment that receives frames and the seek table 19 | } 20 | 21 | type writeManyOptions struct { 22 | concurrency int 23 | writeCallback func(uint32) 24 | } 25 | 26 | type WriteManyOption func(options *writeManyOptions) error 27 | 28 | func WithConcurrency(concurrency int) WriteManyOption { 29 | return func(options *writeManyOptions) error { 30 | if concurrency < 1 { // zero and negative values are rejected 31 | return fmt.Errorf("concurrency must be positive: %d", concurrency) 32 | } 33 | options.concurrency = concurrency 34 | return nil 35 | } 36 | } 37 | 38 | func WithWriteCallback(cb func(size uint32)) WriteManyOption { 39 | return func(options *writeManyOptions) error { 40 | options.writeCallback = cb // a nil cb is stored as-is 41 | return nil 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /pkg/writer_test.go: -------------------------------------------------------------------------------- 1 | package seekable 2 | 3 | import ( 4 | "bytes" 5 | 
"context" 6 | "crypto/rand" 7 | "encoding/binary" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "testing" 12 | 13 | "github.com/klauspost/compress/zstd" 14 | "github.com/stretchr/testify/assert" 15 | "github.com/stretchr/testify/require" 16 | ) 17 | 18 | func TestWriter(t *testing.T) { 19 | t.Parallel() 20 | 21 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 22 | require.NoError(t, err) 23 | 24 | var b bytes.Buffer 25 | bw := io.Writer(&b) 26 | w, err := NewWriter(bw, enc) 27 | require.NoError(t, err) 28 | 29 | bytes1 := []byte("test") 30 | bytesWritten1, err := w.Write(bytes1) 31 | require.NoError(t, err) 32 | bytes2 := []byte("test2") 33 | bytesWritten2, err := w.Write(bytes2) 34 | require.NoError(t, err) 35 | 36 | // test internals: each Write must have recorded one frame entry with the input's length 37 | sw := w.(*writerImpl) 38 | assert.Len(t, sw.frameEntries, 2) 39 | assert.Len(t, bytes1, int(sw.frameEntries[0].DecompressedSize)) 40 | assert.Len(t, bytes1, bytesWritten1) 41 | assert.Equal(t, uint32(len(bytes2)), sw.frameEntries[1].DecompressedSize) 42 | assert.Equal(t, uint32(bytesWritten2), sw.frameEntries[1].DecompressedSize) 43 | 44 | index1CompressedSize := sw.frameEntries[0].CompressedSize 45 | err = w.Close() 46 | require.NoError(t, err) 47 | 48 | // verify buffer content 49 | buf := b.Bytes() 50 | // magic footer: the stream must end with these four bytes 51 | assert.Equal(t, []byte{0xb1, 0xea, 0x92, 0x8f}, buf[len(buf)-4:]) 52 | assert.Equal(t, uint32(2), binary.LittleEndian.Uint32(buf[len(buf)-9:len(buf)-5])) 53 | // index.1: first of the two 12-byte entries that precede the 9-byte footer 54 | indexOffset := len(buf) - 4 - 1 - 4 - 2*12 55 | assert.Equal(t, index1CompressedSize, binary.LittleEndian.Uint32(buf[indexOffset:indexOffset+4])) 56 | assert.Equal(t, uint32(len(bytes1)), binary.LittleEndian.Uint32(buf[indexOffset+4:indexOffset+8])) 57 | // skipframe header: 4 magic bytes followed by a little-endian uint32 expected to be 0x21 (= 2*12 + 9) 58 | frameOffset := indexOffset - 4 - 4 59 | assert.Equal(t, []byte{0x5e, 0x2a, 0x4d, 0x18}, buf[frameOffset:frameOffset+4]) 60 | assert.Equal(t, uint32(0x21), binary.LittleEndian.Uint32(buf[frameOffset+4:frameOffset+8])) 61 | 62 | // test 
decompression 63 | br := io.Reader(&b) 64 | dec, err := zstd.NewReader(br) 65 | require.NoError(t, err) 66 | readBuf := make([]byte, 1024) 67 | n, err := dec.Read(readBuf) 68 | require.ErrorIs(t, err, io.EOF) // this single Read is expected to return the data together with io.EOF 69 | 70 | concat := append(bytes1, bytes2...) 71 | assert.Equal(t, len(concat), n) 72 | assert.Equal(t, concat, readBuf[:n]) 73 | } 74 | 75 | func makeTestFrame(t *testing.T, idx int) []byte { // builds one frame out of 100 concatenated "test<N>" strings, N running from idx to idx+99 76 | var b bytes.Buffer 77 | for i := 0; i < 100; i++ { 78 | s := fmt.Sprintf("test%d", idx+i) 79 | _, err := b.WriteString(s) 80 | require.NoError(t, err) 81 | } 82 | return b.Bytes() 83 | } 84 | 85 | func makeTestFrameSource(frames [][]byte) FrameSource { // the returned source is stateful: it hands out frames in order, exactly once each 86 | idx := 0 87 | return func() ([]byte, error) { 88 | if idx >= len(frames) { 89 | return nil, nil // nil frame with nil error signals the end of the sequence 90 | } 91 | ret := frames[idx] 92 | idx++ 93 | return ret, nil 94 | } 95 | } 96 | 97 | func TestConcurrentWriter(t *testing.T) { 98 | t.Parallel() 99 | 100 | ctx := context.Background() 101 | 102 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 103 | require.NoError(t, err) 104 | 105 | // Set up test data: frameCount frames plus their concatenation for later comparison 106 | const frameCount = 20 107 | var frames [][]byte 108 | var concat []byte 109 | for i := 0; i < frameCount; i++ { 110 | frame := makeTestFrame(t, i) 111 | frames = append(frames, frame) 112 | concat = append(concat, frame...) 
113 | } 114 | 115 | // Write concurrently with a concurrency of 5, summing the sizes reported to the write callback 116 | var b bytes.Buffer 117 | bw := io.Writer(&b) 118 | concurrentWriter, err := NewWriter(bw, enc) 119 | require.NoError(t, err) 120 | 121 | var totalWritten int 122 | err = concurrentWriter.WriteMany(ctx, makeTestFrameSource(frames), WithConcurrency(5), 123 | WithWriteCallback(func(size uint32) { 124 | totalWritten += int(size) 125 | })) 126 | require.NoError(t, err) 127 | require.Equal(t, len(concat), totalWritten) 128 | 129 | // Write one at a time 130 | var nb bytes.Buffer 131 | nbw := io.Writer(&nb) 132 | oneWriter, err := NewWriter(nbw, enc) 133 | require.NoError(t, err) 134 | 135 | for i := 0; i < frameCount; i++ { 136 | require.NoError(t, err) 137 | _, err = oneWriter.Write(frames[i]) 138 | require.NoError(t, err) 139 | } 140 | 141 | // Output should be the same: neither writer has been closed, so only frames are compared 142 | assert.Equal(t, b.Bytes(), nb.Bytes()) 143 | 144 | concurrentImpl := concurrentWriter.(*writerImpl) 145 | oneImpl := oneWriter.(*writerImpl) 146 | assert.Equal(t, concurrentImpl.frameEntries, oneImpl.frameEntries) 147 | 148 | // test decompression: the concurrent output must decode back to concat 149 | dec, err := zstd.NewReader(nil) 150 | require.NoError(t, err) 151 | decoded, err := dec.DecodeAll(b.Bytes(), nil) 152 | require.NoError(t, err) 153 | assert.Equal(t, concat, decoded) 154 | } 155 | 156 | type failingWriteEnvironment struct { // test double: both write methods ignore p and return the configured n and err 157 | n int 158 | err error 159 | } 160 | 161 | func (e failingWriteEnvironment) WriteFrame(p []byte) (n int, err error) { 162 | return e.n, e.err 163 | } 164 | 165 | func (e failingWriteEnvironment) WriteSeekTable(p []byte) (n int, err error) { 166 | return e.n, e.err 167 | } 168 | 169 | func TestConcurrentWriterErrors(t *testing.T) { 170 | t.Parallel() 171 | 172 | manyFrames := [][]byte{} 173 | for i := 0; i < 100; i++ { 174 | manyFrames = append(manyFrames, []byte(fmt.Sprintf("test%d", i))) 175 | } 176 | 177 | ctx := context.Background() 178 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 179 | require.NoError(t, err) 180 | w, err := 
NewWriter(nil, enc) 181 | require.NoError(t, err) 182 | 183 | frameSource := makeTestFrameSource([][]byte{}) 184 | err = w.WriteMany(ctx, frameSource, WithConcurrency(0)) 185 | assert.ErrorContains(t, err, "concurrency must be positive") 186 | 187 | frameSource = func() ([]byte, error) { 188 | return nil, errors.New("test error") 189 | } 190 | err = w.WriteMany(ctx, frameSource) 191 | assert.ErrorContains(t, err, "frame source failed: test error") 192 | 193 | var b bytes.Buffer 194 | w, err = NewWriter(&b, enc, 195 | WithWEnvironment(failingWriteEnvironment{0, errors.New("test error")})) 196 | require.NoError(t, err) 197 | frameSource = makeTestFrameSource(manyFrames) // enough that we have to wait on ctx 198 | err = w.WriteMany(ctx, frameSource, WithConcurrency(1)) 199 | assert.ErrorContains(t, err, "failed to write compressed data") 200 | 201 | w, err = NewWriter(&b, enc, 202 | WithWEnvironment(failingWriteEnvironment{1, nil})) 203 | require.NoError(t, err) 204 | err = w.WriteMany(ctx, makeTestFrameSource(manyFrames), WithConcurrency(1)) // fresh source: the one above was partly drained by the previous scenario, by a scheduling-dependent amount 205 | assert.ErrorContains(t, err, "partial write") 206 | } 207 | 208 | type fakeWriteEnvironment struct { 209 | bw io.Writer 210 | } 211 | 212 | func (s *fakeWriteEnvironment) WriteFrame(p []byte) (n int, err error) { 213 | return s.bw.Write(p) 214 | } 215 | 216 | func (s *fakeWriteEnvironment) WriteSeekTable(p []byte) (n int, err error) { 217 | return s.bw.Write(p) 218 | } 219 | 220 | func TestWriteEnvironment(t *testing.T) { 221 | t.Parallel() 222 | 223 | var b bytes.Buffer 224 | 225 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 226 | require.NoError(t, err) 227 | 228 | w, err := NewWriter(nil, enc, WithWEnvironment(&fakeWriteEnvironment{ 229 | bw: io.Writer(&b), 230 | })) 231 | require.NoError(t, err) 232 | 233 | bytes1 := []byte("test") 234 | _, err = w.Write(bytes1) 235 | require.NoError(t, err) 236 | bytes2 := []byte("test2") 237 | _, err = w.Write(bytes2) 238 | require.NoError(t, err) 239 | 240 | err = w.Close() 
241 | require.NoError(t, err) 242 | 243 | // test decompression: data written through the custom environment must round-trip 244 | br := io.Reader(&b) 245 | dec, err := zstd.NewReader(br) 246 | require.NoError(t, err) 247 | readBuf := make([]byte, 1024) 248 | n, err := dec.Read(readBuf) 249 | require.ErrorIs(t, err, io.EOF) 250 | concat := append(bytes1, bytes2...) 251 | assert.Equal(t, len(concat), n) 252 | assert.Equal(t, concat, readBuf[:n]) 253 | } 254 | 255 | func TestCloseErrors(t *testing.T) { 256 | t.Parallel() 257 | 258 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 259 | require.NoError(t, err) 260 | 261 | // environment returns error on WriteSeekTable 262 | w, err := NewWriter(nil, enc, 263 | WithWEnvironment(failingWriteEnvironment{0, errors.New("test error")})) 264 | require.NoError(t, err) 265 | err = w.Close() 266 | assert.ErrorContains(t, err, "test error") 267 | 268 | // environment reports a 1-byte write with no error, which Close must flag as a partial write 269 | w, err = NewWriter(nil, enc, WithWEnvironment(failingWriteEnvironment{1, nil})) 270 | require.NoError(t, err) 271 | err = w.Close() 272 | assert.ErrorContains(t, err, "partial write") 273 | } 274 | 275 | func makeRepeatingFrameSource(frame []byte, count int) FrameSource { // the returned source yields the same frame slice count times 276 | idx := 0 277 | return func() ([]byte, error) { 278 | if idx >= count { 279 | return nil, nil // count frames handed out: signal end of input 280 | } 281 | idx++ 282 | return frame, nil 283 | } 284 | } 285 | 286 | type nullWriter struct{} // discards everything written and reports full success 287 | 288 | func (nullWriter) Write(p []byte) (n int, err error) { 289 | return len(p), nil 290 | } 291 | 292 | func BenchmarkWrite(b *testing.B) { 293 | ctx := context.Background() 294 | 295 | enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest)) 296 | require.NoError(b, err) 297 | 298 | sizes := []int64{128, 4 * 1024, 16 * 1024, 64 * 1024, 1 * 1024 * 1024} 299 | for _, sz := range sizes { 300 | writeBuf := make([]byte, sz) 301 | _, err := rand.Read(writeBuf) 302 | require.NoError(b, err) 303 | 304 | w, err := NewWriter(nullWriter{}, enc) 305 | require.NoError(b, err) 306 | 307 | 
b.Run(fmt.Sprintf("%d", sz), func(b *testing.B) { 308 | b.SetBytes(sz) // bytes processed per iteration, so the benchmark can report throughput 309 | b.ResetTimer() 310 | 311 | for i := 0; i < b.N; i++ { 312 | _, _ = w.Write(writeBuf) // NOTE(review): the write error is discarded; nullWriter never fails, but an encoding error would go unnoticed - confirm this is intended 313 | } 314 | }) 315 | b.Run(fmt.Sprintf("Parallel-%d", sz), func(b *testing.B) { 316 | b.SetBytes(sz) 317 | b.ResetTimer() 318 | 319 | err = w.WriteMany(ctx, makeRepeatingFrameSource(writeBuf, b.N)) // b.N frames in one WriteMany call, at the default concurrency 320 | require.NoError(b, err) 321 | }) 322 | 323 | err = w.Close() 324 | require.NoError(b, err) 325 | } 326 | } 327 | --------------------------------------------------------------------------------