├── version.json ├── .github ├── workflows │ ├── stale.yml │ ├── releaser.yml │ ├── tagpush.yml │ ├── automerge.yml │ ├── release-check.yml │ ├── go-check.yml │ └── go-test.yml ├── ISSUE_TEMPLATE │ ├── config.yml │ └── open_an_issue.md └── config.yml ├── buzhash_norace_test.go ├── gen └── main.go ├── LICENSE ├── benchmark_test.go ├── go.mod ├── rabin.go ├── buzhash_test.go ├── parse_test.go ├── README.md ├── rabin_test.go ├── splitting_test.go ├── splitting.go ├── parse.go ├── go.sum └── buzhash.go /version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "v0.0.6" 3 | } 4 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close and mark stale issue 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | stale: 9 | uses: pl-strflt/.github/.github/workflows/reusable-stale-issue.yml@v0.3 10 | -------------------------------------------------------------------------------- /buzhash_norace_test.go: -------------------------------------------------------------------------------- 1 | //go:build !race 2 | 3 | package chunk 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | func TestFuzzBuzhashChunking(t *testing.T) { 10 | buf := make([]byte, 1024*1024*16) 11 | for i := 0; i < 100; i++ { 12 | testBuzhashChunking(t, buf) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /.github/workflows/releaser.yml: -------------------------------------------------------------------------------- 1 | # File managed by web3-bot. DO NOT EDIT. 2 | # See https://github.com/protocol/.github/ for details. 
3 | 4 | name: Releaser 5 | on: 6 | push: 7 | paths: [ 'version.json' ] 8 | 9 | jobs: 10 | releaser: 11 | uses: protocol/.github/.github/workflows/releaser.yml@master 12 | -------------------------------------------------------------------------------- /.github/workflows/tagpush.yml: -------------------------------------------------------------------------------- 1 | # File managed by web3-bot. DO NOT EDIT. 2 | # See https://github.com/protocol/.github/ for details. 3 | 4 | name: Tag Push Checker 5 | on: 6 | push: 7 | tags: 8 | - v* 9 | 10 | jobs: 11 | releaser: 12 | uses: protocol/.github/.github/workflows/tagpush.yml@master 13 | -------------------------------------------------------------------------------- /.github/workflows/automerge.yml: -------------------------------------------------------------------------------- 1 | # File managed by web3-bot. DO NOT EDIT. 2 | # See https://github.com/protocol/.github/ for details. 3 | 4 | name: Automerge 5 | on: [ pull_request ] 6 | 7 | jobs: 8 | automerge: 9 | uses: protocol/.github/.github/workflows/automerge.yml@master 10 | with: 11 | job: 'automerge' 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Getting Help on IPFS 4 | url: https://ipfs.io/help 5 | about: All information about how and where to get help on IPFS. 6 | - name: IPFS Official Forum 7 | url: https://discuss.ipfs.io 8 | about: Please post general questions, support requests, and discussions here. 9 | -------------------------------------------------------------------------------- /.github/workflows/release-check.yml: -------------------------------------------------------------------------------- 1 | # File managed by web3-bot. DO NOT EDIT. 2 | # See https://github.com/protocol/.github/ for details. 
// nRounds is how many full shuffling passes are applied to the table.
const nRounds = 200

// buildLUT produces the 256-entry lookup table.
//
// The table starts with the first half of its entries set to all ones and the
// second half set to zero, so every bit position holds exactly 128 ones. Each
// round then walks every bit position and, following a fresh permutation from
// rng, exchanges that single bit between pairs of entries. Exchanging a bit
// between two entries never changes how many ones the column holds, so the
// per-bit balance of the initial table is preserved.
func buildLUT(rng *rand.Rand) []uint32 {
	table := make([]uint32, 256)
	for i := range table[:len(table)/2] {
		table[i] = ^uint32(0)
	}

	for round := 0; round < nRounds; round++ {
		for bit := uint32(0); bit < 32; bit++ {
			column := uint32(1) << bit
			for i, j := range rng.Perm(len(table)) {
				// diff is non-zero only when the two entries disagree on
				// this bit; flipping it in both entries swaps the bit.
				diff := (table[i] ^ table[j]) & column
				table[i] ^= diff
				table[j] ^= diff
			}
		}
	}
	return table
}

// main prints the generated table as a Go-syntax []uint32 literal on stdout.
// The generator is seeded with 0, so the output is reproducible.
func main() {
	fmt.Printf("%#v", buildLUT(rand.New(rand.NewSource(0))))
}
4 | title: '' 5 | labels: need/triage 6 | assignees: '' 7 | 8 | --- 9 | 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 IPFS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /benchmark_test.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "math/rand" 7 | "testing" 8 | ) 9 | 10 | type newSplitter func(io.Reader) Splitter 11 | 12 | type bencSpec struct { 13 | size int 14 | name string 15 | } 16 | 17 | var bSizes = []bencSpec{ 18 | {1 << 10, "1K"}, 19 | {1 << 20, "1M"}, 20 | {16 << 20, "16M"}, 21 | {100 << 20, "100M"}, 22 | } 23 | 24 | func benchmarkChunker(b *testing.B, ns newSplitter) { 25 | for _, s := range bSizes { 26 | s := s 27 | b.Run(s.name, func(b *testing.B) { 28 | benchmarkChunkerSize(b, ns, s.size) 29 | }) 30 | } 31 | } 32 | 33 | func benchmarkChunkerSize(b *testing.B, ns newSplitter, size int) { 34 | rng := rand.New(rand.NewSource(1)) 35 | data := make([]byte, size) 36 | rng.Read(data) 37 | 38 | b.SetBytes(int64(size)) 39 | b.ReportAllocs() 40 | b.ResetTimer() 41 | 42 | var res uint64 43 | 44 | for i := 0; i < b.N; i++ { 45 | r := ns(bytes.NewReader(data)) 46 | 47 | for { 48 | chunk, err := r.NextBytes() 49 | if err != nil { 50 | if err == io.EOF { 51 | break 52 | } 53 | b.Fatal(err) 54 | } 55 | res = res + uint64(len(chunk)) 56 | } 57 | } 58 | Res = Res + res 59 | } 60 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ipfs/go-ipfs-chunker 2 | 3 | require ( 4 | github.com/ipfs/go-block-format v0.0.2 5 | github.com/ipfs/go-ipfs-util v0.0.1 6 | github.com/ipfs/go-log v0.0.1 7 | github.com/libp2p/go-buffer-pool v0.0.2 8 | github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f 9 | ) 10 | 11 | require ( 12 | github.com/gogo/protobuf v1.2.1 // indirect 13 | github.com/gxed/hashland/keccakpg v0.0.1 // indirect 14 | github.com/gxed/hashland/murmur3 v0.0.1 // indirect 15 | github.com/ipfs/go-cid 
// IpfsRabinPoly is the irreducible polynomial of degree 53 used for Rabin
// fingerprinting.
//
// Deprecated: use github.com/ipfs/boxo/chunker.IpfsRabinPoly
var IpfsRabinPoly = chunker.Pol(17437180132763653)
26 | // 27 | // Deprecated: use github.com/ipfs/boxo/chunker.NewRabin 28 | func NewRabin(r io.Reader, avgBlkSize uint64) *Rabin { 29 | min := avgBlkSize / 3 30 | max := avgBlkSize + (avgBlkSize / 2) 31 | 32 | return NewRabinMinMax(r, min, avgBlkSize, max) 33 | } 34 | 35 | // NewRabinMinMax returns a new Rabin splitter which uses 36 | // the given min, average and max block sizes. 37 | // 38 | // Deprecated: use github.com/ipfs/boxo/chunker.NewRabinMinMax 39 | func NewRabinMinMax(r io.Reader, min, avg, max uint64) *Rabin { 40 | h := fnv.New32a() 41 | ch := chunker.New(r, IpfsRabinPoly, h, avg, min, max) 42 | 43 | return &Rabin{ 44 | r: ch, 45 | reader: r, 46 | } 47 | } 48 | 49 | // NextBytes reads the next bytes from the reader and returns a slice. 50 | func (r *Rabin) NextBytes() ([]byte, error) { 51 | ch, err := r.r.Next() 52 | if err != nil { 53 | return nil, err 54 | } 55 | 56 | return ch.Data, nil 57 | } 58 | 59 | // Reader returns the io.Reader associated to this Splitter. 60 | func (r *Rabin) Reader() io.Reader { 61 | return r.reader 62 | } 63 | -------------------------------------------------------------------------------- /buzhash_test.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "testing" 7 | 8 | util "github.com/ipfs/go-ipfs-util" 9 | ) 10 | 11 | func testBuzhashChunking(t *testing.T, buf []byte) (chunkCount int) { 12 | n, err := util.NewTimeSeededRand().Read(buf) 13 | if n < len(buf) { 14 | t.Fatalf("expected %d bytes, got %d", len(buf), n) 15 | } 16 | if err != nil { 17 | t.Fatal(err) 18 | } 19 | 20 | r := NewBuzhash(bytes.NewReader(buf)) 21 | 22 | var chunks [][]byte 23 | 24 | for { 25 | chunk, err := r.NextBytes() 26 | if err != nil { 27 | if err == io.EOF { 28 | break 29 | } 30 | t.Fatal(err) 31 | } 32 | 33 | chunks = append(chunks, chunk) 34 | } 35 | chunkCount += len(chunks) 36 | 37 | for i, chunk := range chunks { 38 | if len(chunk) == 0 { 39 | 
t.Fatalf("chunk %d/%d is empty", i+1, len(chunks)) 40 | } 41 | } 42 | 43 | for i, chunk := range chunks[:len(chunks)-1] { 44 | if len(chunk) < buzMin { 45 | t.Fatalf("chunk %d/%d is less than the minimum size", i+1, len(chunks)) 46 | } 47 | } 48 | 49 | unchunked := bytes.Join(chunks, nil) 50 | if !bytes.Equal(unchunked, buf) { 51 | t.Fatal("data was chunked incorrectly") 52 | } 53 | 54 | return chunkCount 55 | } 56 | 57 | func TestBuzhashChunking(t *testing.T) { 58 | buf := make([]byte, 1024*1024*16) 59 | count := testBuzhashChunking(t, buf) 60 | t.Logf("average block size: %d\n", len(buf)/count) 61 | } 62 | 63 | func TestBuzhashChunkReuse(t *testing.T) { 64 | newBuzhash := func(r io.Reader) Splitter { 65 | return NewBuzhash(r) 66 | } 67 | testReuse(t, newBuzhash) 68 | } 69 | 70 | func BenchmarkBuzhash2(b *testing.B) { 71 | benchmarkChunker(b, func(r io.Reader) Splitter { 72 | return NewBuzhash(r) 73 | }) 74 | } 75 | 76 | func TestBuzhashBitsHashBias(t *testing.T) { 77 | counts := make([]byte, 32) 78 | for _, h := range bytehash { 79 | for i := 0; i < 32; i++ { 80 | if h&1 == 1 { 81 | counts[i]++ 82 | } 83 | h = h >> 1 84 | } 85 | } 86 | for i, c := range counts { 87 | if c != 128 { 88 | t.Errorf("Bit balance in position %d broken, %d ones", i, c) 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /parse_test.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "testing" 7 | ) 8 | 9 | const ( 10 | testTwoThirdsOfChunkLimit = 2 * (float32(ChunkSizeLimit) / float32(3)) 11 | ) 12 | 13 | func TestParseRabin(t *testing.T) { 14 | r := bytes.NewReader(randBuf(t, 1000)) 15 | 16 | _, err := FromString(r, "rabin-18-25-32") 17 | if err != nil { 18 | t.Errorf(err.Error()) 19 | } 20 | 21 | _, err = FromString(r, "rabin-15-23-31") 22 | if err != ErrRabinMin { 23 | t.Fatalf("Expected an 'ErrRabinMin' error, got: %#v", err) 24 | } 25 
| 26 | _, err = FromString(r, "rabin-20-20-21") 27 | if err == nil || err.Error() != "incorrect format: rabin-min must be smaller than rabin-avg" { 28 | t.Fatalf("Expected an arg-out-of-order error, got: %#v", err) 29 | } 30 | 31 | _, err = FromString(r, "rabin-19-21-21") 32 | if err == nil || err.Error() != "incorrect format: rabin-avg must be smaller than rabin-max" { 33 | t.Fatalf("Expected an arg-out-of-order error, got: %#v", err) 34 | } 35 | 36 | _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", ChunkSizeLimit)) 37 | if err != nil { 38 | t.Fatalf("Expected success, got: %#v", err) 39 | } 40 | 41 | _, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", 1+ChunkSizeLimit)) 42 | if err != ErrSizeMax { 43 | t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) 44 | } 45 | 46 | _, err = FromString(r, fmt.Sprintf("rabin-%.0f", testTwoThirdsOfChunkLimit)) 47 | if err != nil { 48 | t.Fatalf("Expected success, got: %#v", err) 49 | } 50 | 51 | _, err = FromString(r, fmt.Sprintf("rabin-%.0f", 1+testTwoThirdsOfChunkLimit)) 52 | if err != ErrSizeMax { 53 | t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) 54 | } 55 | 56 | } 57 | 58 | func TestParseSize(t *testing.T) { 59 | r := bytes.NewReader(randBuf(t, 1000)) 60 | 61 | _, err := FromString(r, "size-0") 62 | if err != ErrSize { 63 | t.Fatalf("Expected an 'ErrSize' error, got: %#v", err) 64 | } 65 | 66 | _, err = FromString(r, "size-32") 67 | if err != nil { 68 | t.Fatalf("Expected success, got: %#v", err) 69 | } 70 | 71 | _, err = FromString(r, fmt.Sprintf("size-%d", ChunkSizeLimit)) 72 | if err != nil { 73 | t.Fatalf("Expected success, got: %#v", err) 74 | } 75 | 76 | _, err = FromString(r, fmt.Sprintf("size-%d", 1+ChunkSizeLimit)) 77 | if err != ErrSizeMax { 78 | t.Fatalf("Expected 'ErrSizeMax', got: %#v", err) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-ipfs-chunker 2 
| 3 | > go-ipfs-chunker implements data Splitters for go-ipfs. 4 | 5 | [![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io) 6 | [![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/) 7 | [![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme) 8 | [![GoDoc](https://godoc.org/github.com/ipfs/go-ipfs-chunker?status.svg)](https://godoc.org/github.com/ipfs/go-ipfs-chunker) 9 | [![Build Status](https://travis-ci.org/ipfs/go-ipfs-chunker.svg?branch=master)](https://travis-ci.org/ipfs/go-ipfs-chunker) 10 | 11 | ## ❗ This repo is no longer maintained. 12 | 👉 We highly recommend switching to the maintained version at https://github.com/ipfs/boxo/tree/main/chunker. 13 | 🏎️ Good news! There is [tooling and documentation](https://github.com/ipfs/boxo#migrating-to-boxo) to expedite a switch in your repo. 14 | 15 | ⚠️ If you continue using this repo, please note that security fixes will not be provided (unless someone steps in to maintain it). 16 | 17 | 📚 Learn more, including how to take the maintainership mantle or ask questions, [here](https://github.com/ipfs/boxo/wiki/Copied-or-Migrated-Repos-FAQ). 18 | 19 | 20 | ## Summary 21 | 22 | `go-ipfs-chunker` provides the `Splitter` interface. IPFS splitters read data from a reader and create "chunks". These chunks are used to build the ipfs DAGs (Merkle Tree) and are the base unit to obtain the sums that ipfs uses to address content. 23 | 24 | The package provides a `SizeSplitter` which creates chunks of equal size and it is used by default in most cases, and a `rabin` fingerprint chunker. This chunker will attempt to split data in a way that the resulting blocks are the same when the data has repetitive patterns, thus optimizing the resulting DAGs.
25 | 26 | ## Table of Contents 27 | 28 | - [Install](#install) 29 | - [Usage](#usage) 30 | - [License](#license) 31 | 32 | ## Install 33 | 34 | `go-ipfs-chunker` works like a regular Go module: 35 | 36 | ``` 37 | > go get github.com/ipfs/go-ipfs-chunker 38 | ``` 39 | 40 | ## Usage 41 | 42 | ``` 43 | import "github.com/ipfs/go-ipfs-chunker" 44 | ``` 45 | 46 | Check the [GoDoc documentation](https://godoc.org/github.com/ipfs/go-ipfs-chunker) 47 | 48 | ## License 49 | 50 | MIT © Protocol Labs, Inc. 51 | -------------------------------------------------------------------------------- /rabin_test.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "testing" 8 | 9 | blocks "github.com/ipfs/go-block-format" 10 | util "github.com/ipfs/go-ipfs-util" 11 | ) 12 | 13 | func TestRabinChunking(t *testing.T) { 14 | data := make([]byte, 1024*1024*16) 15 | n, err := util.NewTimeSeededRand().Read(data) 16 | if n < len(data) { 17 | t.Fatalf("expected %d bytes, got %d", len(data), n) 18 | } 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | 23 | r := NewRabin(bytes.NewReader(data), 1024*256) 24 | 25 | var chunks [][]byte 26 | 27 | for { 28 | chunk, err := r.NextBytes() 29 | if err != nil { 30 | if err == io.EOF { 31 | break 32 | } 33 | t.Fatal(err) 34 | } 35 | 36 | chunks = append(chunks, chunk) 37 | } 38 | 39 | fmt.Printf("average block size: %d\n", len(data)/len(chunks)) 40 | 41 | unchunked := bytes.Join(chunks, nil) 42 | if !bytes.Equal(unchunked, data) { 43 | fmt.Printf("%d %d\n", len(unchunked), len(data)) 44 | t.Fatal("data was chunked incorrectly") 45 | } 46 | } 47 | 48 | func chunkData(t *testing.T, newC newSplitter, data []byte) map[string]blocks.Block { 49 | r := newC(bytes.NewReader(data)) 50 | 51 | blkmap := make(map[string]blocks.Block) 52 | 53 | for { 54 | blk, err := r.NextBytes() 55 | if err != nil { 56 | if err == io.EOF { 57 | break 58 | } 59 | t.Fatal(err) 60 | } 
61 | 62 | b := blocks.NewBlock(blk) 63 | blkmap[b.Cid().KeyString()] = b 64 | } 65 | 66 | return blkmap 67 | } 68 | 69 | func testReuse(t *testing.T, cr newSplitter) { 70 | data := make([]byte, 1024*1024*16) 71 | n, err := util.NewTimeSeededRand().Read(data) 72 | if n < len(data) { 73 | t.Fatalf("expected %d bytes, got %d", len(data), n) 74 | } 75 | if err != nil { 76 | t.Fatal(err) 77 | } 78 | 79 | ch1 := chunkData(t, cr, data[1000:]) 80 | ch2 := chunkData(t, cr, data) 81 | 82 | var extra int 83 | for k := range ch2 { 84 | _, ok := ch1[k] 85 | if !ok { 86 | extra++ 87 | } 88 | } 89 | 90 | if extra > 2 { 91 | t.Logf("too many spare chunks made: %d", extra) 92 | } 93 | } 94 | 95 | func TestRabinChunkReuse(t *testing.T) { 96 | newRabin := func(r io.Reader) Splitter { 97 | return NewRabin(r, 256*1024) 98 | } 99 | testReuse(t, newRabin) 100 | } 101 | 102 | var Res uint64 103 | 104 | func BenchmarkRabin(b *testing.B) { 105 | benchmarkChunker(b, func(r io.Reader) Splitter { 106 | return NewRabin(r, 256<<10) 107 | }) 108 | } 109 | -------------------------------------------------------------------------------- /.github/workflows/go-check.yml: -------------------------------------------------------------------------------- 1 | # File managed by web3-bot. DO NOT EDIT. 2 | # See https://github.com/protocol/.github/ for details. 
3 | 4 | on: [push, pull_request] 5 | name: Go Checks 6 | 7 | jobs: 8 | unit: 9 | runs-on: ubuntu-latest 10 | name: All 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | submodules: recursive 15 | - id: config 16 | uses: protocol/.github/.github/actions/read-config@master 17 | - uses: actions/setup-go@v3 18 | with: 19 | go-version: 1.20.x 20 | - name: Run repo-specific setup 21 | uses: ./.github/actions/go-check-setup 22 | if: hashFiles('./.github/actions/go-check-setup') != '' 23 | - name: Install staticcheck 24 | run: go install honnef.co/go/tools/cmd/staticcheck@4970552d932f48b71485287748246cf3237cebdf # 2023.1 (v0.4.0) 25 | - name: Check that go.mod is tidy 26 | uses: protocol/multiple-go-modules@v1.2 27 | with: 28 | run: | 29 | go mod tidy 30 | if [[ -n $(git ls-files --other --exclude-standard --directory -- go.sum) ]]; then 31 | echo "go.sum was added by go mod tidy" 32 | exit 1 33 | fi 34 | git diff --exit-code -- go.sum go.mod 35 | - name: gofmt 36 | if: success() || failure() # run this step even if the previous one failed 37 | run: | 38 | out=$(gofmt -s -l .) 39 | if [[ -n "$out" ]]; then 40 | echo $out | awk '{print "::error file=" $0 ",line=0,col=0::File is not gofmt-ed."}' 41 | exit 1 42 | fi 43 | - name: go vet 44 | if: success() || failure() # run this step even if the previous one failed 45 | uses: protocol/multiple-go-modules@v1.2 46 | with: 47 | run: go vet ./... 48 | - name: staticcheck 49 | if: success() || failure() # run this step even if the previous one failed 50 | uses: protocol/multiple-go-modules@v1.2 51 | with: 52 | run: | 53 | set -o pipefail 54 | staticcheck ./... | sed -e 's@\(.*\)\.go@./\1.go@g' 55 | - name: go generate 56 | uses: protocol/multiple-go-modules@v1.2 57 | if: (success() || failure()) && fromJSON(steps.config.outputs.json).gogenerate == true 58 | with: 59 | run: | 60 | git clean -fd # make sure there aren't untracked files / directories 61 | go generate -x ./... 
62 | # check if go generate modified or added any files 63 | if ! $(git add . && git diff-index HEAD --exit-code --quiet); then 64 | echo "go generated caused changes to the repository:" 65 | git status --short 66 | exit 1 67 | fi 68 | -------------------------------------------------------------------------------- /.github/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration for welcome - https://github.com/behaviorbot/welcome 2 | 3 | # Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome 4 | # Comment to be posted to on first time issues 5 | newIssueWelcomeComment: > 6 | Thank you for submitting your first issue to this repository! A maintainer 7 | will be here shortly to triage and review. 8 | 9 | In the meantime, please double-check that you have provided all the 10 | necessary information to make this process easy! Any information that can 11 | help save additional round trips is useful! We currently aim to give 12 | initial feedback within **two business days**. If this does not happen, feel 13 | free to leave a comment. 14 | 15 | Please keep an eye on how this issue will be labeled, as labels give an 16 | overview of priorities, assignments and additional actions requested by the 17 | maintainers: 18 | 19 | - "Priority" labels will show how urgent this is for the team. 20 | - "Status" labels will show if this is ready to be worked on, blocked, or in progress. 21 | - "Need" labels will indicate if additional input or analysis is required. 22 | 23 | Finally, remember to use https://discuss.ipfs.io if you just need general 24 | support. 25 | 26 | # Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome 27 | # Comment to be posted to on PRs from first time contributors in your repository 28 | newPRWelcomeComment: > 29 | Thank you for submitting this PR! 30 | 31 | A maintainer will be here shortly to review it. 
32 | 33 | We are super grateful, but we are also overloaded! Help us by making sure 34 | that: 35 | 36 | * The context for this PR is clear, with relevant discussion, decisions 37 | and stakeholders linked/mentioned. 38 | 39 | * Your contribution itself is clear (code comments, self-review for the 40 | rest) and in its best form. Follow the [code contribution 41 | guidelines](https://github.com/ipfs/community/blob/master/CONTRIBUTING.md#code-contribution-guidelines) 42 | if they apply. 43 | 44 | Getting other community members to do a review would be great help too on 45 | complex PRs (you can ask in the chats/forums). If you are unsure about 46 | something, just leave us a comment. 47 | 48 | Next steps: 49 | 50 | * A maintainer will triage and assign priority to this PR, commenting on 51 | any missing things and potentially assigning a reviewer for high 52 | priority items. 53 | 54 | * The PR gets reviews, discussed and approvals as needed. 55 | 56 | * The PR is merged by maintainers when it has been approved and comments addressed. 57 | 58 | We currently aim to provide initial feedback/triaging within **two business 59 | days**. Please keep an eye on any labelling actions, as these will indicate 60 | priorities and status of your contribution. 61 | 62 | We are very grateful for your contribution! 
// copyBuf returns a newly allocated slice holding the same bytes as buf. The
// result never shares storage with buf and is non-nil even for empty input.
func copyBuf(buf []byte) []byte {
	return append(make([]byte, 0, len(buf)), buf...)
}
// clipReader wraps an io.Reader and never passes the wrapped reader a buffer
// longer than size bytes, so a single Read call returns at most size bytes.
type clipReader struct {
	size int
	r    io.Reader
}

// Read forwards to the wrapped reader using at most the first s.size bytes of
// buf, which forces callers to assemble their data from several short reads.
func (s *clipReader) Read(buf []byte) (int, error) {
	limit := len(buf)
	if s.size < limit {
		limit = s.size
	}
	return s.r.Read(buf[:limit])
}
/splitting.go: -------------------------------------------------------------------------------- 1 | // Package chunk implements streaming block splitters. 2 | // Splitters read data from a reader and provide byte slices (chunks) 3 | // The size and contents of these slices depend on the splitting method 4 | // used. 5 | package chunk 6 | 7 | import ( 8 | "io" 9 | 10 | logging "github.com/ipfs/go-log" 11 | pool "github.com/libp2p/go-buffer-pool" 12 | ) 13 | 14 | var log = logging.Logger("chunk") 15 | 16 | // A Splitter reads bytes from a Reader and creates "chunks" (byte slices) 17 | // that can be used to build DAG nodes. 18 | // 19 | // Deprecated: use github.com/ipfs/boxo/chunker.Splitter 20 | type Splitter interface { 21 | Reader() io.Reader 22 | NextBytes() ([]byte, error) 23 | } 24 | 25 | // SplitterGen is a splitter generator, given a reader. 26 | // 27 | // Deprecated: use github.com/ipfs/boxo/chunker.SplitterGen 28 | type SplitterGen func(r io.Reader) Splitter 29 | 30 | // DefaultSplitter returns a SizeSplitter with the DefaultBlockSize. 31 | // 32 | // Deprecated: use github.com/ipfs/boxo/chunker.DefaultSplitter 33 | func DefaultSplitter(r io.Reader) Splitter { 34 | return NewSizeSplitter(r, DefaultBlockSize) 35 | } 36 | 37 | // SizeSplitterGen returns a SplitterGen function which will create 38 | // a splitter with the given size when called. 39 | // 40 | // Deprecated: use github.com/ipfs/boxo/chunker.SizeSplitterGen 41 | func SizeSplitterGen(size int64) SplitterGen { 42 | return func(r io.Reader) Splitter { 43 | return NewSizeSplitter(r, size) 44 | } 45 | } 46 | 47 | // Chan returns a channel that receives each of the chunks produced 48 | // by a splitter, along with another one for errors. 
49 | // 50 | // Deprecated: use github.com/ipfs/boxo/chunker.Chan 51 | func Chan(s Splitter) (<-chan []byte, <-chan error) { 52 | out := make(chan []byte) 53 | errs := make(chan error, 1) 54 | go func() { 55 | defer close(out) 56 | defer close(errs) 57 | 58 | // all-chunks loop (keep creating chunks) 59 | for { 60 | b, err := s.NextBytes() 61 | if err != nil { 62 | errs <- err 63 | return 64 | } 65 | 66 | out <- b 67 | } 68 | }() 69 | return out, errs 70 | } 71 | 72 | type sizeSplitterv2 struct { 73 | r io.Reader 74 | size uint32 75 | err error 76 | } 77 | 78 | // NewSizeSplitter returns a new size-based Splitter with the given block size. 79 | // 80 | // Deprecated: use github.com/ipfs/boxo/chunker.NewSizeSplitter 81 | func NewSizeSplitter(r io.Reader, size int64) Splitter { 82 | return &sizeSplitterv2{ 83 | r: r, 84 | size: uint32(size), 85 | } 86 | } 87 | 88 | // NextBytes produces a new chunk. 89 | func (ss *sizeSplitterv2) NextBytes() ([]byte, error) { 90 | if ss.err != nil { 91 | return nil, ss.err 92 | } 93 | 94 | full := pool.Get(int(ss.size)) 95 | n, err := io.ReadFull(ss.r, full) 96 | switch err { 97 | case io.ErrUnexpectedEOF: 98 | ss.err = io.EOF 99 | small := make([]byte, n) 100 | copy(small, full) 101 | pool.Put(full) 102 | return small, nil 103 | case nil: 104 | return full, nil 105 | default: 106 | pool.Put(full) 107 | return nil, err 108 | } 109 | } 110 | 111 | // Reader returns the io.Reader associated to this Splitter. 112 | func (ss *sizeSplitterv2) Reader() io.Reader { 113 | return ss.r 114 | } 115 | -------------------------------------------------------------------------------- /.github/workflows/go-test.yml: -------------------------------------------------------------------------------- 1 | # File managed by web3-bot. DO NOT EDIT. 2 | # See https://github.com/protocol/.github/ for details. 
3 | 4 | on: [push, pull_request] 5 | name: Go Test 6 | 7 | jobs: 8 | unit: 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | os: [ "ubuntu", "windows", "macos" ] 13 | go: ["1.19.x","1.20.x"] 14 | env: 15 | COVERAGES: "" 16 | runs-on: ${{ fromJSON(vars[format('UCI_GO_TEST_RUNNER_{0}', matrix.os)] || format('"{0}-latest"', matrix.os)) }} 17 | name: ${{ matrix.os }} (go ${{ matrix.go }}) 18 | steps: 19 | - uses: actions/checkout@v3 20 | with: 21 | submodules: recursive 22 | - id: config 23 | uses: protocol/.github/.github/actions/read-config@master 24 | - uses: actions/setup-go@v3 25 | with: 26 | go-version: ${{ matrix.go }} 27 | - name: Go information 28 | run: | 29 | go version 30 | go env 31 | - name: Use msys2 on windows 32 | if: matrix.os == 'windows' 33 | shell: bash 34 | # The executable for msys2 is also called bash.cmd 35 | # https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md#shells 36 | # If we prepend its location to the PATH 37 | # subsequent 'shell: bash' steps will use msys2 instead of gitbash 38 | run: echo "C:/msys64/usr/bin" >> $GITHUB_PATH 39 | - name: Run repo-specific setup 40 | uses: ./.github/actions/go-test-setup 41 | if: hashFiles('./.github/actions/go-test-setup') != '' 42 | - name: Run tests 43 | if: contains(fromJSON(steps.config.outputs.json).skipOSes, matrix.os) == false 44 | uses: protocol/multiple-go-modules@v1.2 45 | with: 46 | # Use -coverpkg=./..., so that we include cross-package coverage. 47 | # If package ./A imports ./B, and ./A's tests also cover ./B, 48 | # this means ./B's coverage will be significantly higher than 0%. 49 | run: go test -v -shuffle=on -coverprofile=module-coverage.txt -coverpkg=./... ./... 50 | - name: Run tests (32 bit) 51 | # can't run 32 bit tests on OSX. 
52 | if: matrix.os != 'macos' && 53 | fromJSON(steps.config.outputs.json).skip32bit != true && 54 | contains(fromJSON(steps.config.outputs.json).skipOSes, matrix.os) == false 55 | uses: protocol/multiple-go-modules@v1.2 56 | env: 57 | GOARCH: 386 58 | with: 59 | run: | 60 | export "PATH=$PATH_386:$PATH" 61 | go test -v -shuffle=on ./... 62 | - name: Run tests with race detector 63 | # speed things up. Windows and OSX VMs are slow 64 | if: matrix.os == 'ubuntu' && 65 | contains(fromJSON(steps.config.outputs.json).skipOSes, matrix.os) == false 66 | uses: protocol/multiple-go-modules@v1.2 67 | with: 68 | run: go test -v -race ./... 69 | - name: Collect coverage files 70 | shell: bash 71 | run: echo "COVERAGES=$(find . -type f -name 'module-coverage.txt' | tr -s '\n' ',' | sed 's/,$//')" >> $GITHUB_ENV 72 | - name: Upload coverage to Codecov 73 | uses: codecov/codecov-action@d9f34f8cd5cb3b3eb79b3e4b5dae3a16df499a70 # v3.1.1 74 | with: 75 | files: '${{ env.COVERAGES }}' 76 | env_vars: OS=${{ matrix.os }}, GO=${{ matrix.go }} 77 | -------------------------------------------------------------------------------- /parse.go: -------------------------------------------------------------------------------- 1 | package chunk 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | "strconv" 8 | "strings" 9 | ) 10 | 11 | const ( 12 | // DefaultBlockSize is the chunk size that splitters produce (or aim to). 
13 | // 14 | // Deprecated: use github.com/ipfs/boxo/chunker.DefaultBlockSize 15 | DefaultBlockSize int64 = 1024 * 256 16 | 17 | // No leaf block should contain more than 1MiB of payload data ( wrapping overhead aside ) 18 | // This effectively mandates the maximum chunk size 19 | // See discussion at https://github.com/ipfs/go-ipfs-chunker/pull/21#discussion_r369124879 for background 20 | // 21 | // Deprecated: use github.com/ipfs/boxo/chunker.ChunkSizeLimit 22 | ChunkSizeLimit int = 1048576 23 | ) 24 | 25 | var ( 26 | // Deprecated: use github.com/ipfs/boxo/chunker.ErrRabinMin 27 | ErrRabinMin = errors.New("rabin min must be greater than 16") 28 | // Deprecated: use github.com/ipfs/boxo/chunker.ErrSize 29 | ErrSize = errors.New("chunker size must be greater than 0") 30 | // Deprecated: use github.com/ipfs/boxo/chunker.ErrSizeMax 31 | ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit) 32 | ) 33 | 34 | // FromString returns a Splitter depending on the given string: 35 | // it supports "default" (""), "size-{size}", "rabin", "rabin-{blocksize}", 36 | // "rabin-{min}-{avg}-{max}" and "buzhash". 
37 | // 38 | // Deprecated: use github.com/ipfs/boxo/chunker.FromString 39 | func FromString(r io.Reader, chunker string) (Splitter, error) { 40 | switch { 41 | case chunker == "" || chunker == "default": 42 | return DefaultSplitter(r), nil 43 | 44 | case strings.HasPrefix(chunker, "size-"): 45 | sizeStr := strings.Split(chunker, "-")[1] 46 | size, err := strconv.Atoi(sizeStr) 47 | if err != nil { 48 | return nil, err 49 | } else if size <= 0 { 50 | return nil, ErrSize 51 | } else if size > ChunkSizeLimit { 52 | return nil, ErrSizeMax 53 | } 54 | return NewSizeSplitter(r, int64(size)), nil 55 | 56 | case strings.HasPrefix(chunker, "rabin"): 57 | return parseRabinString(r, chunker) 58 | 59 | case chunker == "buzhash": 60 | return NewBuzhash(r), nil 61 | 62 | default: 63 | return nil, fmt.Errorf("unrecognized chunker option: %s", chunker) 64 | } 65 | } 66 | 67 | func parseRabinString(r io.Reader, chunker string) (Splitter, error) { 68 | parts := strings.Split(chunker, "-") 69 | switch len(parts) { 70 | case 1: 71 | return NewRabin(r, uint64(DefaultBlockSize)), nil 72 | case 2: 73 | size, err := strconv.Atoi(parts[1]) 74 | if err != nil { 75 | return nil, err 76 | } else if int(float32(size)*1.5) > ChunkSizeLimit { // FIXME - this will be addressed in a subsequent PR 77 | return nil, ErrSizeMax 78 | } 79 | return NewRabin(r, uint64(size)), nil 80 | case 4: 81 | sub := strings.Split(parts[1], ":") 82 | if len(sub) > 1 && sub[0] != "min" { 83 | return nil, errors.New("first label must be min") 84 | } 85 | min, err := strconv.Atoi(sub[len(sub)-1]) 86 | if err != nil { 87 | return nil, err 88 | } 89 | if min < 16 { 90 | return nil, ErrRabinMin 91 | } 92 | sub = strings.Split(parts[2], ":") 93 | if len(sub) > 1 && sub[0] != "avg" { 94 | log.Error("sub == ", sub) 95 | return nil, errors.New("second label must be avg") 96 | } 97 | avg, err := strconv.Atoi(sub[len(sub)-1]) 98 | if err != nil { 99 | return nil, err 100 | } 101 | 102 | sub = strings.Split(parts[3], ":") 103 | 
if len(sub) > 1 && sub[0] != "max" { 104 | return nil, errors.New("final label must be max") 105 | } 106 | max, err := strconv.Atoi(sub[len(sub)-1]) 107 | if err != nil { 108 | return nil, err 109 | } 110 | 111 | if min >= avg { 112 | return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg") 113 | } else if avg >= max { 114 | return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max") 115 | } else if max > ChunkSizeLimit { 116 | return nil, ErrSizeMax 117 | } 118 | 119 | return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil 120 | default: 121 | return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'") 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= 4 | github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= 5 | github.com/gxed/hashland/keccakpg v0.0.1 h1:wrk3uMNaMxbXiHibbPO4S0ymqJMm41WiudyFSs7UnsU= 6 | github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU= 7 | github.com/gxed/hashland/murmur3 v0.0.1 h1:SheiaIt0sda5K+8FLz952/1iWS9zrnKsEJaOJu4ZbSc= 8 | github.com/gxed/hashland/murmur3 v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48= 9 | github.com/ipfs/go-block-format v0.0.2 h1:qPDvcP19izTjU8rgo6p7gTXZlkMkF5bz5G3fqIsSCPE= 10 | github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= 11 | github.com/ipfs/go-cid v0.0.1 h1:GBjWPktLnNyX0JiQCNFpUuUSoMw5KMyqrsejHYlILBE= 12 | github.com/ipfs/go-cid v0.0.1/go.mod 
h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= 13 | github.com/ipfs/go-ipfs-util v0.0.1 h1:Wz9bL2wB2YBJqggkA4dD7oSmqB4cAnpNbGrlHJulv50= 14 | github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc= 15 | github.com/ipfs/go-log v0.0.1 h1:9XTUN/rW64BCG1YhPK9Hoy3q8nr4gOmHHBpgFdfw6Lc= 16 | github.com/ipfs/go-log v0.0.1/go.mod h1:kL1d2/hzSpI0thNYjiKfjanbVNU+IIGA/WnNESY9leM= 17 | github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= 18 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 19 | github.com/libp2p/go-buffer-pool v0.0.2 h1:QNK2iAFa8gjAe1SPz6mHSMuCcjs+X1wlHzeOSqcmlfs= 20 | github.com/libp2p/go-buffer-pool v0.0.2/go.mod h1:MvaB6xw5vOrDl8rYZGLFdKAuk/hRoRZd1Vi32+RXyFM= 21 | github.com/mattn/go-colorable v0.1.1 h1:G1f5SKeVxmagw/IyvzvtZE4Gybcc4Tr1tf7I8z0XgOg= 22 | github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= 23 | github.com/mattn/go-isatty v0.0.5 h1:tHXDdz1cpzGaovsTB+TVB8q90WEokoVmfMqoVcrLUgw= 24 | github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 25 | github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g= 26 | github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ= 27 | github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16 h1:5W7KhL8HVF3XCFOweFD3BNESdnO8ewyYTFT2R+/b8FQ= 28 | github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16/go.mod h1:2FMWW+8GMoPweT6+pI63m9YE3Lmw4J71hV56Chs1E/U= 29 | github.com/mr-tron/base58 v1.1.0 h1:Y51FGVJ91WBqCEabAi5OPUz38eAx8DakuAm5svLcsfQ= 30 | github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8= 31 | github.com/multiformats/go-base32 v0.0.3 h1:tw5+NhuwaOjJCC5Pp82QuXbrmLzWg7uxlMFp8Nq/kkI= 32 | github.com/multiformats/go-base32 v0.0.3/go.mod 
h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA= 33 | github.com/multiformats/go-multibase v0.0.1 h1:PN9/v21eLywrFWdFNsFKaU04kLJzuYzmrJR+ubhT9qA= 34 | github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs= 35 | github.com/multiformats/go-multihash v0.0.1 h1:HHwN1K12I+XllBCrqKnhX949Orn4oawPkegHMu2vDqQ= 36 | github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U= 37 | github.com/opentracing/opentracing-go v1.0.2 h1:3jA2P6O1F9UOrWVpwrIo17pu01KWvNWg4X946/Y5Zwg= 38 | github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= 39 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 40 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 41 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 42 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 43 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 44 | github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= 45 | github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= 46 | github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc h1:9lDbC6Rz4bwmou+oE6Dt4Cb2BGMur5eR/GYptkKUVHo= 47 | github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc/go.mod h1:bopw91TMyo8J3tvftk8xmU2kPmlrt4nScJQZU2hE5EM= 48 | golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67 h1:ng3VDlRp5/DHpSWl02R4rM9I+8M2rhmsuLwAMmkLQWE= 49 | golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 50 | golang.org/x/net v0.0.0-20190227160552-c95aed5357e7 h1:C2F/nMkR/9sfUTpvR3QrjBuTdvMUC/cFajkphs1YLQo= 51 | golang.org/x/net 
v0.0.0-20190227160552-c95aed5357e7/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 52 | golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 53 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8= 54 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 55 | golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 56 |
--------------------------------------------------------------------------------
/buzhash.go:
--------------------------------------------------------------------------------
package chunk

import (
	"io"
	"math/bits"

	pool "github.com/libp2p/go-buffer-pool"
)

const (
	// buzMin is the smallest chunk NextBytes cuts while at least buzMin
	// bytes are buffered (128 KiB); only the final chunk can be shorter.
	buzMin = 128 << 10
	// buzMax is the size of the working buffer and therefore the largest
	// chunk NextBytes can return (512 KiB).
	buzMax = 512 << 10
	// buzMask selects the low 17 bits of the rolling hash; a chunk boundary
	// is declared where all of them are zero.
	buzMask = 1<<17 - 1
)

// Buzhash is a splitter that cuts chunks at content-defined boundaries,
// found with a rolling hash over a 32-byte window.
//
// Deprecated: use github.com/ipfs/boxo/chunker.Buzhash
type Buzhash struct {
	r   io.Reader
	buf []byte // working buffer of buzMax bytes taken from the buffer pool
	n   int    // number of bytes at the start of buf left over from the previous call

	err error // sticky error returned by every call once set
}

// NewBuzhash returns a Buzhash splitter reading from r. Its working buffer
// of buzMax bytes is taken from the buffer pool.
//
// Deprecated: use github.com/ipfs/boxo/chunker.NewBuzhash
func NewBuzhash(r io.Reader) *Buzhash {
	return &Buzhash{
		r:   r,
		buf: pool.Get(buzMax),
	}
}

// Reader returns the io.Reader this splitter consumes.
func (b *Buzhash) Reader() io.Reader {
	return b.r
}

// NextBytes returns the next chunk as a freshly allocated slice.
//
// It tops up the working buffer from the reader and then looks for a cut
// point at or after buzMin bytes where the rolling hash of the preceding 32
// bytes has its low 17 bits clear. If no such point exists in the buffered
// data, everything buffered is returned. Bytes after the cut are kept for
// the next call.
//
// When the reader ends with fewer than buzMin bytes buffered, those bytes
// are returned as the final chunk (or io.EOF if there are none), the buffer
// goes back to the pool, and later calls return io.EOF. Any other read
// error is returned and becomes sticky as well.
func (b *Buzhash) NextBytes() ([]byte, error) {
	if b.err != nil {
		return nil, b.err
	}

	// Fill the free part of the buffer, after the b.n bytes carried over.
	n, err := io.ReadFull(b.r, b.buf[b.n:])
	if err != nil {
		if err == io.ErrUnexpectedEOF || err == io.EOF {
			buffered := b.n + n
			if buffered < buzMin {
				b.err = io.EOF
				// Read nothing? Don't return an empty block.
				if buffered == 0 {
					pool.Put(b.buf)
					b.buf = nil
					return nil, b.err
				}
				res := make([]byte, buffered)
				copy(res, b.buf)

				pool.Put(b.buf)
				b.buf = nil
				return res, nil
			}
			// At least buzMin bytes are buffered: fall through to the
			// boundary search. b.err stays unset, so the next call reads
			// from the reader again.
		} else {
			b.err = err
			pool.Put(b.buf)
			b.buf = nil
			return nil, err
		}
	}

	i := buzMin - 32

	var state uint32 = 0

	// NOTE(review): the buffer always has buzMax bytes here; this check
	// presumably exists to help the compiler drop bounds checks in the
	// loop below.
	if buzMin > len(b.buf) {
		panic("this is impossible")
	}

	// Prime the hash with the 32 bytes that end at offset buzMin.
	for ; i < buzMin; i++ {
		state = bits.RotateLeft32(state, 1)
		state = state ^ bytehash[b.buf[i]]
	}

	{
		// Last window start whose incoming byte (at i+32) is still buffered.
		max := b.n + n - 32 - 1

		buf := b.buf
		bufshf := b.buf[32:]
		i = buzMin - 32
		// NOTE(review): presumably bounds-check hints for the loop below.
		_ = buf[max]
		_ = bufshf[max]

		for ; i <= max; i++ {
			if state&buzMask == 0 {
				break
			}
			// Slide the window by one byte: buf[i] leaves and bufshf[i]
			// (that is, b.buf[i+32]) enters. After 32 one-bit rotations of
			// a 32-bit state the leaving byte's hash is back in its
			// original position, so a plain XOR removes it.
			state = bits.RotateLeft32(state, 1) ^
				bytehash[buf[i]] ^
				bytehash[bufshf[i]]
		}
		// i was the window start; the cut lies at the window end. If the
		// loop ran to completion this equals b.n+n, i.e. all buffered data.
		i += 32
	}

	res := make([]byte, i)
	copy(res, b.buf)

	// Move the bytes after the cut to the front for the next call.
	b.n = copy(b.buf, b.buf[i:b.n+n])

	return res, nil
}

// bytehash maps each byte value to the 32-bit value that is mixed into the
// rolling hash in NextBytes.
var bytehash = [256]uint32{
	0x6236e7d5, 0x10279b0b, 0x72818182, 0xdc526514, 0x2fd41e3d, 0x777ef8c8,
	0x83ee5285, 0x2c8f3637, 0x2f049c1a, 0x57df9791, 0x9207151f, 0x9b544818,
	0x74eef658, 0x2028ca60, 0x0271d91a, 0x27ae587e, 0xecf9fa5f, 0x236e71cd,
	0xf43a8a2e, 0xbb13380, 0x9e57912c, 0x89a26cdb, 0x9fcf3d71, 0xa86da6f1,
	0x9c49f376, 0x346aecc7, 0xf094a9ee, 0xea99e9cb, 0xb01713c6, 0x88acffb,
	0x2960a0fb, 0x344a626c, 0x7ff22a46, 0x6d7a1aa5, 0x6a714916, 0x41d454ca,
	0x8325b830, 0xb65f563, 0x447fecca, 0xf9d0ea5e, 0xc1d9d3d4, 0xcb5ec574,
	0x55aae902, 0x86edc0e7, 0xd3a9e33, 0xe70dc1e1, 0xe3c5f639, 0x9b43140a,
	0xc6490ac5, 0x5e4030fb, 0x8e976dd5, 0xa87468ea, 0xf830ef6f, 0xcc1ed5a5,
	0x611f4e78, 0xddd11905, 0xf2613904, 0x566c67b9, 0x905a5ccc, 0x7b37b3a4,
	0x4b53898a, 0x6b8fd29d, 0xaad81575, 0x511be414, 0x3cfac1e7, 0x8029a179,
	0xd40efeda, 0x7380e02, 0xdc9beffd, 0x2d049082, 0x99bc7831, 0xff5002a8,
	0x21ce7646, 0x1cd049b, 0xf43994f, 0xc3c6c5a5, 0xbbda5f50, 0xec15ec7,
	0x9adb19b6, 0xc1e80b9, 0xb9b52968, 0xae162419, 0x2542b405, 0x91a42e9d,
	0x6be0f668, 0x6ed7a6b9, 0xbc2777b4, 0xe162ce56, 0x4266aad5, 0x60fdb704,
	0x66f832a5, 0x9595f6ca, 0xfee83ced, 0x55228d99, 0x12bf0e28, 0x66896459,
	0x789afda, 0x282baa8, 0x2367a343, 0x591491b0, 0x2ff1a4b1, 0x410739b6,
	0x9b7055a0, 0x2e0eb229, 0x24fc8252, 0x3327d3df, 0xb0782669, 0x1c62e069,
	0x7f503101, 0xf50593ae, 0xd9eb275d, 0xe00eb678, 0x5917ccde, 0x97b9660a,
	0xdd06202d, 0xed229e22, 0xa9c735bf, 0xd6316fe6, 0x6fc72e4c, 0x206dfa2,
	0xd6b15c5a, 0x69d87b49, 0x9c97745, 0x13445d61, 0x35a975aa, 0x859aa9b9,
	0x65380013, 0xd1fb6391, 0xc29255fd, 0x784a3b91, 0xb9e74c26, 0x63ce4d40,
	0xc07cbe9e, 0xe6e4529e, 0xfb3632f, 0x9438d9c9, 0x682f94a8, 0xf8fd4611,
	0x257ec1ed, 0x475ce3d6, 0x60ee2db1, 0x2afab002, 0x2b9e4878, 0x86b340de,
	0x1482fdca, 0xfe41b3bf, 0xd4a412b0, 0xe09db98c, 0xc1af5d53, 0x7e55e25f,
	0xd3346b38, 0xb7a12cbd, 0x9c6827ba, 0x71f78bee, 0x8c3a0f52, 0x150491b0,
	0xf26de912, 0x233e3a4e, 0xd309ebba, 0xa0a9e0ff, 0xca2b5921, 0xeeb9893c,
	0x33829e88, 0x9870cc2a, 0x23c4b9d0, 0xeba32ea3, 0xbdac4d22, 0x3bc8c44c,
	0x1e8d0397, 0xf9327735, 0x783b009f, 0xeb83742, 0x2621dc71, 0xed017d03,
	0x5c760aa1, 0x5a69814b, 0x96e3047f, 0xa93c9cde, 0x615c86f5, 0xb4322aa5,
	0x4225534d, 0xd2e2de3, 0xccfccc4b, 0xbac2a57, 0xf0a06d04, 0xbc78d737,
	0xf2d1f766, 0xf5a7953c, 0xbcdfda85, 0x5213b7d5, 0xbce8a328, 0xd38f5f18,
	0xdb094244, 0xfe571253, 0x317fa7ee, 0x4a324f43, 0x3ffc39d9, 0x51b3fa8e,
	0x7a4bee9f, 0x78bbc682, 0x9f5c0350, 0x2fe286c, 0x245ab686, 0xed6bf7d7,
	0xac4988a, 0x3fe010fa, 0xc65fe369, 0xa45749cb, 0x2b84e537, 0xde9ff363,
	0x20540f9a, 0xaa8c9b34, 0x5bc476b3, 0x1d574bd7, 0x929100ad, 0x4721de4d,
	0x27df1b05, 0x58b18546, 0xb7e76764, 0xdf904e58, 0x97af57a1, 0xbd4dc433,
	0xa6256dfd, 0xf63998f3, 0xf1e05833, 0xe20acf26, 0xf57fd9d6, 0x90300b4d,
	0x89df4290, 0x68d01cbc, 0xcf893ee3, 0xcc42a046, 0x778e181b, 0x67265c76,
	0xe981a4c4, 0x82991da1, 0x708f7294, 0xe6e2ae62, 0xfc441870, 0x95e1b0b6,
	0x445f825, 0x5a93b47f, 0x5e9cf4be, 0x84da71e7, 0x9d9582b0, 0x9bf835ef,
	0x591f61e2, 0x43325985, 0x5d2de32e, 0x8d8fbf0f, 0x95b30f38, 0x7ad5b6e,
	0x4e934edf, 0x3cd4990e, 0x9053e259, 0x5c41857d}

--------------------------------------------------------------------------------