├── go.mod ├── README.md ├── example_test.go ├── LICENSE ├── options.go ├── .github └── workflows │ └── tests.yml ├── doc.go ├── polynomials.go ├── polynomials_test.go ├── chunker.go └── chunker_test.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/restic/chunker 2 | 3 | go 1.9 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GoDoc](https://godoc.org/github.com/restic/chunker?status.svg)](http://godoc.org/github.com/restic/chunker) 2 | [![Build Status](https://github.com/restic/chunker/workflows/test/badge.svg)](https://github.com/restic/chunker/actions?query=workflow%3Atest) 3 | 4 | The package `chunker` implements content-defined-chunking (CDC) based on a 5 | rolling Rabin Hash. The library is part of the [restic backup 6 | program](https://github.com/restic/restic). 7 | 8 | An introduction to Content Defined Chunking can be found in the restic blog 9 | post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc). 10 | 11 | You can find the API documentation at 12 | https://godoc.org/github.com/restic/chunker 13 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package chunker_test 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha256" 6 | "fmt" 7 | "io" 8 | "math/rand" 9 | 10 | "github.com/restic/chunker" 11 | ) 12 | 13 | func ExampleChunker() { 14 | // generate 32MiB of deterministic pseudo-random data 15 | rng := rand.New(rand.NewSource(23)) 16 | data := make([]byte, 32*1024*1024) 17 | 18 | _, err := rng.Read(data) 19 | if err != nil { 20 | panic(err) 21 | } 22 | 23 | // create a chunker 24 | chnkr := chunker.New(bytes.NewReader(data), chunker.Pol(0x3DA3358B4DC173)) 25 | 26 | // reuse this buffer 27 | buf := make([]byte, 8*1024*1024) 28 | 29 | for i := 0; i < 5; i++ { 30 | chunk, err := chnkr.Next(buf) 31 | if err == io.EOF { 32 | break 33 | } 34 | 35 | if err != nil { 36 | panic(err) 37 | } 38 | 39 | fmt.Printf("%d %02x\n", chunk.Length, sha256.Sum256(chunk.Data)) 40 | } 41 | 42 | // Output: 43 | // 1015370 615e8851030f318751f3c8baf8fbfa9958e2dd7f25dc1a87dcf6d6f79d1f1a9f 44 | // 1276199 f1cb038c558d3a2093049815cc45f80cd367712634a28f6dd36642f905d35c37 45 | // 1124437 a8e19dcd4224b58eb2b480ae42bb1a4a3b0c91c074f4745dbe3f8e4ec1a926e7 46 | // 3580969 2b3a3fe65ce9d689599c3b26375c40c22955bf92b170b24258e54dee91e3c2af 47 | // 3709129 47672502d75db244cb3dc3098eed87ffd537c9f0d66fb82a0198b6f6994409f2 48 | } 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Alexander Neumann 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | 
--------------------------------------------------------------------------------
/options.go:
--------------------------------------------------------------------------------
1 | package chunker
2 | 
3 | type option func(*Chunker)
4 | type baseOption func(*BaseChunker)
5 | 
6 | // WithBaseAverageBits controls the frequency of chunk discovery:
7 | // the lower averageBits, the more chunks will be identified.
8 | // The default value is 20 bits, so chunks will be about 1MiB in size on average.
9 | func WithBaseAverageBits(averageBits int) baseOption {
10 | 	return func(c *BaseChunker) { c.splitmask = (1 << uint64(averageBits)) - 1 }
11 | }
12 | 
13 | // WithBaseBoundaries sets custom min and max chunk size boundaries.
14 | func WithBaseBoundaries(min, max uint) baseOption {
15 | 	return func(c *BaseChunker) {
16 | 		c.MinSize = min
17 | 		c.MaxSize = max
18 | 	}
19 | }
20 | 
21 | // WithAverageBits controls the frequency of chunk discovery:
22 | // the lower averageBits, the more chunks will be identified.
23 | // The default value is 20 bits, so chunks will be about 1MiB in size on average.
24 | func WithAverageBits(averageBits int) option {
25 | 	return func(c *Chunker) { c.splitmask = (1 << uint64(averageBits)) - 1 }
26 | }
27 | 
28 | // WithBoundaries sets custom min and max chunk size boundaries.
29 | func WithBoundaries(min, max uint) option {
30 | 	return func(c *Chunker) {
31 | 		c.MinSize = min
32 | 		c.MaxSize = max
33 | 	}
34 | }
35 | 
36 | // WithBuffer sets a custom internal read buffer for the chunker.
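// The buffer is the chunker's internal read buffer (512 kiB by default). An
// illustrative sketch, assuming rd is an existing io.Reader and pol an
// irreducible polynomial (neither is defined in this file):
//
//	buf := make([]byte, 512*1024)
//	c := chunker.New(rd, pol, chunker.WithBuffer(buf))
//
// Passing a preallocated slice avoids allocating a fresh buffer for every
// chunker; Reset reuses the same buffer automatically.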
37 | func WithBuffer(buf []byte) option { 38 | return func(c *Chunker) { c.buf = buf } 39 | } 40 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: 3 | # run tests on push to master, but not when other branches are pushed to 4 | push: 5 | branches: 6 | - master 7 | 8 | # run tests for all pull requests 9 | pull_request: 10 | 11 | env: 12 | latest_go: "1.19.x" 13 | GO111MODULE: on 14 | 15 | jobs: 16 | test: 17 | strategy: 18 | matrix: 19 | # list of jobs to run: 20 | include: 21 | - job_name: Windows 22 | go: 1.19.x 23 | os: windows-latest 24 | install_verb: install 25 | 26 | - job_name: macOS 27 | go: 1.19.x 28 | os: macOS-latest 29 | install_verb: install 30 | 31 | - job_name: Linux 32 | go: 1.19.x 33 | os: ubuntu-latest 34 | install_verb: install 35 | 36 | - job_name: Linux 37 | go: 1.18.x 38 | os: ubuntu-latest 39 | install_verb: install 40 | 41 | - job_name: Linux 42 | go: 1.17.x 43 | os: ubuntu-latest 44 | install_verb: install 45 | 46 | - job_name: Linux 47 | go: 1.16.x 48 | os: ubuntu-latest 49 | install_verb: get 50 | 51 | - job_name: Linux 52 | go: 1.15.x 53 | os: ubuntu-latest 54 | install_verb: get 55 | 56 | name: ${{ matrix.job_name }} Go ${{ matrix.go }} 57 | runs-on: ${{ matrix.os }} 58 | 59 | env: 60 | GOPROXY: https://proxy.golang.org 61 | 62 | steps: 63 | - name: Set up Go ${{ matrix.go }} 64 | uses: actions/setup-go@v2 65 | with: 66 | go-version: ${{ matrix.go }} 67 | 68 | - name: Check out code 69 | uses: actions/checkout@v2 70 | 71 | - name: Run tests 72 | run: | 73 | go test -cover ./... 74 | 75 | lint: 76 | name: lint 77 | runs-on: ubuntu-latest 78 | steps: 79 | - name: Set up Go ${{ env.latest_go }} 80 | uses: actions/setup-go@v2 81 | with: 82 | go-version: ${{ env.latest_go }} 83 | 84 | - name: Check out code 85 | uses: actions/checkout@v2 86 | 87 | - name: golangci-lint 88 | uses: golangci/golangci-lint-action@v2 89 | with: 90 | # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version. 91 | version: v1.48 92 | # Optional: show only new issues if it's a pull request. The default value is `false`. 93 | only-new-issues: true 94 | args: --verbose --timeout 5m 95 | skip-go-installation: true 96 | 97 | # only run golangci-lint for pull requests, otherwise ALL hints get 98 | # reported. We need to slowly address all issues until we can enable 99 | # linting the master branch :) 100 | if: github.event_name == 'pull_request' 101 | 102 | - name: Check go.mod 103 | run: | 104 | echo "check if go.mod is up to date" 105 | go mod tidy 106 | git diff --exit-code go.mod 107 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Alexander Neumann. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /* 6 | Package chunker implements Content Defined Chunking (CDC) based on a rolling 7 | Rabin Checksum. 8 | 9 | Choosing a Random Irreducible Polynomial 10 | 11 | The function RandomPolynomial() returns a new random polynomial of degree 53 12 | for use with the chunker. 
The degree 53 is chosen because it is the largest
13 | prime below 64-8 = 56, so that the top 8 bits of a uint64 can be used for
14 | optimising calculations in the chunker.
15 | 
16 | A random polynomial is chosen by selecting 64 random bits, masking away bits
17 | 63..54 and setting bit 53 to one (otherwise the polynomial is not of the
18 | desired degree) and bit 0 to one (otherwise the polynomial is trivially
19 | reducible), so that 52 bits are chosen at random.
20 | 
21 | This process is repeated until Irreducible() returns true, then this
22 | polynomial is returned. If this doesn't happen after 1 million tries, the
23 | function returns an error. The probability of selecting an irreducible
24 | polynomial at random is about 3.8% ((2^53-2)/53 / 2^52), so the probability
25 | that no irreducible polynomial has been found after 100 tries is about 2%,
26 | and it is negligible after the full one million tries.
27 | 
28 | Verifying Irreducible Polynomials
29 | 
30 | During development the results have been verified using the computational
31 | discrete algebra system GAP, which can be obtained from the website at
32 | http://www.gap-system.org/.
33 | 
34 | For filtering a given list of polynomials in hexadecimal coefficient notation,
35 | the following script can be used:
36 | 
37 | 	# create x over F_2 = GF(2)
38 | 	x := Indeterminate(GF(2), "x");
39 | 
40 | 	# test if polynomial is irreducible, i.e. the number of factors is one
41 | 	IrredPoly := function (poly)
42 | 		return (Length(Factors(poly)) = 1);
43 | 	end;;
44 | 
45 | 	# create a polynomial in x from the hexadecimal representation of the
46 | 	# coefficients
47 | 	Hex2Poly := function (s)
48 | 		return ValuePol(CoefficientsQadic(IntHexString(s), 2), x);
49 | 	end;;
50 | 
51 | 	# list of candidates, in hex
52 | 	candidates := [ "3DA3358B4DC173" ];
53 | 
54 | 	# create real polynomials
55 | 	L := List(candidates, Hex2Poly);
56 | 
57 | 	# filter and display the list of irreducible polynomials contained in L
58 | 	Display(Filtered(L, x -> (IrredPoly(x))));
59 | 
60 | All irreducible polynomials from the list are written to the output.
61 | 
62 | Background Literature
63 | 
64 | An introduction to Rabin Fingerprints/Checksums can be found in the following articles:
65 | 
66 | Michael O. Rabin (1981): "Fingerprinting by Random Polynomials"
67 | http://www.xmailserver.org/rabin.pdf
68 | 
69 | Ross N. Williams (1993): "A Painless Guide to CRC Error Detection Algorithms"
70 | http://www.zlib.net/crc_v3.txt
71 | 
72 | Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method"
73 | http://www.xmailserver.org/rabin_apps.pdf
74 | 
75 | Shuhong Gao and Daniel Panario (1997): "Tests and Constructions of Irreducible Polynomials over Finite Fields"
76 | http://www.math.clemson.edu/~sgao/papers/GP97a.pdf
77 | 
78 | Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget"
79 | http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
80 | 
81 | */
82 | package chunker
83 | 
--------------------------------------------------------------------------------
/polynomials.go:
--------------------------------------------------------------------------------
1 | package chunker
2 | 
3 | import (
4 | 	"crypto/rand"
5 | 	"encoding/binary"
6 | 	"errors"
7 | 	"fmt"
8 | 	"io"
9 | 	"math/bits"
10 | 	"strconv"
11 | )
12 | 
13 | // Pol is a polynomial from F_2[X].
14 | type Pol uint64
15 | 
16 | // Add returns x+y.
17 | func (x Pol) Add(y Pol) Pol {
18 | 	r := Pol(uint64(x) ^ uint64(y))
19 | 	return r
20 | }
21 | 
22 | // Mul returns x*y. When an overflow occurs, Mul panics.
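// A small worked example, mirroring one of the cases in polynomials_test.go:
// coefficients live in F_2, so the additions inside the multiplication are
// carry-less XORs:
//
//	Pol(0xd).Mul(Pol(0x3)) == Pol(0x17) // 1101 * 11 = 10111, i.e. (x^3+x^2+1)*(x+1) = x^4+x^2+x+1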
23 | func (x Pol) Mul(y Pol) Pol {
24 | 	switch {
25 | 	case x == 0 || y == 0:
26 | 		return 0
27 | 	case x == 1:
28 | 		return y
29 | 	case y == 1:
30 | 		return x
31 | 	case y == 2:
32 | 		return x.mul2()
33 | 	}
34 | 
35 | 	var res Pol
36 | 	for i := 0; i <= y.Deg(); i++ {
37 | 		if (y & (1 << uint(i))) > 0 {
38 | 			res = res.Add(x << uint(i))
39 | 		}
40 | 	}
41 | 
42 | 	if res.Div(y) != x {
43 | 		panic("multiplication would overflow uint64")
44 | 	}
45 | 
46 | 	return res
47 | }
48 | 
49 | // 2*x.
50 | func (x Pol) mul2() Pol {
51 | 	if x&(1<<63) != 0 {
52 | 		panic("multiplication would overflow uint64")
53 | 	}
54 | 	return x << 1
55 | }
56 | 
57 | // Deg returns the degree of the polynomial x. If x is zero, -1 is returned.
58 | func (x Pol) Deg() int {
59 | 	return bits.Len64(uint64(x)) - 1
60 | }
61 | 
62 | // String returns the coefficients in hex.
63 | func (x Pol) String() string {
64 | 	return "0x" + strconv.FormatUint(uint64(x), 16)
65 | }
66 | 
67 | // Expand returns the string representation of the polynomial x.
68 | func (x Pol) Expand() string {
69 | 	if x == 0 {
70 | 		return "0"
71 | 	}
72 | 
73 | 	s := ""
74 | 	for i := x.Deg(); i > 1; i-- {
75 | 		if x&(1<<uint(i)) > 0 {
76 | 			s += fmt.Sprintf("+x^%d", i)
77 | 		}
78 | 	}
79 | 
80 | 	if x&2 > 0 {
81 | 		s += "+x"
82 | 	}
83 | 
84 | 	if x&1 > 0 {
85 | 		s += "+1"
86 | 	}
87 | 
88 | 	return s[1:]
89 | }
90 | 
91 | // DivMod returns the quotient q = x / d and the remainder r,
92 | // see https://en.wikipedia.org/wiki/Division_algorithm
93 | func (x Pol) DivMod(d Pol) (Pol, Pol) {
94 | 	if x == 0 {
95 | 		return 0, 0
96 | 	}
97 | 
98 | 	if d == 0 {
99 | 		panic("division by zero")
100 | 	}
101 | 
102 | 	D := d.Deg()
103 | 	diff := x.Deg() - D
104 | 	if diff < 0 {
105 | 		return 0, x
106 | 	}
107 | 
108 | 	var q Pol
109 | 	for diff >= 0 {
110 | 		m := d << uint(diff)
111 | 		q |= (1 << uint(diff))
112 | 		x = x.Add(m)
113 | 
114 | 		diff = x.Deg() - D
115 | 	}
116 | 
117 | 	return q, x
118 | }
119 | 
120 | // Div returns the integer division result x / d.
121 | func (x Pol) Div(d Pol) Pol {
122 | 	q, _ := x.DivMod(d)
123 | 	return q
124 | }
125 | 
126 | // Mod returns the remainder of x / d.
127 | func (x Pol) Mod(d Pol) Pol {
128 | 	_, r := x.DivMod(d)
129 | 	return r
130 | }
131 | 
132 | // I really dislike having a function that does not terminate, so specify a
133 | // really large upper bound for finding a new irreducible polynomial, and
134 | // return an error when no irreducible polynomial has been found within
135 | // randPolMaxTries.
136 | const randPolMaxTries = 1e6
137 | 
138 | // RandomPolynomial returns a new random irreducible polynomial
139 | // of degree 53 using the default system CSPRNG as source.
140 | // It is equivalent to calling DerivePolynomial(rand.Reader).
141 | func RandomPolynomial() (Pol, error) {
142 | 	return DerivePolynomial(rand.Reader)
143 | }
144 | 
145 | // DerivePolynomial returns an irreducible polynomial of degree 53
146 | // (largest prime number below 64-8) by reading bytes from source.
147 | // There are (2^53-2)/53 irreducible polynomials of degree 53 in
148 | // F_2[X], c.f. Michael O. Rabin (1981): "Fingerprinting by Random
149 | // Polynomials", page 4. If no polynomial could be found in one
150 | // million tries, an error is returned.
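// Illustrative sketch (not from the original sources): any io.Reader works as
// the source, so a seeded math/rand generator gives a reproducible polynomial,
// while RandomPolynomial above uses crypto/rand for a fresh one. The seed 23 is
// arbitrary:
//
//	rng := rand.New(rand.NewSource(23)) // math/rand; *rand.Rand implements io.Reader
//	pol, err := chunker.DerivePolynomial(rng)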
151 | func DerivePolynomial(source io.Reader) (Pol, error) { 152 | for i := 0; i < randPolMaxTries; i++ { 153 | var f Pol 154 | 155 | // choose polynomial at (pseudo)random 156 | err := binary.Read(source, binary.LittleEndian, &f) 157 | if err != nil { 158 | return 0, err 159 | } 160 | 161 | // mask away bits above bit 53 162 | f &= Pol((1 << 54) - 1) 163 | 164 | // set highest and lowest bit so that the degree is 53 and the 165 | // polynomial is not trivially reducible 166 | f |= (1 << 53) | 1 167 | 168 | // test if f is irreducible 169 | if f.Irreducible() { 170 | return f, nil 171 | } 172 | } 173 | 174 | // If this is reached, we haven't found an irreducible polynomial in 175 | // randPolMaxTries. This error is very unlikely to occur. 176 | return 0, errors.New("unable to find new random irreducible polynomial") 177 | } 178 | 179 | // GCD computes the Greatest Common Divisor x and f. 180 | func (x Pol) GCD(f Pol) Pol { 181 | if f == 0 { 182 | return x 183 | } 184 | 185 | if x == 0 { 186 | return f 187 | } 188 | 189 | if x.Deg() < f.Deg() { 190 | x, f = f, x 191 | } 192 | 193 | return f.GCD(x.Mod(f)) 194 | } 195 | 196 | // Irreducible returns true iff x is irreducible over F_2. This function 197 | // uses Ben Or's reducibility test. 198 | // 199 | // For details see "Tests and Constructions of Irreducible Polynomials over 200 | // Finite Fields". 201 | func (x Pol) Irreducible() bool { 202 | for i := 1; i <= x.Deg()/2; i++ { 203 | if x.GCD(qp(uint(i), x)) != 1 { 204 | return false 205 | } 206 | } 207 | 208 | return true 209 | } 210 | 211 | // MulMod computes x*f mod g 212 | func (x Pol) MulMod(f, g Pol) Pol { 213 | if x == 0 || f == 0 { 214 | return 0 215 | } 216 | 217 | var res Pol 218 | for i := 0; i <= f.Deg(); i++ { 219 | if (f & (1 << uint(i))) > 0 { 220 | a := x 221 | for j := 0; j < i; j++ { 222 | a = a.Mul(2).Mod(g) 223 | } 224 | res = res.Add(a).Mod(g) 225 | } 226 | } 227 | 228 | return res 229 | } 230 | 231 | // qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the 232 | // reducibility test. 233 | func qp(p uint, g Pol) Pol { 234 | num := (1 << p) 235 | i := 1 236 | 237 | // start with x 238 | res := Pol(2) 239 | 240 | for i < num { 241 | // repeatedly square res 242 | res = res.MulMod(res, g) 243 | i *= 2 244 | } 245 | 246 | // add x 247 | return res.Add(2).Mod(g) 248 | } 249 | 250 | // MarshalJSON returns the JSON representation of the Pol. 251 | func (x Pol) MarshalJSON() ([]byte, error) { 252 | buf := strconv.AppendUint([]byte{'"'}, uint64(x), 16) 253 | buf = append(buf, '"') 254 | return buf, nil 255 | } 256 | 257 | // UnmarshalJSON parses a Pol from the JSON data. 
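// Illustrative round trip (assumed caller-side usage, not from the original
// sources): MarshalJSON above encodes the polynomial as a quoted lower-case hex
// string without the 0x prefix, and UnmarshalJSON below reverses it:
//
//	b, _ := json.Marshal(chunker.Pol(0x3DA3358B4DC173)) // `"3da3358b4dc173"`
//	var p chunker.Pol
//	_ = json.Unmarshal(b, &p)                            // p == 0x3DA3358B4DC173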
258 | func (x *Pol) UnmarshalJSON(data []byte) error { 259 | if len(data) < 2 { 260 | return errors.New("invalid string for polynomial") 261 | } 262 | n, err := strconv.ParseUint(string(data[1:len(data)-1]), 16, 64) 263 | if err != nil { 264 | return err 265 | } 266 | *x = Pol(n) 267 | 268 | return nil 269 | } 270 | -------------------------------------------------------------------------------- /polynomials_test.go: -------------------------------------------------------------------------------- 1 | package chunker 2 | 3 | import ( 4 | "strconv" 5 | "testing" 6 | ) 7 | 8 | var polAddTests = []struct { 9 | x, y Pol 10 | sum Pol 11 | }{ 12 | {23, 16, 23 ^ 16}, 13 | {0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4}, 14 | {0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0}, 15 | } 16 | 17 | func TestPolAdd(t *testing.T) { 18 | for i, test := range polAddTests { 19 | if test.sum != test.x.Add(test.y) { 20 | t.Errorf("test %d failed: sum != x+y", i) 21 | } 22 | 23 | if test.sum != test.y.Add(test.x) { 24 | t.Errorf("test %d failed: sum != y+x", i) 25 | } 26 | } 27 | } 28 | 29 | func parseBin(s string) Pol { 30 | i, err := strconv.ParseUint(s, 2, 64) 31 | if err != nil { 32 | panic(err) 33 | } 34 | 35 | return Pol(i) 36 | } 37 | 38 | var polMulTests = []struct { 39 | x, y Pol 40 | res Pol 41 | }{ 42 | {1, 2, 2}, 43 | { 44 | parseBin("1101"), 45 | parseBin("10"), 46 | parseBin("11010"), 47 | }, 48 | { 49 | parseBin("1101"), 50 | parseBin("11"), 51 | parseBin("10111"), 52 | }, 53 | { 54 | 0x40000000, 55 | 0x40000000, 56 | 0x1000000000000000, 57 | }, 58 | { 59 | parseBin("1010"), 60 | parseBin("100100"), 61 | parseBin("101101000"), 62 | }, 63 | { 64 | parseBin("100"), 65 | parseBin("11"), 66 | parseBin("1100"), 67 | }, 68 | { 69 | parseBin("11"), 70 | parseBin("110101"), 71 | parseBin("1011111"), 72 | }, 73 | { 74 | parseBin("10011"), 75 | parseBin("110101"), 76 | parseBin("1100001111"), 77 | }, 78 | } 79 | 80 | func TestPolMul(t *testing.T) { 81 | for i, test := range polMulTests { 82 | m := test.x.Mul(test.y) 83 | if test.res != m { 84 | t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v", 85 | i, test.x, test.y, test.res, m) 86 | } 87 | m = test.y.Mul(test.x) 88 | if test.res != test.y.Mul(test.x) { 89 | t.Errorf("TestPolMul failed for %d: %v * %v: want %v, got %v", 90 | i, test.x, test.y, test.res, m) 91 | } 92 | } 93 | } 94 | 95 | func TestPolMulOverflow(t *testing.T) { 96 | defer func() { 97 | // try to recover overflow error 98 | err := recover() 99 | 100 | if e, ok := err.(string); ok && e == "multiplication would overflow uint64" { 101 | return 102 | } 103 | 104 | t.Logf("invalid error raised: %v", err) 105 | // re-raise error if not overflow 106 | panic(err) 107 | }() 108 | 109 | x := Pol(1 << 63) 110 | x.Mul(2) 111 | t.Fatal("overflow test did not panic") 112 | } 113 | 114 | var polDivTests = []struct { 115 | x, y Pol 116 | res Pol 117 | }{ 118 | {10, 50, 0}, 119 | {0, 1, 0}, 120 | { 121 | parseBin("101101000"), // 0x168 122 | parseBin("1010"), // 0xa 123 | parseBin("100100"), // 0x24 124 | }, 125 | {2, 2, 1}, 126 | { 127 | 0x8000000000000000, 128 | 0x8000000000000000, 129 | 1, 130 | }, 131 | { 132 | parseBin("1100"), 133 | parseBin("100"), 134 | parseBin("11"), 135 | }, 136 | { 137 | parseBin("1100001111"), 138 | parseBin("10011"), 139 | parseBin("110101"), 140 | }, 141 | } 142 | 143 | func TestPolDiv(t *testing.T) { 144 | for i, test := range polDivTests { 145 | m := test.x.Div(test.y) 146 | if test.res != m { 147 | t.Errorf("TestPolDiv failed for test %d: %v * %v: 
want %v, got %v", 148 | i, test.x, test.y, test.res, m) 149 | } 150 | } 151 | } 152 | 153 | func TestPolDeg(t *testing.T) { 154 | var x Pol 155 | if x.Deg() != -1 { 156 | t.Errorf("deg(0) is not -1: %v", x.Deg()) 157 | } 158 | 159 | x = 1 160 | if x.Deg() != 0 { 161 | t.Errorf("deg(1) is not 0: %v", x.Deg()) 162 | } 163 | 164 | for i := 0; i < 64; i++ { 165 | x = 1 << uint(i) 166 | if x.Deg() != i { 167 | t.Errorf("deg(1<<%d) is not %d: %v", i, i, x.Deg()) 168 | } 169 | } 170 | } 171 | 172 | var polModTests = []struct { 173 | x, y Pol 174 | res Pol 175 | }{ 176 | {10, 50, 10}, 177 | {0, 1, 0}, 178 | { 179 | parseBin("101101001"), 180 | parseBin("1010"), 181 | parseBin("1"), 182 | }, 183 | {2, 2, 0}, 184 | { 185 | 0x8000000000000000, 186 | 0x8000000000000000, 187 | 0, 188 | }, 189 | { 190 | parseBin("1100"), 191 | parseBin("100"), 192 | parseBin("0"), 193 | }, 194 | { 195 | parseBin("1100001111"), 196 | parseBin("10011"), 197 | parseBin("0"), 198 | }, 199 | } 200 | 201 | func TestPolModt(t *testing.T) { 202 | for i, test := range polModTests { 203 | res := test.x.Mod(test.y) 204 | if test.res != res { 205 | t.Errorf("test %d failed: want %v, got %v", i, test.res, res) 206 | } 207 | } 208 | } 209 | 210 | func BenchmarkPolDivMod(t *testing.B) { 211 | f := Pol(0x2482734cacca49) 212 | g := Pol(0x3af4b284899) 213 | 214 | for i := 0; i < t.N; i++ { 215 | g.DivMod(f) 216 | } 217 | } 218 | 219 | func BenchmarkPolDiv(t *testing.B) { 220 | f := Pol(0x2482734cacca49) 221 | g := Pol(0x3af4b284899) 222 | 223 | for i := 0; i < t.N; i++ { 224 | g.Div(f) 225 | } 226 | } 227 | 228 | func BenchmarkPolMod(t *testing.B) { 229 | f := Pol(0x2482734cacca49) 230 | g := Pol(0x3af4b284899) 231 | 232 | for i := 0; i < t.N; i++ { 233 | g.Mod(f) 234 | } 235 | } 236 | 237 | func BenchmarkPolDeg(t *testing.B) { 238 | f := Pol(0x3af4b284899) 239 | d := f.Deg() 240 | if d != 41 { 241 | t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d", 242 | d, 41) 243 | } 244 | 245 | var sum int 246 | for i := 0; i < t.N; i++ { 247 | sum += f.Deg() 248 | } 249 | // Make sure Deg call isn't optimized away. 
250 | t.Log("sum of Deg:", sum) 251 | } 252 | 253 | func TestRandomPolynomial(t *testing.T) { 254 | _, err := RandomPolynomial() 255 | if err != nil { 256 | t.Fatal(err) 257 | } 258 | } 259 | 260 | func BenchmarkRandomPolynomial(t *testing.B) { 261 | for i := 0; i < t.N; i++ { 262 | _, err := RandomPolynomial() 263 | if err != nil { 264 | t.Fatal(err) 265 | } 266 | } 267 | } 268 | 269 | func TestExpandPolynomial(t *testing.T) { 270 | pol := Pol(0x3DA3358B4DC173) 271 | s := pol.Expand() 272 | if s != "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1" { 273 | t.Fatal("wrong result") 274 | } 275 | } 276 | 277 | var polIrredTests = []struct { 278 | f Pol 279 | irred bool 280 | }{ 281 | {0x38f1e565e288df, false}, 282 | {0x3DA3358B4DC173, true}, 283 | {0x30a8295b9d5c91, false}, 284 | {0x255f4350b962cb, false}, 285 | {0x267f776110a235, false}, 286 | {0x2f4dae10d41227, false}, 287 | {0x2482734cacca49, true}, 288 | {0x312daf4b284899, false}, 289 | {0x29dfb6553d01d1, false}, 290 | {0x3548245eb26257, false}, 291 | {0x3199e7ef4211b3, false}, 292 | {0x362f39017dae8b, false}, 293 | {0x200d57aa6fdacb, false}, 294 | {0x35e0a4efa1d275, false}, 295 | {0x2ced55b026577f, false}, 296 | {0x260b012010893d, false}, 297 | {0x2df29cbcd59e9d, false}, 298 | {0x3f2ac7488bd429, false}, 299 | {0x3e5cb1711669fb, false}, 300 | {0x226d8de57a9959, false}, 301 | {0x3c8de80aaf5835, false}, 302 | {0x2026a59efb219b, false}, 303 | {0x39dfa4d13fb231, false}, 304 | {0x3143d0464b3299, false}, 305 | } 306 | 307 | func TestPolIrreducible(t *testing.T) { 308 | for _, test := range polIrredTests { 309 | if test.f.Irreducible() != test.irred { 310 | t.Errorf("Irreducibility test for Polynomial %v failed: got %v, wanted %v", 311 | test.f, test.f.Irreducible(), test.irred) 312 | } 313 | } 314 | } 315 | 316 | func BenchmarkPolIrreducible(b *testing.B) { 317 | // find first irreducible polynomial 318 | var pol Pol 319 | for _, test := range polIrredTests { 320 | if test.irred { 321 | pol = test.f 322 | break 323 | } 324 | } 325 | 326 | for i := 0; i < b.N; i++ { 327 | if !pol.Irreducible() { 328 | b.Errorf("Irreducibility test for Polynomial %v failed", pol) 329 | } 330 | } 331 | } 332 | 333 | var polGCDTests = []struct { 334 | f1 Pol 335 | f2 Pol 336 | gcd Pol 337 | }{ 338 | {10, 50, 2}, 339 | {0, 1, 1}, 340 | { 341 | parseBin("101101001"), 342 | parseBin("1010"), 343 | parseBin("1"), 344 | }, 345 | {2, 2, 2}, 346 | { 347 | parseBin("1010"), 348 | parseBin("11"), 349 | parseBin("11"), 350 | }, 351 | { 352 | 0x8000000000000000, 353 | 0x8000000000000000, 354 | 0x8000000000000000, 355 | }, 356 | { 357 | parseBin("1100"), 358 | parseBin("101"), 359 | parseBin("11"), 360 | }, 361 | { 362 | parseBin("1100001111"), 363 | parseBin("10011"), 364 | parseBin("10011"), 365 | }, 366 | { 367 | 0x3DA3358B4DC173, 368 | 0x3DA3358B4DC173, 369 | 0x3DA3358B4DC173, 370 | }, 371 | { 372 | 0x3DA3358B4DC173, 373 | 0x230d2259defd, 374 | 1, 375 | }, 376 | { 377 | 0x230d2259defd, 378 | 0x51b492b3eff2, 379 | parseBin("10011"), 380 | }, 381 | } 382 | 383 | func TestPolGCD(t *testing.T) { 384 | for i, test := range polGCDTests { 385 | gcd := test.f1.GCD(test.f2) 386 | if test.gcd != gcd { 387 | t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v", 388 | i, test, gcd, test.gcd) 389 | } 390 | 391 | gcd = test.f2.GCD(test.f1) 392 | if test.gcd != gcd { 393 | t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v", 394 | i, test, gcd, test.gcd) 395 | } 396 | } 397 | } 398 | 399 | var 
polMulModTests = []struct {
400 | 	f1  Pol
401 | 	f2  Pol
402 | 	g   Pol
403 | 	mod Pol
404 | }{
405 | 	{
406 | 		0x1230,
407 | 		0x230,
408 | 		0x55,
409 | 		0x22,
410 | 	},
411 | 	{
412 | 		0x0eae8c07dbbb3026,
413 | 		0xd5d6db9de04771de,
414 | 		0xdd2bda3b77c9,
415 | 		0x425ae8595b7a,
416 | 	},
417 | }
418 | 
419 | func TestPolMulMod(t *testing.T) {
420 | 	for i, test := range polMulModTests {
421 | 		mod := test.f1.MulMod(test.f2, test.g)
422 | 		if mod != test.mod {
423 | 			t.Errorf("MulMod test %d (%+v) failed: got %v, wanted %v",
424 | 				i, test, mod, test.mod)
425 | 		}
426 | 	}
427 | }
428 | 
--------------------------------------------------------------------------------
/chunker.go:
--------------------------------------------------------------------------------
1 | package chunker
2 | 
3 | import (
4 | 	"io"
5 | 	"sync"
6 | )
7 | 
8 | const (
9 | 	kiB = 1024
10 | 	miB = 1024 * kiB
11 | 
12 | 	// windowSize is the size of the sliding window.
13 | 	windowSize = 64
14 | 
15 | 	// MinSize is the default minimal size of a chunk.
16 | 	MinSize = 512 * kiB
17 | 	// MaxSize is the default maximal size of a chunk.
18 | 	MaxSize = 8 * miB
19 | 
20 | 	chunkerBufSize = 512 * kiB
21 | )
22 | 
23 | type tables struct {
24 | 	out [256]Pol
25 | 	mod [256]Pol
26 | }
27 | 
28 | // cache precomputed tables; these are read-only anyway
29 | var cache struct {
30 | 	entries map[Pol]tables
31 | 	sync.Mutex
32 | }
33 | 
34 | func init() {
35 | 	cache.entries = make(map[Pol]tables)
36 | }
37 | 
38 | type chunkerState struct {
39 | 	window [windowSize]byte
40 | 	wpos   uint
41 | 	digest uint64
42 | 
43 | 	pre   uint // wait for this many bytes before starting to calculate a new chunk
44 | 	count uint // used for max chunk size tracking
45 | }
46 | 
47 | type chunkerConfig struct {
48 | 	MinSize, MaxSize uint
49 | 
50 | 	pol               Pol
51 | 	polShift          uint
52 | 	tables            tables
53 | 	tablesInitialized bool
54 | 	splitmask         uint64
55 | }
56 | 
57 | // BaseChunker splits content with Rabin Fingerprints.
58 | type BaseChunker struct {
59 | 	chunkerConfig
60 | 	chunkerState
61 | }
62 | 
// NewBase returns a new BaseChunker based on polynomial pol. Its behavior can
// be customized by passing options, see the With* functions.
63 | func NewBase(pol Pol, opts ...baseOption) *BaseChunker {
64 | 	c := &BaseChunker{
65 | 		chunkerState: chunkerState{},
66 | 		chunkerConfig: chunkerConfig{
67 | 			pol:       pol,
68 | 			MinSize:   MinSize,
69 | 			MaxSize:   MaxSize,
70 | 			splitmask: (1 << 20) - 1, // aim for chunks of about 1MiB (2^20 bytes) on average
71 | 		},
72 | 	}
73 | 
74 | 	for _, opt := range opts {
75 | 		opt(c)
76 | 	}
77 | 
78 | 	c.reset()
79 | 	return c
80 | }
81 | 
82 | // Reset reinitializes the chunker with a new polynomial and options.
83 | func (c *BaseChunker) Reset(pol Pol, opts ...baseOption) {
84 | 	*c = *NewBase(pol, opts...)
85 | }
86 | 
87 | func (c *BaseChunker) reset() {
88 | 	c.polShift = uint(c.pol.Deg() - 8)
89 | 	c.fillTables()
90 | 
91 | 	for i := 0; i < windowSize; i++ {
92 | 		c.window[i] = 0
93 | 	}
94 | 
95 | 	c.digest = 0
96 | 	c.wpos = 0
97 | 	c.count = 0
98 | 	c.digest = c.slide(c.digest, 1)
99 | 
100 | 	// do not start a new chunk unless at least MinSize bytes have been read
101 | 	c.pre = c.MinSize - windowSize
102 | }
103 | 
104 | // fillTables calculates out_table and mod_table for optimization. This
105 | // implementation uses a cache in the global variable cache.
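// Building the two 256-entry tables is comparatively expensive (the out table
// alone performs one polynomial reduction per window position for each byte
// value), so the result is stored in the mutex-protected package-level cache
// above and shared by all chunkers that use the same polynomial.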
106 | func (c *BaseChunker) fillTables() { 107 | // if polynomial hasn't been specified, do not compute anything for now 108 | if c.pol == 0 { 109 | return 110 | } 111 | 112 | c.tablesInitialized = true 113 | 114 | // test if the tables are cached for this polynomial 115 | cache.Lock() 116 | defer cache.Unlock() 117 | if t, ok := cache.entries[c.pol]; ok { 118 | c.tables = t 119 | return 120 | } 121 | 122 | // calculate table for sliding out bytes. The byte to slide out is used as 123 | // the index for the table, the value contains the following: 124 | // out_table[b] = Hash(b || 0 || ... || 0) 125 | // \ windowsize-1 zero bytes / 126 | // To slide out byte b_0 for window size w with known hash 127 | // H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]: 128 | // H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0) 129 | // = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0) 130 | // = H( 0 || b_1 || ... || b_w) 131 | // 132 | // Afterwards a new byte can be shifted in. 133 | for b := 0; b < 256; b++ { 134 | var h Pol 135 | 136 | h = appendByte(h, byte(b), c.pol) 137 | for i := 0; i < windowSize-1; i++ { 138 | h = appendByte(h, 0, c.pol) 139 | } 140 | c.tables.out[b] = h 141 | } 142 | 143 | // calculate table for reduction mod Polynomial 144 | k := c.pol.Deg() 145 | for b := 0; b < 256; b++ { 146 | // mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k 147 | // 148 | // The 8 bits above deg(Polynomial) determine what happens next and so 149 | // these bits are used as a lookup to this table. The value is split in 150 | // two parts: Part A contains the result of the modulus operation, part 151 | // B is used to cancel out the 8 top bits so that one XOR operation is 152 | // enough to reduce modulo Polynomial 153 | c.tables.mod[b] = Pol(uint64(b)<> polShift' 171 | if polShift > 53-8 { 172 | panic("the polynomial must have a degree less than or equal 53") 173 | } 174 | minSize := c.MinSize 175 | maxSize := c.MaxSize 176 | 177 | idx := 0 178 | // check if bytes have to be dismissed before starting a new chunk 179 | if c.pre > 0 { 180 | if c.pre >= uint(len(buf)) { 181 | c.pre -= uint(len(buf)) 182 | c.count += uint(len(buf)) 183 | return -1, 0 184 | } 185 | 186 | buf = buf[c.pre:] 187 | idx = int(c.pre) 188 | c.count += c.pre 189 | c.pre = 0 190 | } 191 | 192 | add := c.count 193 | digest := c.digest 194 | win := c.window 195 | wpos := c.wpos 196 | for i, b := range buf { 197 | // limit wpos to elide array bound checks 198 | out := win[wpos%windowSize] 199 | win[wpos%windowSize] = b 200 | digest ^= uint64(tab.out[out]) 201 | wpos++ 202 | 203 | digest = updateDigest(digest, polShift, tab, b) 204 | // end manual inline 205 | 206 | add++ 207 | 208 | if (digest&c.splitmask) == 0 || add >= maxSize { 209 | if add < minSize { 210 | continue 211 | } 212 | c.reset() 213 | return idx + i + 1, digest 214 | } 215 | } 216 | c.digest = digest 217 | c.window = win 218 | c.wpos = wpos % windowSize 219 | c.count += uint(len(buf)) 220 | return -1, 0 221 | } 222 | 223 | func updateDigest(digest uint64, polShift uint, tab *tables, b byte) (newDigest uint64) { 224 | index := digest >> polShift 225 | digest <<= 8 226 | digest |= uint64(b) 227 | 228 | digest ^= uint64(tab.mod[index]) 229 | return digest 230 | } 231 | 232 | func (c *BaseChunker) slide(digest uint64, b byte) (newDigest uint64) { 233 | out := c.window[c.wpos] 234 | c.window[c.wpos] = b 235 | digest ^= uint64(c.tables.out[out]) 236 | c.wpos = (c.wpos + 1) % windowSize 237 | 238 | digest = updateDigest(digest, c.polShift, 
&c.tables, b) 239 | return digest 240 | } 241 | 242 | func appendByte(hash Pol, b byte, pol Pol) Pol { 243 | hash <<= 8 244 | hash |= Pol(b) 245 | 246 | return hash.Mod(pol) 247 | } 248 | 249 | // Chunk is one content-dependent chunk of bytes whose end was cut when the 250 | // Rabin Fingerprint had the value stored in Cut. 251 | type Chunk struct { 252 | Start uint 253 | Length uint 254 | Cut uint64 255 | Data []byte 256 | } 257 | 258 | type chunkerBuffer struct { 259 | buf []byte 260 | bpos uint 261 | bmax uint 262 | pos uint 263 | 264 | rd io.Reader 265 | closed bool 266 | } 267 | 268 | // Chunker splits content with Rabin Fingerprints. 269 | type Chunker struct { 270 | BaseChunker 271 | chunkerBuffer 272 | } 273 | 274 | // New returns a new Chunker based on polynomial p that reads from rd. 275 | // Chunker behavior can be customized by passing options, see With* functions. 276 | func New(rd io.Reader, pol Pol, opts ...option) *Chunker { 277 | c := &Chunker{ 278 | BaseChunker: *NewBase(pol), 279 | chunkerBuffer: chunkerBuffer{ 280 | buf: make([]byte, chunkerBufSize), 281 | rd: rd, 282 | }, 283 | } 284 | 285 | for _, opt := range opts { 286 | opt(c) 287 | } 288 | 289 | if c.buf == nil { 290 | c.buf = make([]byte, chunkerBufSize) 291 | } 292 | 293 | c.reset() 294 | return c 295 | } 296 | 297 | // NewWithBoundaries returns a new Chunker based on polynomial p that reads from 298 | // rd and custom min and max size boundaries. 299 | // 300 | // Deprecated: NewWithBoundaries uses should be replaced by New(rd, pol, WithBoundaries(min, max)). 301 | func NewWithBoundaries(rd io.Reader, pol Pol, min, max uint) *Chunker { 302 | return New(rd, pol, WithBoundaries(min, max)) 303 | } 304 | 305 | // SetAverageBits allows to control the frequency of chunk discovery: 306 | // the lower averageBits, the higher amount of chunks will be identified. 307 | // The default value is 20 bits, so chunks will be of 1MiB size on average. 308 | // 309 | // Deprecated: SetAverageBits uses should be replaced by NewBase(rd, pol, WithAverageBits(averageBits)). 310 | func (c *Chunker) SetAverageBits(averageBits int) { 311 | c.splitmask = (1 << uint64(averageBits)) - 1 312 | } 313 | 314 | // Reset reinitializes the chunker with a new reader, polynomial, and options. 315 | func (c *Chunker) Reset(rd io.Reader, pol Pol, opts ...option) { 316 | opts = append([]option{WithBuffer(c.buf)}, opts...) 317 | *c = *New(rd, pol, opts...) 318 | } 319 | 320 | // Deprecated: ResetWithBoundaries uses should be replaced by Reset(rd, pol, WithBoundaries(min, max)). 321 | func (c *Chunker) ResetWithBoundaries(rd io.Reader, pol Pol, min, max uint) { 322 | c.Reset(rd, pol, WithBoundaries(min, max)) 323 | } 324 | 325 | // Next returns the position and length of the next chunk of data. If an error 326 | // occurs while reading, the error is returned. Afterwards, the state of the 327 | // current chunk is undefined. When the last chunk has been returned, all 328 | // subsequent calls yield an io.EOF error. 329 | func (c *Chunker) Next(data []byte) (Chunk, error) { 330 | data = data[:0] 331 | start := c.pos 332 | for { 333 | if c.bpos >= c.bmax { 334 | n, err := io.ReadFull(c.rd, c.buf) 335 | 336 | if err == io.ErrUnexpectedEOF { 337 | err = nil 338 | } 339 | 340 | // io.ReadFull only returns io.EOF when no bytes could be read. If 341 | // this is the case and we're in this branch, there are no more 342 | // bytes to buffer, so this was the last chunk. 
If a different 343 | // error has occurred, return that error and abandon the current 344 | // chunk. 345 | if err == io.EOF && !c.closed { 346 | c.closed = true 347 | 348 | // return current chunk, if any bytes have been processed 349 | if len(data) > 0 { 350 | return Chunk{ 351 | Start: start, 352 | Length: uint(len(data)), 353 | // somewhat meaningless as this is not a split point 354 | Cut: c.digest, 355 | Data: data, 356 | }, nil 357 | } 358 | } 359 | 360 | if err != nil { 361 | return Chunk{}, err 362 | } 363 | 364 | c.bpos = 0 365 | c.bmax = uint(n) 366 | } 367 | 368 | split, cut := c.NextSplitPoint(c.buf[c.bpos:c.bmax]) 369 | if split == -1 { 370 | data = append(data, c.buf[c.bpos:c.bmax]...) 371 | c.pos += c.bmax - c.bpos 372 | c.bpos = c.bmax 373 | } else { 374 | data = append(data, c.buf[c.bpos:c.bpos+uint(split)]...) 375 | c.bpos += uint(split) 376 | c.pos += uint(split) 377 | 378 | return Chunk{ 379 | Start: start, 380 | Length: uint(len(data)), 381 | Cut: cut, 382 | Data: data, 383 | }, nil 384 | } 385 | } 386 | } 387 | -------------------------------------------------------------------------------- /chunker_test.go: -------------------------------------------------------------------------------- 1 | package chunker 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha256" 6 | "encoding/hex" 7 | "io" 8 | "math/rand" 9 | "reflect" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | func parseDigest(s string) []byte { 15 | d, err := hex.DecodeString(s) 16 | if err != nil { 17 | panic(err) 18 | } 19 | 20 | return d 21 | } 22 | 23 | type chunk struct { 24 | Length uint 25 | CutFP uint64 26 | Digest []byte 27 | } 28 | 29 | // polynomial used for all the tests below 30 | const testPol = Pol(0x3DA3358B4DC173) 31 | 32 | // created for 32MB of random data out of math/rand's Uint32() seeded by 33 | // constant 23 34 | // 35 | // chunking configuration: 36 | // window size 64, avg chunksize 1<<20, min chunksize 1<<19, max chunksize 1<<23 37 | // polynom 0x3DA3358B4DC173 38 | var chunks1 = []chunk{ 39 | {2163460, 0x000b98d4cdf00000, parseDigest("4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d")}, 40 | {643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")}, 41 | {1528956, 0x0015a25c2ef00000, parseDigest("a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba")}, 42 | {1955808, 0x00102a8242e00000, parseDigest("c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824")}, 43 | {2222372, 0x00045da878000000, parseDigest("6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56")}, 44 | {2538687, 0x00198a8179900000, parseDigest("8687937412f654b5cfe4a82b08f28393a0c040f77c6f95e26742c2fc4254bfde")}, 45 | {609606, 0x001d4e8d17100000, parseDigest("5da820742ff5feb3369112938d3095785487456f65a8efc4b96dac4be7ebb259")}, 46 | {1205738, 0x000a7204dd600000, parseDigest("cc70d8fad5472beb031b1aca356bcab86c7368f40faa24fe5f8922c6c268c299")}, 47 | {959742, 0x00183e71e1400000, parseDigest("4065bdd778f95676c92b38ac265d361f81bff17d76e5d9452cf985a2ea5a4e39")}, 48 | {4036109, 0x001fec043c700000, parseDigest("b9cf166e75200eb4993fc9b6e22300a6790c75e6b0fc8f3f29b68a752d42f275")}, 49 | {1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")}, 50 | {1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")}, 51 | {811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")}, 52 | {1282314, 
0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")}, 53 | {1318021, 0x001cceb980000000, parseDigest("842eb53543db55bacac5e25cb91e43cc2e310fe5f9acc1aee86bdf5e91389374")}, 54 | {948640, 0x0011f7a470a00000, parseDigest("b8e36bf7019bb96ac3fb7867659d2167d9d3b3148c09fe0de45850b8fe577185")}, 55 | {645464, 0x00030ce2d9400000, parseDigest("5584bd27982191c3329f01ed846bfd266e96548dfa87018f745c33cfc240211d")}, 56 | {533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")}, 57 | {1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")}, 58 | {800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")}, 59 | {2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")}, 60 | {2651975, 0x000ae6c868000000, parseDigest("181347d2bbec32bef77ad5e9001e6af80f6abcf3576549384d334ee00c1988d8")}, 61 | {237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")}, 62 | } 63 | 64 | // test if nullbytes are correctly split, even if length is a multiple of MinSize. 65 | var chunks2 = []chunk{ 66 | {MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, 67 | {MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, 68 | {MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, 69 | {MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, 70 | } 71 | 72 | // the same as chunks1, but avg chunksize is 1<<19 73 | var chunks3 = []chunk{ 74 | {1491586, 0x00023e586ea80000, parseDigest("4c008237df602048039287427171cef568a6cb965d1b5ca28dc80504a24bb061")}, 75 | {671874, 0x000b98d4cdf00000, parseDigest("fa8a42321b90c3d4ce9dd850562b2fd0c0fe4bdd26cf01a24f22046a224225d3")}, 76 | {643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")}, 77 | {1284146, 0x0012b527e4780000, parseDigest("16d04cafecbeae9eaedd49da14c7ad7cdc2b1cc8569e5c16c32c9fb045aa899a")}, 78 | {823366, 0x000d1d6752180000, parseDigest("48662c118514817825ad4761e8e2e5f28f9bd8281b07e95dcafc6d02e0aa45c3")}, 79 | {810134, 0x0016071b6e180000, parseDigest("f629581aa05562f97f2c359890734c8574c5575da32f9289c5ba70bfd05f3f46")}, 80 | {567118, 0x00102a8242e00000, parseDigest("d4f0797c56c60d01bac33bfd49957a4816b6c067fc155b026de8a214cab4d70a")}, 81 | {821315, 0x001b3e42c8180000, parseDigest("8ebd0fd5db0293bd19140da936eb8b1bbd3cd6ffbec487385b956790014751ca")}, 82 | {1401057, 0x00045da878000000, parseDigest("001360af59adf4871ef138cfa2bb49007e86edaf5ac2d6f0b3d3014510991848")}, 83 | {2311122, 0x0005cbd885380000, parseDigest("8276d489b566086d9da95dc5c5fe6fc7d72646dd3308ced6b5b6ddb8595f0aa1")}, 84 | {608723, 0x001cfcd86f280000, parseDigest("518db33ba6a79d4f3720946f3785c05b9611082586d47ea58390fc2f6de9449e")}, 85 | {980456, 0x0013edb7a7f80000, parseDigest("0121b1690738395e15fecba1410cd0bf13fde02225160cad148829f77e7b6c99")}, 86 | {1140278, 0x0001f9f017e80000, parseDigest("28ca7c74804b5075d4f5eeb11f0845d99f62e8ea3a42b9a05c7bd5f2fca619dd")}, 87 | {2015542, 0x00097bf5d8180000, parseDigest("6fe8291f427d48650a5f0f944305d3a2dbc649bd401d2655fc0bdd42e890ca5a")}, 88 | {904752, 0x000e1863eff80000, parseDigest("62af1f1eb3f588d18aff28473303cc4731fc3cafcc52ce818fee3c4c2820854d")}, 89 | {713072, 0x001f3bb1b9b80000, 
parseDigest("4bda9dc2e3031d004d87a5cc93fe5207c4b0843186481b8f31597dc6ffa1496c")}, 90 | {675937, 0x001fec043c700000, parseDigest("5299c8c5acec1b90bb020cd75718aab5e12abb9bf66291465fd10e6a823a8b4a")}, 91 | {1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")}, 92 | {1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")}, 93 | {811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")}, 94 | {1282314, 0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")}, 95 | {1093738, 0x0017f5d048880000, parseDigest("5dddfa7a241b68f65d267744bdb082ee865f3c2f0d8b946ea0ee47868a01bbff")}, 96 | {962003, 0x000b921f7ef80000, parseDigest("0cb5c9ebba196b441c715c8d805f6e7143a81cd5b0d2c65c6aacf59ca9124af9")}, 97 | {856384, 0x00030ce2d9400000, parseDigest("7734b206d46f3f387e8661e81edf5b1a91ea681867beb5831c18aaa86632d7fb")}, 98 | {533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")}, 99 | {1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")}, 100 | {800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")}, 101 | {2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")}, 102 | {665901, 0x00118c842cb80000, parseDigest("deceec26163842fdef6560311c69bf8a9871a56e16d719e2c4b7e4d668ceb61f")}, 103 | {1986074, 0x000ae6c868000000, parseDigest("64cd64bf3c3bc389eb20df8310f0427d1c36ab2eaaf09e346bfa7f0453fc1a18")}, 104 | {237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")}, 105 | } 106 | 107 | // the same as chunks1, but with boundaries (16*1024*1024, 32*1024*1024) 108 | var chunks4 = []chunk{ 109 | {17864181, 0x001fec043c700000, parseDigest("4a43d4eccaa3b88514f54d3becddc6ef5e06fbb2de8161b0129888dbae4430a7")}, 110 | {15690251, 0x0000000000000001, parseDigest("4ca32142da6f9130dc9a5d0df82a3c9e359cc4f1f480f4e52042ec0e8ecd3cc0")}, 111 | } 112 | 113 | func testWithData(t *testing.T, chnker *Chunker, testChunks []chunk, checkDigest bool) []Chunk { 114 | chunks := []Chunk{} 115 | 116 | pos := uint(0) 117 | for i, chunk := range testChunks { 118 | c, err := chnker.Next(nil) 119 | 120 | if err != nil { 121 | t.Fatalf("Error returned with chunk %d: %v", i, err) 122 | } 123 | 124 | if c.Start != pos { 125 | t.Fatalf("Start for chunk %d does not match: expected %d, got %d", 126 | i, pos, c.Start) 127 | } 128 | 129 | if c.Length != chunk.Length { 130 | t.Fatalf("Length for chunk %d does not match: expected %d, got %d", 131 | i, chunk.Length, c.Length) 132 | } 133 | 134 | if c.Cut != chunk.CutFP { 135 | t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x", 136 | i, len(chunks)-1, chunk.CutFP, c.Cut) 137 | } 138 | 139 | if checkDigest { 140 | digest := hashData(c.Data) 141 | if !bytes.Equal(chunk.Digest, digest) { 142 | t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x", 143 | i, len(chunks)-1, chunk.Digest, digest) 144 | } 145 | } 146 | 147 | pos += c.Length 148 | chunks = append(chunks, c) 149 | } 150 | 151 | _, err := chnker.Next(nil) 152 | if err != io.EOF { 153 | t.Fatal("Wrong error returned after last chunk") 154 | } 155 | 156 | if len(chunks) != len(testChunks) { 157 | t.Fatal("Amounts of test and 
resulting chunks do not match") 158 | } 159 | 160 | return chunks 161 | } 162 | 163 | func getRandom(seed int64, count int) []byte { 164 | buf := make([]byte, count) 165 | 166 | rnd := rand.New(rand.NewSource(seed)) 167 | for i := 0; i < count; i += 4 { 168 | r := rnd.Uint32() 169 | buf[i] = byte(r) 170 | buf[i+1] = byte(r >> 8) 171 | buf[i+2] = byte(r >> 16) 172 | buf[i+3] = byte(r >> 24) 173 | } 174 | 175 | return buf 176 | } 177 | 178 | func hashData(d []byte) []byte { 179 | h := sha256.New() 180 | 181 | _, err := h.Write(d) 182 | if err != nil { 183 | panic(err) 184 | } 185 | 186 | return h.Sum(nil) 187 | } 188 | 189 | func TestChunker(t *testing.T) { 190 | // setup data source 191 | buf := getRandom(23, 32*1024*1024) 192 | ch := New(bytes.NewReader(buf), testPol) 193 | testWithData(t, ch, chunks1, true) 194 | 195 | // setup nullbyte data source 196 | buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize) 197 | ch = New(bytes.NewReader(buf), testPol) 198 | 199 | testWithData(t, ch, chunks2, true) 200 | } 201 | 202 | func TestChunkerWithCustomAverageBits(t *testing.T) { 203 | buf := getRandom(23, 32*1024*1024) 204 | 205 | // sligthly decrease averageBits to get more chunks 206 | ch := New(bytes.NewReader(buf), testPol, WithAverageBits(19)) 207 | 208 | testWithData(t, ch, chunks3, true) 209 | } 210 | 211 | func TestChunkerReset(t *testing.T) { 212 | buf := getRandom(23, 32*1024*1024) 213 | ch := New(bytes.NewReader(buf), testPol) 214 | testWithData(t, ch, chunks1, true) 215 | 216 | ch.Reset(bytes.NewReader(buf), testPol) 217 | testWithData(t, ch, chunks1, true) 218 | 219 | // test Reset with Options 220 | tmpBuf := make([]byte, 1024*1024) 221 | ch.Reset(bytes.NewReader(buf), testPol, WithAverageBits(19), WithBuffer(tmpBuf)) 222 | testWithData(t, ch, chunks3, true) 223 | if reflect.DeepEqual(tmpBuf, make([]byte, 1024*1024)) { 224 | t.Fatalf("Buffer was not used") 225 | } 226 | } 227 | 228 | // TestChunkerWithOptions tests the chunker with boundaries 229 | func TestChunkerWithOptions(t *testing.T) { 230 | buf := getRandom(23, 32*1024*1024) 231 | 232 | // test New with Options 233 | tmpBuf := make([]byte, 1024*1024) 234 | ch := New(bytes.NewReader(buf), testPol, WithBoundaries(16*1024*1024, 32*1024*1024), WithBuffer(tmpBuf)) 235 | testWithData(t, ch, chunks4, true) 236 | if reflect.DeepEqual(tmpBuf, make([]byte, 1024*1024)) { 237 | t.Fatalf("Buffer was not used") 238 | } 239 | } 240 | 241 | func TestChunkerWithRandomPolynomial(t *testing.T) { 242 | // setup data source 243 | buf := getRandom(23, 32*1024*1024) 244 | 245 | // generate a new random polynomial 246 | start := time.Now() 247 | p, err := RandomPolynomial() 248 | if err != nil { 249 | t.Fatal(err) 250 | } 251 | t.Logf("generating random polynomial took %v", time.Since(start)) 252 | 253 | start = time.Now() 254 | ch := New(bytes.NewReader(buf), p) 255 | t.Logf("creating chunker took %v", time.Since(start)) 256 | 257 | // make sure that first chunk is different 258 | c, err := ch.Next(nil) 259 | if err != nil { 260 | t.Fatal(err.Error()) 261 | } 262 | 263 | if c.Cut == chunks1[0].CutFP { 264 | t.Fatal("Cut point is the same") 265 | } 266 | 267 | if c.Length == chunks1[0].Length { 268 | t.Fatal("Length is the same") 269 | } 270 | 271 | if bytes.Equal(hashData(c.Data), chunks1[0].Digest) { 272 | t.Fatal("Digest is the same") 273 | } 274 | } 275 | 276 | func TestChunkerWithoutHash(t *testing.T) { 277 | // setup data source 278 | buf := getRandom(23, 32*1024*1024) 279 | 280 | ch := New(bytes.NewReader(buf), testPol) 281 | chunks := 
testWithData(t, ch, chunks1, false) 282 | 283 | // test reader 284 | for i, c := range chunks { 285 | if uint(len(c.Data)) != chunks1[i].Length { 286 | t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", 287 | chunks1[i].Length, len(c.Data)) 288 | } 289 | 290 | if !bytes.Equal(buf[c.Start:c.Start+c.Length], c.Data) { 291 | t.Fatalf("invalid data for chunk returned: expected %02x, got %02x", 292 | buf[c.Start:c.Start+c.Length], c.Data) 293 | } 294 | } 295 | 296 | // setup nullbyte data source 297 | buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize) 298 | ch = New(bytes.NewReader(buf), testPol) 299 | 300 | testWithData(t, ch, chunks2, false) 301 | } 302 | 303 | func benchmarkChunker(b *testing.B, checkDigest bool) { 304 | size := 32 * 1024 * 1024 305 | rd := bytes.NewReader(getRandom(23, size)) 306 | ch := New(rd, testPol) 307 | buf := make([]byte, MaxSize) 308 | 309 | b.ResetTimer() 310 | b.SetBytes(int64(size)) 311 | 312 | var chunks int 313 | for i := 0; i < b.N; i++ { 314 | chunks = 0 315 | 316 | _, err := rd.Seek(0, 0) 317 | if err != nil { 318 | b.Fatalf("Seek() return error %v", err) 319 | } 320 | 321 | ch.Reset(rd, testPol) 322 | 323 | cur := 0 324 | for { 325 | chunk, err := ch.Next(buf) 326 | 327 | if err == io.EOF { 328 | break 329 | } 330 | 331 | if err != nil { 332 | b.Fatalf("Unexpected error occurred: %v", err) 333 | } 334 | 335 | if chunk.Length != chunks1[cur].Length { 336 | b.Errorf("wrong chunk length, want %d, got %d", 337 | chunks1[cur].Length, chunk.Length) 338 | } 339 | 340 | if chunk.Cut != chunks1[cur].CutFP { 341 | b.Errorf("wrong cut fingerprint, want 0x%x, got 0x%x", 342 | chunks1[cur].CutFP, chunk.Cut) 343 | } 344 | 345 | if checkDigest { 346 | h := hashData(chunk.Data) 347 | if !bytes.Equal(h, chunks1[cur].Digest) { 348 | b.Errorf("wrong digest, want %x, got %x", 349 | chunks1[cur].Digest, h) 350 | } 351 | } 352 | 353 | chunks++ 354 | cur++ 355 | } 356 | } 357 | 358 | b.Logf("%d chunks, average chunk size: %d bytes", chunks, size/chunks) 359 | } 360 | 361 | func BenchmarkChunkerWithSHA256(b *testing.B) { 362 | benchmarkChunker(b, true) 363 | } 364 | 365 | func BenchmarkChunker(b *testing.B) { 366 | benchmarkChunker(b, false) 367 | } 368 | 369 | func BenchmarkNewChunker(b *testing.B) { 370 | p, err := RandomPolynomial() 371 | if err != nil { 372 | b.Fatal(err) 373 | } 374 | 375 | b.ResetTimer() 376 | 377 | for i := 0; i < b.N; i++ { 378 | New(bytes.NewBuffer(nil), p) 379 | } 380 | } 381 | --------------------------------------------------------------------------------