├── .github
└── workflows
│ └── unit-test.yml
├── .gitignore
├── LICENSE
├── README.md
├── go.mod
├── go.sum
├── xrs.go
└── xrs_test.go
/.github/workflows/unit-test.yml:
--------------------------------------------------------------------------------
1 | name: unit-test
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | - release/*
8 | pull_request:
9 | branches:
10 | - master
11 |
12 | jobs:
13 |
14 | test:
15 | name: Test
16 | runs-on: ubuntu-latest
17 | steps:
18 |
19 | - name: Set up Go 1.13
20 | uses: actions/setup-go@v1
21 | with:
22 | go-version: 1.13
23 | id: go
24 |
25 | - name: Check out code into the Go module directory
26 | uses: actions/checkout@v1
27 |
28 | - name: Get dependencies
29 | run: |
30 | go get -v -t -d ./...
31 | if [ -f Gopkg.toml ]; then
32 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
33 | dep ensure
34 | fi
35 | - name: Run test
36 | run: CGO_ENABLED=1 GO111MODULE=on go test -v -race -short
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.dll
4 | *.so
5 | *.dylib
6 |
7 | # Test binary, build with `go test -c`
8 | *.test
9 |
10 | # Output of the go coverage tool, specifically when used with LiteIDE
11 | *.out
12 |
13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
14 | .glide/
15 | /utils/ratetrafficdown
16 | .idea
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Temple3x (temple3x@gmail.com)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # X-Reed-Solomon
2 |
3 | [![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8]
4 |
5 | [1]: https://godoc.org/github.com/templexxx/xrs?status.svg
6 | [2]: https://godoc.org/github.com/templexxx/xrs
7 | [3]: https://img.shields.io/badge/license-MIT-blue.svg
8 | [4]: LICENSE
9 | [5]: https://github.com/templexxx/xrs/workflows/unit-test/badge.svg
10 | [6]: https://github.com/templexxx/xrs
11 | [7]: https://goreportcard.com/badge/github.com/templexxx/xrs
12 | [8]: https://goreportcard.com/report/github.com/templexxx/xrs
13 |
14 | ## Introduction:
15 |
16 | >- Fast and efficient data reconstruction Erasure Code engine in pure Go.
17 | >
18 | >- [Systematic Codes](https://en.wikipedia.org/wiki/Systematic_code) with [MDS property](https://en.wikipedia.org/wiki/Singleton_bound#MDS_codes).
19 | >
20 | >- [More than 10GB/s per physical core.](https://github.com/templexxx/xrs#performance)
21 | >
22 | >- Saving about 30% I/O in reconstruction.
23 | >
24 | >- Has been used for a distributed storage system with more than 10PB data.
25 | >
26 | >- Based on papers:
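27 | > 1. [A "Hitchhiker's" Guide to Fast and Efficient Data Reconstruction in Erasure-coded Data Centers](https://www.cs.cmu.edu/~nihars/publications/Hitchhiker_SIGCOMM14.pdf)
28 | > 2. [A Piggybacking Design Framework for Read-and Download-efficient Distributed Storage Codes](http://www.cs.cmu.edu/~rvinayak/papers/piggybacking_journal_ieee_tit_2017.pdf)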
29 |
30 | ## Getting Started
31 |
32 | >- Make sure you have read the papers.
33 | >
34 | >- XRS splits row vector into two equal parts.
35 | >
36 | > e.g. 10+4:
37 | >
38 | +---------+
39 | | a1 | b1 |
40 | +---------+
41 | | a2 | b2 |
42 | +---------+
43 | | a3 | b3 |
44 | +---------+
45 | ...
46 | +---------+
47 | | a10| b10|
48 | +---------+
49 | | a11| b11|
50 | +---------+
51 | | a12| b12|
52 | +---------+
53 | | a13| b13|
54 | +---------+
55 |
56 | >>- So it's important to choose a suitable vector size for reading/writing disks efficiently.
57 | >
58 | >- APIs are almost the same as normal Reed-Solomon Erasure Codes.
59 |
60 | ## Performance
61 |
62 | Performance depends mainly on:
63 |
64 | >- CPU instruction extension.
65 | >
66 | >- Number of data/parity row vectors.
67 |
68 | **Platform:**
69 |
70 | *MacBook Pro 15-inch, 2017 (Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz)*
71 |
72 | >All tests run on a single core.
73 | >
74 | >RS means Reed-Solomon Codes(for comparing), the RS lib is [here](https://github.com/templexxx/reedsolomon)
75 |
76 | ### Encode:
77 |
78 | `I/O = (data + parity) * vector_size / cost`
79 |
80 | *Base means no SIMD.*
81 |
82 | | Data | Parity | Vector size | RS I/O (MB/S) | XRS I/O (MB/S) |
83 | |-------|---------|-------------|-------------|---------------|
84 | |12|4|4KB| 12658.00 | 10895.15 |
85 | |12|4|1MB| 8989.67 | 7530.84 |
86 | |12|4|8MB| 8509.06 | 6579.53 |
87 |
88 | ### Reconstruct:
89 |
90 | `Need Data = size of the data that must be read for reconstruction`
91 |
92 | `I/O = (need_data + reconstruct_data_num * vector_size) / cost`
93 |
94 | | Data | Parity | Vector size | Reconstruct Data Num | RS Need Data | XRS Need Data | RS Cost | XRS Cost | RS I/O (MB/S) | XRS I/O (MB/S) |
95 | |-------|---------|-------------|-------------|---------------|---------------|-------------|---------------|-------------|---------------|
96 | |12|4|4KB| 1 | 48KB | 34KB | 2140 ns/op | 3567 ns/op | 24885.17 |10334.99|
97 | |12|4|4KB| 2 | 48KB | 48KB | 3395 ns/op | 5940 ns/op | 16890.41 |9654.17|
98 | |12|4|4KB| 3 | 48KB | 48KB | 4746 ns/op | 7525 ns/op | 12945.61 |8164.76|
99 | |12|4|4KB| 4 | 48KB | 48KB | 5958 ns/op | 8851 ns/op | 10999.75 |7404.41|
100 |
101 | ### Update:
102 |
103 | `I/O = (2 + parity_num + parity_num) * vector_size / cost`
104 |
105 | | Data | Parity | Vector size | RS I/O (MB/S) | XRS I/O (MB/S) |
106 | |-------|---------|-------------|-------------|-------------|
107 | |12|4|4KB| 32739.22 | 26312.14 |
108 |
109 | ### Replace:
110 |
111 | `I/O = (parity_num + parity_num + replace_data_num) * vector_size / cost`
112 |
113 | | Data | Parity | Vector size | Replace Data Num | RS I/O (MB/S) |XRS I/O (MB/S) |
114 | |-------|---------|-------------|-------------|---------------|-------------|
115 | |12|4|4KB| 1 | 63908.06 | 44082.57 |
116 | |12|4|4KB| 2 | 39966.65 | 26554.30 |
117 | |12|4|4KB| 3 | 30007.81 | 19583.16 |
118 | |12|4|4KB| 4 | 25138.38 | 16636.82 |
119 | |12|4|4KB| 5 | 21261.91 | 14301.15 |
120 | |12|4|4KB| 6 | 19833.14 | 13121.98 |
121 | |12|4|4KB| 7 | 18395.47 | 12028.10 |
122 | |12|4|4KB| 8 | 17364.02 | 11300.55 |
123 |
124 | **PS:**
125 |
126 | *Keep in mind that benchmark results can be quite different from encoding/decoding in practice,
127 | because in benchmark loops the CPU cache may help a lot.*
128 |
129 | ## Links & Deps
130 | * [Reed-Solomon](https://github.com/templexxx/reedsolomon)
131 | * [XOR](https://github.com/templexxx/xorsimd)
132 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/templexxx/xrs
2 |
3 | go 1.13
4 |
5 | require (
6 | github.com/templexxx/reedsolomon v1.1.3
7 | github.com/templexxx/xorsimd v0.1.1
8 | )
9 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/templexxx/cpu v0.0.1 h1:hY4WdLOgKdc8y13EYklu9OUTXik80BkxHoWvTO6MQQY=
2 | github.com/templexxx/cpu v0.0.1/go.mod h1:w7Tb+7qgcAlIyX4NhLuDKt78AHA5SzPmq0Wj6HiEnnk=
3 | github.com/templexxx/reedsolomon v1.1.3 h1:UJZtgOAcp8Ldl9Qp9/8YIPHfW58vsUrPHcJYv15Na50=
4 | github.com/templexxx/reedsolomon v1.1.3/go.mod h1:lCyQlNrc8GTWsFE47kSLkJJvsGL8Lo5pfUDPiJZMm3o=
5 | github.com/templexxx/xorsimd v0.1.1 h1:Y4e8YgMx/4xRJO4G6lq0bSswfDCxbIrGu7KqM2ET524=
6 | github.com/templexxx/xorsimd v0.1.1/go.mod h1:W+ffZz8jJMH2SXwuKu9WhygqBMbFnp14G2fqEr8qaNo=
7 |
--------------------------------------------------------------------------------
/xrs.go:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com)
2 | //
3 | // Use of this source code is governed by the MIT License
4 | // that can be found in the LICENSE file.
5 |
6 | // Package xrs implements Erasure Codes based on
7 | // <A "Hitchhiker's" Guide to Fast and Efficient Data Reconstruction in Erasure-coded Data Centers>,
8 | // split row vectors into two equal size parts:
9 | // e.g. 10+4:
10 | // +---------+
11 | // | a1 | b1 |
12 | // +---------+
13 | // | a2 | b2 |
14 | // +---------+
15 | // | a3 | b3 |
16 | // +---------+
17 | // ...
18 | // +---------+
19 | // | a10| b10|
20 | // +---------+
21 | // | a11| b11|
22 | // +---------+
23 | // | a12| b12|
24 | // +---------+
25 | // | a13| b13|
26 | // +---------+
27 |
28 | package xrs
29 |
30 | import (
31 | "errors"
32 | "fmt"
33 |
34 | rs "github.com/templexxx/reedsolomon"
35 | xor "github.com/templexxx/xorsimd"
36 | )
37 |
// XRS is the X-Reed-Solomon Codes receiver.
type XRS struct {
	// RS is the backend of XRS.
	RS *rs.RS
	// XORSet shows how XRS combines sub-vectors by xor.
	//
	// Key: Parity index(except first parity).
	// Value: Data indexes.
	XORSet map[int][]int
}
48 |
49 | // New create an XRS with specific data & parity numbers.
50 | //
51 | // Warn:
52 | // parityNum can't be 1.
53 | func New(dataNum, parityNum int) (x *XRS, err error) {
54 | if parityNum == 1 {
55 | err = errors.New("illegal parity")
56 | return
57 | }
58 | r, err := rs.New(dataNum, parityNum)
59 | if err != nil {
60 | return
61 | }
62 | xs := make(map[int][]int)
63 | makeXORSet(dataNum, parityNum, xs)
64 | x = &XRS{RS: r, XORSet: xs}
65 | return
66 | }
67 |
// makeXORSet fills m with the XOR sets for d data and p parity vectors.
//
// Data indexes 0..d-1 are dealt round-robin onto the parity indexes
// d+1..d+p-1 (every parity except the first one). Parity indexes that end
// up with no data index are removed from m.
//
// e.g. 10+4:
//
// We will have this xor_set: 11:[0 3 6 9] 12:[1 4 7] 13:[2 5 8],
// which means:
// b11 ⊕ a0 ⊕ a3 ⊕ a6 ⊕ a9 = new_b11
// b12 ⊕ a1 ⊕ a4 ⊕ a7 = new_b12
// b13 ⊕ a2 ⊕ a5 ⊕ a8 = new_b13
func makeXORSet(d, p int, m map[int][]int) {
	first := d + 1 // First parity index that owns an XOR set.
	width := p - 1 // Number of parity indexes that own an XOR set.

	// Start every owning parity with an empty set.
	for key := first; key < first+width; key++ {
		m[key] = make([]int, 0)
	}

	// Deal the data indexes out in turn.
	for a := 0; a < d; a++ {
		key := first
		if width > 1 {
			key += a % width
		}
		m[key] = append(m[key], a)
	}

	// Drop the parities which received nothing.
	for key, set := range m {
		if len(set) == 0 {
			delete(m, key)
		}
	}
}
99 |
100 | // Encode encodes data for generating parity.
101 | // Write parity vectors into vects[r.DataNum:].
102 | func (x *XRS) Encode(vects [][]byte) (err error) {
103 |
104 | err = checkSize(vects[0])
105 | if err != nil {
106 | return
107 | }
108 | size := len(vects[0])
109 |
110 | // Step1: Reed-Solomon encode.
111 | err = x.RS.Encode(vects)
112 | if err != nil {
113 | return
114 | }
115 |
116 | // Step2: XOR by xor_set.
117 | half := size / 2
118 | for bi, xs := range x.XORSet {
119 | xv := make([][]byte, len(xs)+1)
120 | xv[0] = vects[bi][half:]
121 | for j, ai := range xs {
122 | xv[j+1] = vects[ai][:half]
123 | }
124 | xor.Encode(vects[bi][half:], xv)
125 | }
126 | return
127 | }
128 |
// checkSize returns an error if the length of vect is odd.
// A vector must be splittable into two equal halves.
func checkSize(vect []byte) error {
	if n := len(vect); n%2 != 0 {
		return fmt.Errorf("vect size not even: %d", n)
	}
	return nil
}
136 |
137 | // GetNeedVects receives needReconst index(it must be data vector)
138 | // returns a_vectors' indexes and b_parity_vectors' indexes for reconstructing needReconst.
139 | // It's used for ReconstOne to read correct vectors for saving I/O.
140 | //
141 | // bNeed always has two elements, the first one is DataNum.
142 | func (x *XRS) GetNeedVects(needReconst int) (aNeed, bNeed []int, err error) {
143 | d := x.RS.DataNum
144 | if needReconst < 0 || needReconst >= d {
145 | err = fmt.Errorf("illegal data index: %d", needReconst)
146 | return
147 | }
148 |
149 | // Find b.
150 | bNeed = make([]int, 2)
151 | bNeed[0] = d // Must has b_vects[d].
152 | xs := x.XORSet
153 | for i, s := range xs {
154 | if isIn(needReconst, s) {
155 | bNeed[1] = i
156 | break
157 | }
158 | }
159 |
160 | // Get a (except needReconst).
161 | for _, i := range xs[bNeed[1]] {
162 | if i != needReconst {
163 | aNeed = append(aNeed, i)
164 | }
165 | }
166 | return
167 | }
168 |
169 | // ReconstOne reconstruct one data vector, it saves I/O.
170 | // Make sure you have some specific vectors data. ( you can get the vectors' indexes from GetNeedVects)
171 | func (x *XRS) ReconstOne(vects [][]byte, needReconst int) (err error) {
172 |
173 | err = checkSize(vects[0])
174 | if err != nil {
175 | return
176 | }
177 |
178 | aNeed, bNeed, err := x.GetNeedVects(needReconst)
179 | if err != nil {
180 | return
181 | }
182 |
183 | // Step1: Reconstruct b_needReconst & rs(bNeed[1]), using original Reed-Solomon Codes.
184 | bVects := make([][]byte, len(vects))
185 | half := len(vects[0]) / 2
186 | for i, v := range vects {
187 | bVects[i] = v[half:]
188 | }
189 |
190 | d := x.RS.DataNum
191 | bDPHas := make([]int, d)
192 | for i := 0; i < d; i++ {
193 | bDPHas[i] = i
194 | }
195 | bDPHas[needReconst] = d // Replace needReconst with DataNum.
196 |
197 | bi := bNeed[1] // B index in XORSet.
198 |
199 | bRS := make([]byte, half)
200 | bVects[bi] = bRS
201 | err = x.RS.Reconst(bVects, bDPHas, []int{needReconst, bi})
202 | if err != nil {
203 | return
204 | }
205 |
206 | // Step2: Reconstruct a_needReconst
207 | // ∵ a_needReconst ⊕ a_need ⊕ bRS = vects[bi]
208 | // ∴ a_needReconst = vects[bi] ⊕ bRS ⊕ a_need
209 | xorV := make([][]byte, len(aNeed)+2)
210 | xorV[0] = vects[bi][half:]
211 | xorV[1] = bRS
212 | for i, ai := range aNeed {
213 | xorV[i+2] = vects[ai][:half]
214 | }
215 | xor.Encode(vects[needReconst][:half], xorV)
216 | return
217 | }
218 |
219 | // Reconst reconstructs missing vectors,
220 | // vects: All vectors, len(vects) = dataNum + parityNum.
221 | // dpHas: Survived data & parity index, need dataNum indexes at least.
222 | // needReconst: Vectors indexes which need to be reconstructed.
223 | //
224 | // Warn:
225 | // If there is only one needReconst, it will call ReconstOne,
226 | // so make sure you have correct data, if there is only one vectors need to repair.
227 | //
228 | // e.g:
229 | // in 3+2, the whole index: [0,1,2,3,4],
230 | // if vects[0,4] are lost & they need to be reconstructed
231 | // (Maybe you only need vects[0], so the needReconst should be [0], but not [0,4]).
232 | // the "dpHas" will be [1,2,3] ,and you must be sure that vects[1] vects[2] vects[3] have correct data,
233 | // results will be written into vects[0]&vects[4] directly.
234 | func (x *XRS) Reconst(vects [][]byte, dpHas, needReconst []int) (err error) {
235 |
236 | if len(needReconst) == 1 && needReconst[0] < x.RS.DataNum {
237 | return x.ReconstOne(vects, needReconst[0])
238 | }
239 |
240 | err = checkSize(vects[0])
241 | if err != nil {
242 | return
243 | }
244 |
245 | // Step1: Reconstruct all a_vectors.
246 | half := len(vects[0]) / 2
247 | aVects := make([][]byte, len(vects))
248 | for i := range vects {
249 | aVects[i] = vects[i][:half]
250 | }
251 | aLost := make([]int, 0)
252 | for i := 0; i < x.RS.DataNum+x.RS.ParityNum; i++ {
253 | if !isIn(i, dpHas) {
254 | aLost = append(aLost, i)
255 | }
256 | }
257 | err = x.RS.Reconst(aVects, dpHas, aLost)
258 | if err != nil {
259 | return
260 | }
261 |
262 | // Step2: Retrieve b_vectors to RS codes(if has).
263 | err = x.retrieveRS(vects, dpHas)
264 | if err != nil {
265 | return
266 | }
267 |
268 | // Step3: Reconstruct b_vectors using RS codes.
269 | bVects := make([][]byte, len(vects))
270 | for i := range vects {
271 | bVects[i] = vects[i][half:]
272 | }
273 | err = x.RS.Reconst(bVects, dpHas, needReconst)
274 | if err != nil {
275 | return
276 | }
277 |
278 | // Step4: XOR b_parity_vectors according to XORSet(if need).
279 | d := x.RS.DataNum
280 | _, pn := rs.SplitNeedReconst(d, needReconst)
281 | if len(pn) != 0 {
282 | if len(pn) == 1 && pn[0] == d {
283 | return nil
284 | }
285 | for _, i := range pn {
286 | if i != d {
287 | xs := x.XORSet[i]
288 | xv := make([][]byte, len(xs)+1)
289 | xv[0] = vects[i][half:]
290 | for j, ai := range xs {
291 | xv[j+1] = vects[ai][:half]
292 | }
293 | xor.Encode(vects[i][half:], xv)
294 | }
295 | }
296 | }
297 |
298 | return nil
299 | }
300 |
301 | // retrieveRS retrieves b_parity_vects(if has) to RS codes
302 | // by XOR itself and a_vects in XORSet.
303 | func (x *XRS) retrieveRS(vects [][]byte, dpHas []int) (err error) {
304 |
305 | half := len(vects[0]) / 2
306 | for _, h := range dpHas {
307 | if h > x.RS.DataNum { // vects[data] is rs_codes
308 | xs := x.XORSet[h]
309 | xv := make([][]byte, len(xs)+1)
310 | xv[0] = vects[h][half:] // put B first
311 | for i, ai := range xs {
312 | xv[i+1] = vects[ai][:half]
313 | }
314 | xor.Encode(vects[h][half:], xv)
315 | }
316 | }
317 | return
318 | }
319 |
320 | // Update updates parity_data when one data_vect changes.
321 | // row: It's the new data's index in the whole vectors.
322 | func (x *XRS) Update(oldData, newData []byte, row int, parity [][]byte) (err error) {
323 |
324 | err = checkSize(oldData)
325 | if err != nil {
326 | return
327 | }
328 |
329 | err = x.RS.Update(oldData, newData, row, parity)
330 | if err != nil {
331 | return
332 | }
333 |
334 | _, bNeed, err := x.GetNeedVects(row)
335 | if err != nil {
336 | return
337 | }
338 | half := len(oldData) / 2
339 | src := make([][]byte, 3)
340 | bv := parity[bNeed[1]-x.RS.DataNum][half:]
341 | src[0], src[1], src[2] = oldData[:half], newData[:half], bv
342 | xor.Encode(bv, src)
343 | return
344 | }
345 |
346 | // Replace replaces oldData vectors with 0 or replaces 0 with newData vectors.
347 | //
348 | // In practice,
349 | // If len(replaceRows) > dataNum-parityNum, it's better to use Encode,
350 | // because Replace need to read len(replaceRows) + parityNum vectors,
351 | // if replaceRows are too many, the cost maybe larger than Encode
352 | // (Encode only need read dataNum).
353 | // Think about an EC compute node, and dataNum+parityNum data nodes model.
354 | //
355 | // It's used in two situations:
356 | // 1. We didn't have enough data for filling in a stripe, but still did ec encode,
357 | // we need replace several zero vectors with new vectors which have data after we get enough data finally.
358 | // 2. After compact, we may have several useless vectors in a stripe,
359 | // we need replaces these useless vectors with zero vectors for free space.
360 | //
361 | // Warn:
362 | // data's index & replaceRows must has the same sort.
363 | func (x *XRS) Replace(data [][]byte, replaceRows []int, parity [][]byte) (err error) {
364 |
365 | err = checkSize(data[0])
366 | if err != nil {
367 | return
368 | }
369 |
370 | err = x.RS.Replace(data, replaceRows, parity)
371 | if err != nil {
372 | return
373 | }
374 |
375 | for i := range replaceRows {
376 | _, bNeed, err2 := x.GetNeedVects(replaceRows[i])
377 | if err2 != nil {
378 | return err2
379 | }
380 |
381 | half := len(data[0]) / 2
382 | bv := parity[bNeed[1]-x.RS.DataNum][half:]
383 | xor.Encode(bv, [][]byte{bv, data[i][:half]})
384 | }
385 |
386 | return
387 | }
388 |
// isIn reports whether e is an element of s.
func isIn(e int, s []int) bool {
	for i := range s {
		if s[i] == e {
			return true
		}
	}
	return false
}
397 |
--------------------------------------------------------------------------------
/xrs_test.go:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com)
2 | //
3 | // Use of this source code is governed by the MIT License
4 | // that can be found in the LICENSE file.
5 |
6 | package xrs
7 |
8 | import (
9 | "bytes"
10 | "fmt"
11 | "math/rand"
12 | "sort"
13 | "testing"
14 | "time"
15 | )
16 |
// Byte units used for naming and sizing benchmark vectors.
const (
	kb = 1024
	mb = 1024 * kb
)

// Default code layout and vector size shared by the tests.
const (
	testDataNum   = 12
	testParityNum = 4
	testSize      = 1024
)
24 |
25 | // We need the result to be as same as old one.
26 | func TestMakeXORSet(t *testing.T) {
27 | for d := 1; d <= 255; d++ {
28 | for p := 2; p <= 255; p++ {
29 | if d+p > 256 {
30 | continue
31 | }
32 |
33 | xs1 := make(map[int][]int)
34 | makeXORSet(d, p, xs1)
35 | xs2 := make(map[int][]int)
36 | makeXORSetOld(d, p, xs2)
37 |
38 | if len(xs1) != len(xs2) {
39 | t.Fatal("mismatch map len", d, p, xs1, xs2)
40 | }
41 | for k, v1 := range xs1 {
42 | v2 := xs2[k]
43 | if len(v1) != len(v2) {
44 | t.Fatal("mismatch len")
45 | }
46 | for j, k := range v1 {
47 | if k != v2[j] {
48 | t.Fatal("element mismatch")
49 | }
50 | }
51 | }
52 |
53 | }
54 | }
55 | }
56 |
// makeXORSetOld is the old implementation of makeXORSet, kept as the
// reference for TestMakeXORSet.
//
// It hands the data indexes 0..d-1 out to the parity indexes d+1..d+p-1,
// one per parity in turn, until every data index is placed.
func makeXORSetOld(d, p int, m map[int][]int) {
	for a := 0; a != d; {
		for key := d + 1; key < d+p && a != d; key++ {
			m[key] = append(m[key], a)
			a++
		}
	}
}
77 |
78 | // Powered by MATLAB
79 | func TestXRS_Encode(t *testing.T) {
80 | d, p := 5, 5
81 | x, err := New(d, p)
82 | if err != nil {
83 | t.Fatal(err)
84 | }
85 | vects := [][]byte{{0, 0}, {4, 7}, {2, 4}, {6, 9}, {8, 11},
86 | {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}}
87 | err = x.Encode(vects)
88 | if err != nil {
89 | t.Fatal(err)
90 | }
91 | exp := [][]byte{{0, 0}, {4, 7}, {2, 4}, {6, 9}, {8, 11},
92 | {97, 156}, {173, 117}, {218, 110}, {107, 59}, {110, 153}}
93 |
94 | for i := range exp {
95 | if !bytes.Equal(exp[i], vects[i]) {
96 | t.Fatalf("encode failed: vect %d mismatch", i)
97 | }
98 | }
99 | }
100 |
101 | func TestXRS_GetNeedVects(t *testing.T) {
102 | for d := 1; d <= 255; d++ {
103 | for p := 2; p <= 255; p++ {
104 | if d+p > 256 {
105 | continue
106 | }
107 |
108 | xrs, err := New(d, p)
109 | if err != nil {
110 | t.Fatal(err)
111 | }
112 |
113 | for i := 0; i < d; i++ {
114 | a, b, err := xrs.GetNeedVects(i)
115 | if err != nil {
116 | t.Fatal(err)
117 | }
118 |
119 | a = append(a, i)
120 | expA := xrs.XORSet[b[1]]
121 | if len(a) != len(expA) {
122 | t.Fatal("mismatch len")
123 | }
124 | sort.Ints(a)
125 | for j, k := range a {
126 | if k != expA[j] {
127 | t.Fatal("element mismatch")
128 | }
129 | }
130 | }
131 | }
132 | }
133 | }
134 |
135 | func TestXRS_ReconstOne(t *testing.T) {
136 | testReconstOne(t, testDataNum, testParityNum, 2)
137 | }
138 |
139 | func testReconstOne(t *testing.T, d, p, size int) {
140 | rand.Seed(time.Now().UnixNano())
141 |
142 | for lost := 0; lost < d; lost++ {
143 |
144 | // init expect, result
145 | expect := make([][]byte, d+p)
146 | result := make([][]byte, d+p)
147 | for j := 0; j < d+p; j++ {
148 | expect[j] = make([]byte, size)
149 | result[j] = make([]byte, size)
150 | }
151 | for j := 0; j < d; j++ {
152 | fillRandom(expect[j])
153 | }
154 | x, err := New(d, p)
155 | if err != nil {
156 | t.Fatal(err)
157 | }
158 | err = x.Encode(expect)
159 | if err != nil {
160 | t.Fatal(err)
161 | }
162 | for j := 0; j < d+p; j++ {
163 | copy(result[j], expect[j])
164 | }
165 |
166 | // Clean all data except needed.
167 | // Clean needReconst.
168 | needReconst := lost
169 | result[needReconst] = make([]byte, size)
170 | // Clean A & B.
171 | aVects, bVects := make([][]byte, d+p), make([][]byte, d+p)
172 | half := size / 2
173 | for j := range result {
174 | aVects[j], bVects[j] = result[j][:half], result[j][half:]
175 | }
176 | aNeed, bNeed, err := x.GetNeedVects(needReconst)
177 | if err != nil {
178 | t.Fatal(err)
179 | }
180 | // Clean A.
181 | for j := range result {
182 | if !isIn(j, aNeed) {
183 | aVects[j] = make([]byte, half)
184 | }
185 | }
186 | // Clean B.
187 | bVects[needReconst] = make([]byte, size)
188 | for j := d; j < d+p; j++ {
189 | if !isIn(j, bNeed) {
190 | bVects[j] = make([]byte, half)
191 | }
192 | }
193 |
194 | for j := range result {
195 | copy(result[j][:half], aVects[j])
196 | copy(result[j][half:], bVects[j])
197 |
198 | }
199 | err = x.ReconstOne(result, needReconst)
200 | if err != nil {
201 | t.Fatal(err)
202 | }
203 |
204 | if !bytes.Equal(result[needReconst], expect[needReconst]) {
205 | t.Fatalf("mismatch reconstOne; vect: %d; size: %d", needReconst, size)
206 | }
207 | }
208 | }
209 |
// fillRandom overwrites p with pseudo-random bytes from math/rand.
func fillRandom(p []byte) {
	// math/rand's Read is documented to always return a nil error.
	_, _ = rand.Read(p)
}
213 |
214 | func TestXRS_retrieveRS(t *testing.T) {
215 | d, p := testDataNum, testParityNum
216 | x, err := New(d, p)
217 | if err != nil {
218 | t.Fatal(err)
219 | }
220 |
221 | rand.Seed(time.Now().UnixNano())
222 |
223 | vects := make([][]byte, d+p)
224 | results := make([][]byte, d+p)
225 | for i := range vects {
226 | vects[i] = make([]byte, testSize)
227 | results[i] = make([]byte, testSize)
228 | fillRandom(vects[i])
229 | copy(results[i], vects[i])
230 | }
231 |
232 | err = x.retrieveRS(results, rand.Perm(d+p))
233 | if err != nil {
234 | t.Fatal(err)
235 | }
236 | err = x.retrieveRS(results, rand.Perm(d+p))
237 | if err != nil {
238 | t.Fatal(err)
239 | }
240 |
241 | for i := range vects {
242 | if !bytes.Equal(vects[i], results[i]) {
243 | t.Fatalf("mismatch retrieveRS; vect: %d", i)
244 | }
245 | }
246 | }
247 |
248 | func TestXRS_Reconst(t *testing.T) {
249 | testReconst(t, testDataNum, testParityNum, testSize, 128)
250 | }
251 |
252 | func testReconst(t *testing.T, d, p, size, loop int) {
253 |
254 | rand.Seed(time.Now().UnixNano())
255 |
256 | for i := 0; i < loop; i++ {
257 | exp := make([][]byte, d+p)
258 | act := make([][]byte, d+p)
259 | for j := 0; j < d+p; j++ {
260 | exp[j], act[j] = make([]byte, size), make([]byte, size)
261 | }
262 | for j := 0; j < d; j++ {
263 | fillRandom(exp[j])
264 | }
265 |
266 | x, err := New(d, p)
267 | if err != nil {
268 | t.Fatal(err)
269 | }
270 | err = x.Encode(exp)
271 | if err != nil {
272 | t.Fatal(err)
273 | }
274 |
275 | lost := makeLostRandom(d+p, rand.Intn(p+1))
276 | needReconst := lost[:rand.Intn(len(lost)+1)]
277 | if len(needReconst) == 1 {
278 | lost = needReconst // Make sure to have correct data for reconstOne.
279 | }
280 | dpHas := makeHasFromLost(d+p, lost)
281 | for _, h := range dpHas {
282 | copy(act[h], exp[h])
283 | }
284 |
285 | // Try to reconstruct some health vectors.
286 | // Although we want to reconstruct these vectors,
287 | // but it maybe a mistake.
288 | for _, nr := range needReconst {
289 | if rand.Intn(4) == 0 { // 1/4 chance.
290 | copy(act[nr], exp[nr])
291 | }
292 | }
293 |
294 | err = x.Reconst(act, dpHas, needReconst)
295 | if err != nil {
296 | t.Fatal(err)
297 | }
298 |
299 | for _, n := range needReconst {
300 | if !bytes.Equal(exp[n], act[n]) {
301 | t.Fatalf("reconst failed: vect: %d, size: %d", n, size)
302 | }
303 | }
304 | }
305 | }
306 |
307 | func TestXRS_Update(t *testing.T) {
308 | testUpdate(t, testDataNum, testParityNum, testSize)
309 | }
310 |
311 | func testUpdate(t *testing.T, d, p, size int) {
312 |
313 | rand.Seed(time.Now().UnixNano())
314 |
315 | for i := 0; i < d; i++ {
316 | act := make([][]byte, d+p)
317 | exp := make([][]byte, d+p)
318 | for j := 0; j < d+p; j++ {
319 | act[j], exp[j] = make([]byte, size), make([]byte, size)
320 | }
321 | for j := 0; j < d; j++ {
322 | fillRandom(exp[j])
323 | copy(act[j], exp[j])
324 | }
325 |
326 | x, err := New(d, p)
327 | if err != nil {
328 | t.Fatal(err)
329 | }
330 | err = x.Encode(act)
331 | if err != nil {
332 | t.Fatal(err)
333 | }
334 |
335 | newData := make([]byte, size)
336 | fillRandom(newData)
337 | updateRow := i
338 | err = x.Update(act[updateRow], newData, updateRow, act[d:d+p])
339 | if err != nil {
340 | t.Fatal(err)
341 | }
342 |
343 | copy(exp[updateRow], newData)
344 | err = x.Encode(exp)
345 | if err != nil {
346 | t.Fatal(err)
347 | }
348 | for j := d; j < d+p; j++ {
349 | if !bytes.Equal(act[j], exp[j]) {
350 | t.Fatalf("update failed: vect: %d, size: %d", j, size)
351 | }
352 | }
353 | }
354 | }
355 |
356 | func TestXRS_Replace(t *testing.T) {
357 | testReplace(t, testDataNum, testParityNum, testSize, 1024, true)
358 | testReplace(t, testDataNum, testParityNum, testSize, 1024, false)
359 | }
360 |
361 | func testReplace(t *testing.T, d, p, size, loop int, toZero bool) {
362 |
363 | rand.Seed(time.Now().UnixNano())
364 |
365 | for i := 0; i < loop; i++ {
366 | replaceRows := makeReplaceRowRandom(d)
367 | act := make([][]byte, d+p)
368 | exp := make([][]byte, d+p)
369 | for j := 0; j < d+p; j++ {
370 | act[j], exp[j] = make([]byte, size), make([]byte, size)
371 | }
372 | for j := 0; j < d; j++ {
373 | fillRandom(exp[j])
374 | copy(act[j], exp[j])
375 | }
376 |
377 | data := make([][]byte, len(replaceRows))
378 | for i, rr := range replaceRows {
379 | data[i] = make([]byte, size)
380 | copy(data[i], exp[rr])
381 | }
382 |
383 | if toZero {
384 | for _, rr := range replaceRows {
385 | exp[rr] = make([]byte, size)
386 | }
387 | }
388 |
389 | x, err := New(d, p)
390 | if err != nil {
391 | t.Fatal(err)
392 | }
393 | err = x.Encode(exp)
394 | if err != nil {
395 | t.Fatal(err)
396 | }
397 |
398 | if !toZero {
399 | for _, rr := range replaceRows {
400 | act[rr] = make([]byte, size)
401 | }
402 | }
403 | err = x.Encode(act)
404 | if err != nil {
405 | t.Fatal(err)
406 | }
407 |
408 | err = x.Replace(data, replaceRows, act[d:])
409 | if err != nil {
410 | t.Fatal(err)
411 | }
412 |
413 | for j := d; j < d+p; j++ {
414 | if !bytes.Equal(act[j], exp[j]) {
415 | fmt.Println(replaceRows)
416 | t.Fatalf("replace failed: vect: %d, size: %d", j, size)
417 | }
418 | }
419 |
420 | }
421 | }
422 |
423 | func makeReplaceRowRandom(d int) []int {
424 | rand.Seed(time.Now().UnixNano())
425 |
426 | n := rand.Intn(d + 1)
427 | s := make([]int, 0)
428 | c := 0
429 | for i := 0; i < 64; i++ {
430 | if c == n {
431 | break
432 | }
433 | v := rand.Intn(d)
434 | if !isIn(v, s) {
435 | s = append(s, v)
436 | c++
437 | }
438 | }
439 | if c == 0 {
440 | s = []int{0}
441 | }
442 | return s
443 | }
444 |
// makeLostRandom returns lostN distinct random indexes in [0, n).
//
// Every index, including 0, can be chosen. The indexes are the first lostN
// entries of a random permutation of [0, n), so lostN must not be greater
// than n.
func makeLostRandom(n, lostN int) []int {
	rand.Seed(time.Now().UnixNano())
	// Cap the slice at lostN so the caller can't reach the rest of the permutation.
	return rand.Perm(n)[:lostN:lostN]
}
461 |
462 | func makeHasFromLost(n int, lost []int) []int {
463 | s := make([]int, n-len(lost))
464 | c := 0
465 | for i := 0; i < n; i++ {
466 | if !isIn(i, lost) {
467 | s[c] = i
468 | c++
469 | }
470 | }
471 | return s
472 | }
473 |
474 | func BenchmarkXRS_Encode(b *testing.B) {
475 | dps := [][]int{
476 | []int{12, 4},
477 | }
478 |
479 | sizes := []int{
480 | 4 * kb,
481 | mb,
482 | 8 * mb,
483 | }
484 |
485 | b.Run("", benchmarkEncode(benchEnc, dps, sizes))
486 | }
487 |
488 | func benchmarkEncode(f func(*testing.B, int, int, int), dps [][]int, sizes []int) func(*testing.B) {
489 | return func(b *testing.B) {
490 | for _, dp := range dps {
491 | d, p := dp[0], dp[1]
492 | for _, size := range sizes {
493 | b.Run(fmt.Sprintf("(%d+%d)-%s", d, p, byteToStr(size)), func(b *testing.B) {
494 | f(b, d, p, size)
495 | })
496 | }
497 | }
498 | }
499 | }
500 |
501 | func benchEnc(b *testing.B, d, p, size int) {
502 |
503 | vects := make([][]byte, d+p)
504 | for j := 0; j < d+p; j++ {
505 | vects[j] = make([]byte, size)
506 | }
507 | for j := 0; j < d; j++ {
508 | fillRandom(vects[j])
509 | }
510 | x, err := New(d, p)
511 | if err != nil {
512 | b.Fatal(err)
513 | }
514 |
515 | b.SetBytes(int64((d + p) * size))
516 | b.ResetTimer()
517 | for i := 0; i < b.N; i++ {
518 | err = x.Encode(vects)
519 | if err != nil {
520 | b.Fatal(err)
521 | }
522 | }
523 | }
524 |
525 | func BenchmarkXRS_Reconst(b *testing.B) {
526 | d, p := 12, 4
527 | size := 4 * kb
528 |
529 | b.Run("", benchmarkReconst(benchReconst, d, p, size))
530 | }
531 |
532 | func benchmarkReconst(f func(*testing.B, int, int, int, []int, []int), d, p, size int) func(*testing.B) {
533 |
534 | datas := make([]int, d)
535 | for i := range datas {
536 | datas[i] = i
537 | }
538 | return func(b *testing.B) {
539 | for i := 1; i <= p; i++ {
540 | lost := datas[:i]
541 | dpHas := makeHasFromLost(d+p, lost)
542 | b.Run(fmt.Sprintf("(%d+%d)-%s-reconst_%d_data_vects",
543 | d, p, byteToStr(size), i),
544 | func(b *testing.B) { f(b, d, p, size, dpHas, lost) })
545 | }
546 | }
547 | }
548 |
549 | func benchReconst(b *testing.B, d, p, size int, dpHas, needReconst []int) {
550 | vects := make([][]byte, d+p)
551 | for j := 0; j < d+p; j++ {
552 | vects[j] = make([]byte, size)
553 | }
554 | for j := 0; j < d; j++ {
555 | fillRandom(vects[j])
556 | }
557 | x, err := New(d, p)
558 | if err != nil {
559 | b.Fatal(err)
560 | }
561 | err = x.Encode(vects)
562 | if err != nil {
563 | b.Fatal(err)
564 | }
565 |
566 | bs := (d + len(needReconst)) * size
567 | if len(needReconst) == 1 {
568 | aNeed, _, err := x.GetNeedVects(needReconst[0])
569 | if err != nil {
570 | b.Fatal(err)
571 | }
572 | bs = (d-1+2+len(aNeed))*size/2 + size
573 | }
574 |
575 | b.SetBytes(int64(bs))
576 | b.ResetTimer()
577 | for i := 0; i < b.N; i++ {
578 | err = x.Reconst(vects, dpHas, needReconst)
579 | if err != nil {
580 | b.Fatal(err)
581 | }
582 | }
583 | }
584 |
585 | func BenchmarkXRS_Update(b *testing.B) {
586 | d, p := 12, 4
587 | size := 4 * kb
588 |
589 | b.Run("", benchmarkUpdate(benchUpdate, d, p, size))
590 | }
591 |
592 | func benchmarkUpdate(f func(*testing.B, int, int, int, int), d, p, size int) func(*testing.B) {
593 |
594 | return func(b *testing.B) {
595 | updateRow := rand.Intn(d)
596 | b.Run(fmt.Sprintf("(%d+%d)-%s",
597 | d, p, byteToStr(size)),
598 | func(b *testing.B) { f(b, d, p, size, updateRow) })
599 | }
600 | }
601 |
602 | func benchUpdate(b *testing.B, d, p, size, updateRow int) {
603 | vects := make([][]byte, d+p)
604 | for j := 0; j < d+p; j++ {
605 | vects[j] = make([]byte, size)
606 | }
607 | for j := 0; j < d; j++ {
608 | fillRandom(vects[j])
609 | }
610 | x, err := New(d, p)
611 | if err != nil {
612 | b.Fatal(err)
613 | }
614 | err = x.Encode(vects)
615 | if err != nil {
616 | b.Fatal(err)
617 | }
618 |
619 | newData := make([]byte, size)
620 | fillRandom(newData)
621 |
622 | b.SetBytes(int64((p + 2 + p) * size))
623 | b.ResetTimer()
624 | for i := 0; i < b.N; i++ {
625 | err = x.Update(vects[updateRow], newData, updateRow, vects[d:])
626 | if err != nil {
627 | b.Fatal(err)
628 | }
629 | }
630 | }
631 |
632 | func BenchmarkXRS_Replace(b *testing.B) {
633 | d, p := 12, 4
634 | size := 4 * kb
635 |
636 | b.Run("", benchmarkReplace(benchReplace, d, p, size))
637 | }
638 |
639 | func benchmarkReplace(f func(*testing.B, int, int, int, int), d, p, size int) func(*testing.B) {
640 |
641 | return func(b *testing.B) {
642 | for i := 1; i <= d-p; i++ {
643 | b.Run(fmt.Sprintf("(%d+%d)-%s-replace_%d_data_vects",
644 | d, p, byteToStr(size), i),
645 | func(b *testing.B) { f(b, d, p, size, i) })
646 | }
647 | }
648 | }
649 |
650 | func benchReplace(b *testing.B, d, p, size, n int) {
651 | vects := make([][]byte, d+p)
652 | for j := 0; j < d+p; j++ {
653 | vects[j] = make([]byte, size)
654 | }
655 | for j := 0; j < d; j++ {
656 | fillRandom(vects[j])
657 | }
658 | x, err := New(d, p)
659 | if err != nil {
660 | b.Fatal(err)
661 | }
662 | err = x.Encode(vects)
663 | if err != nil {
664 | b.Fatal(err)
665 | }
666 |
667 | updateRows := make([]int, n)
668 | for i := range updateRows {
669 | updateRows[i] = i
670 | }
671 | b.SetBytes(int64((n + p + p) * size))
672 | b.ResetTimer()
673 | for i := 0; i < b.N; i++ {
674 | err = x.Replace(vects[:n], updateRows, vects[d:])
675 | if err != nil {
676 | b.Fatal(err)
677 | }
678 | }
679 | }
680 |
681 | func byteToStr(n int) string {
682 | if n >= mb {
683 | return fmt.Sprintf("%dMB", n/mb)
684 | }
685 |
686 | return fmt.Sprintf("%dKB", n/kb)
687 | }
688 |
--------------------------------------------------------------------------------