├── .github └── workflows │ └── unit-test.yml ├── .gitignore ├── LICENSE ├── README.md ├── go.mod ├── go.sum ├── xrs.go └── xrs_test.go /.github/workflows/unit-test.yml: -------------------------------------------------------------------------------- 1 | name: unit-test 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - release/* 8 | pull_request: 9 | branches: 10 | - master 11 | 12 | jobs: 13 | 14 | test: 15 | name: Test 16 | runs-on: ubuntu-latest 17 | steps: 18 | 19 | - name: Set up Go 1.13 20 | uses: actions/setup-go@v1 21 | with: 22 | go-version: 1.13 23 | id: go 24 | 25 | - name: Check out code into the Go module directory 26 | uses: actions/checkout@v1 27 | 28 | - name: Get dependencies 29 | run: | 30 | go get -v -t -d ./... 31 | if [ -f Gopkg.toml ]; then 32 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 33 | dep ensure 34 | fi 35 | - name: Run test 36 | run: CGO_ENABLED=1 GO111MODULE=on go test -v -race -short 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | /utils/ratetrafficdown 16 | .idea 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Temple3x (temple3x@gmail.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # X-Reed-Solomon 2 | 3 | [![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8] 4 | 5 | [1]: https://godoc.org/github.com/templexxx/xrs?status.svg 6 | [2]: https://godoc.org/github.com/templexxx/xrs 7 | [3]: https://img.shields.io/badge/license-MIT-blue.svg 8 | [4]: LICENSE 9 | [5]: https://github.com/templexxx/xrs/workflows/unit-test/badge.svg 10 | [6]: https://github.com/templexxx/xrs 11 | [7]: https://goreportcard.com/badge/github.com/templexxx/xrs 12 | [8]: https://goreportcard.com/report/github.com/templexxx/xrs 13 | 14 | ## Introduction: 15 | 16 | >- Fast and efficient data reconstruction Erasure Code engine in pure Go. 17 | > 18 | >- [Systematic Codes](https://en.wikipedia.org/wiki/Systematic_code) with [MDS property](https://en.wikipedia.org/wiki/Singleton_bound#MDS_codes). 19 | > 20 | >- [More than 10GB/s per physics core.](https://github.com/templexxx/xrs#performance) 21 | > 22 | >- Saving about 30% I/O in reconstruction. 23 | > 24 | >- Has been used for a distributed storage system with more than 10PB data. 25 | > 26 | >- Based on papers: 27 | > 1. [](https://www.cs.cmu.edu/~nihars/publications/Hitchhiker_SIGCOMM14.pdf) 28 | > 2. [](http://www.cs.cmu.edu/~rvinayak/papers/piggybacking_journal_ieee_tit_2017.pdf) 29 | 30 | ## Getting Started 31 | 32 | >- Make sure you have read the papers. 33 | > 34 | >- XRS splits row vector into two equal parts. 35 | > 36 | > e.g. 10+4: 37 | > 38 | +---------+ 39 | | a1 | b1 | 40 | +---------+ 41 | | a2 | b2 | 42 | +---------+ 43 | | a3 | b3 | 44 | +---------+ 45 | ... 46 | +---------+ 47 | | a10| b10| 48 | +---------+ 49 | | a11| b11| 50 | +---------+ 51 | | a12| b12| 52 | +---------+ 53 | | a13| b13| 54 | +---------+ 55 | 56 | >>- So it's important to choose a fit size for reading/write disks efficiently. 57 | > 58 | >- APIs are almost as same as normal Reed-Solomon Erasure Codes. 59 | 60 | ## Performance 61 | 62 | Performance depends mainly on: 63 | 64 | >- CPU instruction extension. 65 | > 66 | >- Number of data/parity row vectors. 67 | 68 | **Platform:** 69 | 70 | *MacBook Pro 15-inch, 2017 (Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz)* 71 | 72 | >All test run on a single Core. 73 | > 74 | >RS means Reed-Solomon Codes(for comparing), the RS lib is [here](https://github.com/templexxx/reedsolomon) 75 | 76 | ### Encode: 77 | 78 | `I/O = (data + parity) * vector_size / cost` 79 | 80 | *Base means no SIMD.* 81 | 82 | | Data | Parity | Vector size | RS I/O (MB/S) | XRS I/O (MB/S) | 83 | |-------|---------|-------------|-------------|---------------| 84 | |12|4|4KB| 12658.00 | 10895.15 | 85 | |12|4|1MB| 8989.67 | 7530.84 | 86 | |12|4|8MB| 8509.06 | 6579.53 | 87 | 88 | ### Reconstruct: 89 | 90 | `Need Data = Data size need read in reconstruction` 91 | 92 | `I/O = (need_data + reconstruct_data_num * vector_size) / cost` 93 | 94 | | Data | Parity | Vector size | Reconstruct Data Num | RS Need Data | XRS Need Data | RS Cost | XRS Cost | RS I/O (MB/S) | XRS I/O (MB/S) | 95 | |-------|---------|-------------|-------------|---------------|---------------|-------------|---------------|-------------|---------------| 96 | |12|4|4KB| 1 | 48KB | 34KB | 2140 ns/op | 3567 ns/op | 24885.17 |10334.99| 97 | |12|4|4KB| 2 | 48KB | 48KB | 3395 ns/op | 5940 ns/op | 16890.41 |9654.17| 98 | |12|4|4KB| 3 | 48KB | 48KB | 4746 ns/op | 7525 ns/op | 12945.61 |8164.76| 99 | |12|4|4KB| 4 | 48KB | 48KB | 5958 ns/op | 8851 ns/op | 10999.75 |7404.41| 100 | 101 | ### Update: 102 | 103 | `I/O = (2 + parity_num + parity_num) * vector_size / cost` 104 | 105 | | Data | Parity | Vector size | RS I/O (MB/S) | XRS I/O (MB/S) | 106 | |-------|---------|-------------|-------------|-------------| 107 | |12|4|4KB| 32739.22 | 26312.14 | 108 | 109 | ### Replace: 110 | 111 | `I/O = (parity_num + parity_num + replace_data_num) * vector_size / cost` 112 | 113 | | Data | Parity | Vector size | Replace Data Num | RS I/O (MB/S) |XRS I/O (MB/S) | 114 | |-------|---------|-------------|-------------|---------------|-------------| 115 | |12|4|4KB| 1 | 63908.06 | 44082.57 | 116 | |12|4|4KB| 2 | 39966.65 | 26554.30 | 117 | |12|4|4KB| 3 | 30007.81 | 19583.16 | 118 | |12|4|4KB| 4 | 25138.38 | 16636.82 | 119 | |12|4|4KB| 5 | 21261.91 | 14301.15 | 120 | |12|4|4KB| 6 | 19833.14 | 13121.98 | 121 | |12|4|4KB| 7 | 18395.47 | 12028.10 | 122 | |12|4|4KB| 8 | 17364.02 | 11300.55 | 123 | 124 | **PS:** 125 | 126 | *And we must know the benchmark test is quite different with encoding/decoding in practice. 127 | Because in benchmark test loops, the CPU Cache may help a lot.* 128 | 129 | ## Links & Deps 130 | * [Reed-Solomon](https://github.com/templexxx/reedsolomon) 131 | * [XOR](https://github.com/templexxx/xorsimd) 132 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/templexxx/xrs 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/templexxx/reedsolomon v1.1.3 7 | github.com/templexxx/xorsimd v0.1.1 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/templexxx/cpu v0.0.1 h1:hY4WdLOgKdc8y13EYklu9OUTXik80BkxHoWvTO6MQQY= 2 | github.com/templexxx/cpu v0.0.1/go.mod h1:w7Tb+7qgcAlIyX4NhLuDKt78AHA5SzPmq0Wj6HiEnnk= 3 | github.com/templexxx/reedsolomon v1.1.3 h1:UJZtgOAcp8Ldl9Qp9/8YIPHfW58vsUrPHcJYv15Na50= 4 | github.com/templexxx/reedsolomon v1.1.3/go.mod h1:lCyQlNrc8GTWsFE47kSLkJJvsGL8Lo5pfUDPiJZMm3o= 5 | github.com/templexxx/xorsimd v0.1.1 h1:Y4e8YgMx/4xRJO4G6lq0bSswfDCxbIrGu7KqM2ET524= 6 | github.com/templexxx/xorsimd v0.1.1/go.mod h1:W+ffZz8jJMH2SXwuKu9WhygqBMbFnp14G2fqEr8qaNo= 7 | -------------------------------------------------------------------------------- /xrs.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | // Package xrs implements Erasure Codes based on 7 | // , 8 | // split row vectors into two equal size parts: 9 | // e.g. 10+4: 10 | // +---------+ 11 | // | a1 | b1 | 12 | // +---------+ 13 | // | a2 | b2 | 14 | // +---------+ 15 | // | a3 | b3 | 16 | // +---------+ 17 | // ... 18 | // +---------+ 19 | // | a10| b10| 20 | // +---------+ 21 | // | a11| b11| 22 | // +---------+ 23 | // | a12| b12| 24 | // +---------+ 25 | // | a13| b13| 26 | // +---------+ 27 | 28 | package xrs 29 | 30 | import ( 31 | "errors" 32 | "fmt" 33 | 34 | rs "github.com/templexxx/reedsolomon" 35 | xor "github.com/templexxx/xorsimd" 36 | ) 37 | 38 | // XRS X-Reed-Solomon Codes receiver. 39 | type XRS struct { 40 | // RS is the backend of XRS> 41 | RS *rs.RS 42 | // XORSet shows how XRS combines sub-vectors by xor. 43 | // 44 | // Key: Parity index(except first parity). 45 | // Value: Data indexes. 46 | XORSet map[int][]int 47 | } 48 | 49 | // New create an XRS with specific data & parity numbers. 50 | // 51 | // Warn: 52 | // parityNum can't be 1. 53 | func New(dataNum, parityNum int) (x *XRS, err error) { 54 | if parityNum == 1 { 55 | err = errors.New("illegal parity") 56 | return 57 | } 58 | r, err := rs.New(dataNum, parityNum) 59 | if err != nil { 60 | return 61 | } 62 | xs := make(map[int][]int) 63 | makeXORSet(dataNum, parityNum, xs) 64 | x = &XRS{RS: r, XORSet: xs} 65 | return 66 | } 67 | 68 | // e.g. 10+4: 69 | // 70 | // We will have this xor_set: 11:[0 3 6 9] 12:[1 4 7] 13:[2 5 8], 71 | // which means: 72 | // b11 ⊕ a0 ⊕ a3 ⊕ a6 ⊕ a9 = new_b11 73 | // b12 ⊕ a1 ⊕ a4 ⊕ a7 = new_b12 74 | // b13 ⊕ a2 ⊕ a5 ⊕ a8 = new_b13 75 | func makeXORSet(d, p int, m map[int][]int) { 76 | 77 | // Init map. 78 | for i := d + 1; i < d+p; i++ { 79 | m[i] = make([]int, 0) 80 | } 81 | 82 | // Fill map. 83 | j := d + 1 84 | for i := 0; i < d; i++ { 85 | if j > d+p-1 { 86 | j = d + 1 87 | } 88 | m[j] = append(m[j], i) 89 | j++ 90 | } 91 | 92 | // Clean map. 93 | for k, v := range m { 94 | if len(v) == 0 { 95 | delete(m, k) 96 | } 97 | } 98 | } 99 | 100 | // Encode encodes data for generating parity. 101 | // Write parity vectors into vects[r.DataNum:]. 102 | func (x *XRS) Encode(vects [][]byte) (err error) { 103 | 104 | err = checkSize(vects[0]) 105 | if err != nil { 106 | return 107 | } 108 | size := len(vects[0]) 109 | 110 | // Step1: Reed-Solomon encode. 111 | err = x.RS.Encode(vects) 112 | if err != nil { 113 | return 114 | } 115 | 116 | // Step2: XOR by xor_set. 117 | half := size / 2 118 | for bi, xs := range x.XORSet { 119 | xv := make([][]byte, len(xs)+1) 120 | xv[0] = vects[bi][half:] 121 | for j, ai := range xs { 122 | xv[j+1] = vects[ai][:half] 123 | } 124 | xor.Encode(vects[bi][half:], xv) 125 | } 126 | return 127 | } 128 | 129 | func checkSize(vect []byte) error { 130 | size := len(vect) 131 | if size&1 != 0 { 132 | return fmt.Errorf("vect size not even: %d", size) 133 | } 134 | return nil 135 | } 136 | 137 | // GetNeedVects receives needReconst index(it must be data vector) 138 | // returns a_vectors' indexes and b_parity_vectors' indexes for reconstructing needReconst. 139 | // It's used for ReconstOne to read correct vectors for saving I/O. 140 | // 141 | // bNeed always has two elements, the first one is DataNum. 142 | func (x *XRS) GetNeedVects(needReconst int) (aNeed, bNeed []int, err error) { 143 | d := x.RS.DataNum 144 | if needReconst < 0 || needReconst >= d { 145 | err = fmt.Errorf("illegal data index: %d", needReconst) 146 | return 147 | } 148 | 149 | // Find b. 150 | bNeed = make([]int, 2) 151 | bNeed[0] = d // Must has b_vects[d]. 152 | xs := x.XORSet 153 | for i, s := range xs { 154 | if isIn(needReconst, s) { 155 | bNeed[1] = i 156 | break 157 | } 158 | } 159 | 160 | // Get a (except needReconst). 161 | for _, i := range xs[bNeed[1]] { 162 | if i != needReconst { 163 | aNeed = append(aNeed, i) 164 | } 165 | } 166 | return 167 | } 168 | 169 | // ReconstOne reconstruct one data vector, it saves I/O. 170 | // Make sure you have some specific vectors data. ( you can get the vectors' indexes from GetNeedVects) 171 | func (x *XRS) ReconstOne(vects [][]byte, needReconst int) (err error) { 172 | 173 | err = checkSize(vects[0]) 174 | if err != nil { 175 | return 176 | } 177 | 178 | aNeed, bNeed, err := x.GetNeedVects(needReconst) 179 | if err != nil { 180 | return 181 | } 182 | 183 | // Step1: Reconstruct b_needReconst & rs(bNeed[1]), using original Reed-Solomon Codes. 184 | bVects := make([][]byte, len(vects)) 185 | half := len(vects[0]) / 2 186 | for i, v := range vects { 187 | bVects[i] = v[half:] 188 | } 189 | 190 | d := x.RS.DataNum 191 | bDPHas := make([]int, d) 192 | for i := 0; i < d; i++ { 193 | bDPHas[i] = i 194 | } 195 | bDPHas[needReconst] = d // Replace needReconst with DataNum. 196 | 197 | bi := bNeed[1] // B index in XORSet. 198 | 199 | bRS := make([]byte, half) 200 | bVects[bi] = bRS 201 | err = x.RS.Reconst(bVects, bDPHas, []int{needReconst, bi}) 202 | if err != nil { 203 | return 204 | } 205 | 206 | // Step2: Reconstruct a_needReconst 207 | // ∵ a_needReconst ⊕ a_need ⊕ bRS = vects[bi] 208 | // ∴ a_needReconst = vects[bi] ⊕ bRS ⊕ a_need 209 | xorV := make([][]byte, len(aNeed)+2) 210 | xorV[0] = vects[bi][half:] 211 | xorV[1] = bRS 212 | for i, ai := range aNeed { 213 | xorV[i+2] = vects[ai][:half] 214 | } 215 | xor.Encode(vects[needReconst][:half], xorV) 216 | return 217 | } 218 | 219 | // Reconst reconstructs missing vectors, 220 | // vects: All vectors, len(vects) = dataNum + parityNum. 221 | // dpHas: Survived data & parity index, need dataNum indexes at least. 222 | // needReconst: Vectors indexes which need to be reconstructed. 223 | // 224 | // Warn: 225 | // If there is only one needReconst, it will call ReconstOne, 226 | // so make sure you have correct data, if there is only one vectors need to repair. 227 | // 228 | // e.g: 229 | // in 3+2, the whole index: [0,1,2,3,4], 230 | // if vects[0,4] are lost & they need to be reconstructed 231 | // (Maybe you only need vects[0], so the needReconst should be [0], but not [0,4]). 232 | // the "dpHas" will be [1,2,3] ,and you must be sure that vects[1] vects[2] vects[3] have correct data, 233 | // results will be written into vects[0]&vects[4] directly. 234 | func (x *XRS) Reconst(vects [][]byte, dpHas, needReconst []int) (err error) { 235 | 236 | if len(needReconst) == 1 && needReconst[0] < x.RS.DataNum { 237 | return x.ReconstOne(vects, needReconst[0]) 238 | } 239 | 240 | err = checkSize(vects[0]) 241 | if err != nil { 242 | return 243 | } 244 | 245 | // Step1: Reconstruct all a_vectors. 246 | half := len(vects[0]) / 2 247 | aVects := make([][]byte, len(vects)) 248 | for i := range vects { 249 | aVects[i] = vects[i][:half] 250 | } 251 | aLost := make([]int, 0) 252 | for i := 0; i < x.RS.DataNum+x.RS.ParityNum; i++ { 253 | if !isIn(i, dpHas) { 254 | aLost = append(aLost, i) 255 | } 256 | } 257 | err = x.RS.Reconst(aVects, dpHas, aLost) 258 | if err != nil { 259 | return 260 | } 261 | 262 | // Step2: Retrieve b_vectors to RS codes(if has). 263 | err = x.retrieveRS(vects, dpHas) 264 | if err != nil { 265 | return 266 | } 267 | 268 | // Step3: Reconstruct b_vectors using RS codes. 269 | bVects := make([][]byte, len(vects)) 270 | for i := range vects { 271 | bVects[i] = vects[i][half:] 272 | } 273 | err = x.RS.Reconst(bVects, dpHas, needReconst) 274 | if err != nil { 275 | return 276 | } 277 | 278 | // Step4: XOR b_parity_vectors according to XORSet(if need). 279 | d := x.RS.DataNum 280 | _, pn := rs.SplitNeedReconst(d, needReconst) 281 | if len(pn) != 0 { 282 | if len(pn) == 1 && pn[0] == d { 283 | return nil 284 | } 285 | for _, i := range pn { 286 | if i != d { 287 | xs := x.XORSet[i] 288 | xv := make([][]byte, len(xs)+1) 289 | xv[0] = vects[i][half:] 290 | for j, ai := range xs { 291 | xv[j+1] = vects[ai][:half] 292 | } 293 | xor.Encode(vects[i][half:], xv) 294 | } 295 | } 296 | } 297 | 298 | return nil 299 | } 300 | 301 | // retrieveRS retrieves b_parity_vects(if has) to RS codes 302 | // by XOR itself and a_vects in XORSet. 303 | func (x *XRS) retrieveRS(vects [][]byte, dpHas []int) (err error) { 304 | 305 | half := len(vects[0]) / 2 306 | for _, h := range dpHas { 307 | if h > x.RS.DataNum { // vects[data] is rs_codes 308 | xs := x.XORSet[h] 309 | xv := make([][]byte, len(xs)+1) 310 | xv[0] = vects[h][half:] // put B first 311 | for i, ai := range xs { 312 | xv[i+1] = vects[ai][:half] 313 | } 314 | xor.Encode(vects[h][half:], xv) 315 | } 316 | } 317 | return 318 | } 319 | 320 | // Update updates parity_data when one data_vect changes. 321 | // row: It's the new data's index in the whole vectors. 322 | func (x *XRS) Update(oldData, newData []byte, row int, parity [][]byte) (err error) { 323 | 324 | err = checkSize(oldData) 325 | if err != nil { 326 | return 327 | } 328 | 329 | err = x.RS.Update(oldData, newData, row, parity) 330 | if err != nil { 331 | return 332 | } 333 | 334 | _, bNeed, err := x.GetNeedVects(row) 335 | if err != nil { 336 | return 337 | } 338 | half := len(oldData) / 2 339 | src := make([][]byte, 3) 340 | bv := parity[bNeed[1]-x.RS.DataNum][half:] 341 | src[0], src[1], src[2] = oldData[:half], newData[:half], bv 342 | xor.Encode(bv, src) 343 | return 344 | } 345 | 346 | // Replace replaces oldData vectors with 0 or replaces 0 with newData vectors. 347 | // 348 | // In practice, 349 | // If len(replaceRows) > dataNum-parityNum, it's better to use Encode, 350 | // because Replace need to read len(replaceRows) + parityNum vectors, 351 | // if replaceRows are too many, the cost maybe larger than Encode 352 | // (Encode only need read dataNum). 353 | // Think about an EC compute node, and dataNum+parityNum data nodes model. 354 | // 355 | // It's used in two situations: 356 | // 1. We didn't have enough data for filling in a stripe, but still did ec encode, 357 | // we need replace several zero vectors with new vectors which have data after we get enough data finally. 358 | // 2. After compact, we may have several useless vectors in a stripe, 359 | // we need replaces these useless vectors with zero vectors for free space. 360 | // 361 | // Warn: 362 | // data's index & replaceRows must has the same sort. 363 | func (x *XRS) Replace(data [][]byte, replaceRows []int, parity [][]byte) (err error) { 364 | 365 | err = checkSize(data[0]) 366 | if err != nil { 367 | return 368 | } 369 | 370 | err = x.RS.Replace(data, replaceRows, parity) 371 | if err != nil { 372 | return 373 | } 374 | 375 | for i := range replaceRows { 376 | _, bNeed, err2 := x.GetNeedVects(replaceRows[i]) 377 | if err2 != nil { 378 | return err2 379 | } 380 | 381 | half := len(data[0]) / 2 382 | bv := parity[bNeed[1]-x.RS.DataNum][half:] 383 | xor.Encode(bv, [][]byte{bv, data[i][:half]}) 384 | } 385 | 386 | return 387 | } 388 | 389 | func isIn(e int, s []int) bool { 390 | for _, v := range s { 391 | if e == v { 392 | return true 393 | } 394 | } 395 | return false 396 | } 397 | -------------------------------------------------------------------------------- /xrs_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | package xrs 7 | 8 | import ( 9 | "bytes" 10 | "fmt" 11 | "math/rand" 12 | "sort" 13 | "testing" 14 | "time" 15 | ) 16 | 17 | const ( 18 | kb = 1 << 10 19 | mb = 1 << 20 20 | testDataNum = 12 21 | testParityNum = 4 22 | testSize = 1024 23 | ) 24 | 25 | // We need the result to be as same as old one. 26 | func TestMakeXORSet(t *testing.T) { 27 | for d := 1; d <= 255; d++ { 28 | for p := 2; p <= 255; p++ { 29 | if d+p > 256 { 30 | continue 31 | } 32 | 33 | xs1 := make(map[int][]int) 34 | makeXORSet(d, p, xs1) 35 | xs2 := make(map[int][]int) 36 | makeXORSetOld(d, p, xs2) 37 | 38 | if len(xs1) != len(xs2) { 39 | t.Fatal("mismatch map len", d, p, xs1, xs2) 40 | } 41 | for k, v1 := range xs1 { 42 | v2 := xs2[k] 43 | if len(v1) != len(v2) { 44 | t.Fatal("mismatch len") 45 | } 46 | for j, k := range v1 { 47 | if k != v2[j] { 48 | t.Fatal("element mismatch") 49 | } 50 | } 51 | } 52 | 53 | } 54 | } 55 | } 56 | 57 | // makeXORSetOld is the old implementation. 58 | func makeXORSetOld(d, p int, m map[int][]int) { 59 | a := 0 60 | for { 61 | if a == d { 62 | break 63 | } 64 | for i := d + 1; i < d+p; i++ { 65 | if a == d { 66 | break 67 | } 68 | l := m[i] 69 | l = append(l, a) 70 | m[i] = l 71 | a++ 72 | } 73 | } 74 | 75 | return 76 | } 77 | 78 | // Powered by MATLAB 79 | func TestXRS_Encode(t *testing.T) { 80 | d, p := 5, 5 81 | x, err := New(d, p) 82 | if err != nil { 83 | t.Fatal(err) 84 | } 85 | vects := [][]byte{{0, 0}, {4, 7}, {2, 4}, {6, 9}, {8, 11}, 86 | {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}} 87 | err = x.Encode(vects) 88 | if err != nil { 89 | t.Fatal(err) 90 | } 91 | exp := [][]byte{{0, 0}, {4, 7}, {2, 4}, {6, 9}, {8, 11}, 92 | {97, 156}, {173, 117}, {218, 110}, {107, 59}, {110, 153}} 93 | 94 | for i := range exp { 95 | if !bytes.Equal(exp[i], vects[i]) { 96 | t.Fatalf("encode failed: vect %d mismatch", i) 97 | } 98 | } 99 | } 100 | 101 | func TestXRS_GetNeedVects(t *testing.T) { 102 | for d := 1; d <= 255; d++ { 103 | for p := 2; p <= 255; p++ { 104 | if d+p > 256 { 105 | continue 106 | } 107 | 108 | xrs, err := New(d, p) 109 | if err != nil { 110 | t.Fatal(err) 111 | } 112 | 113 | for i := 0; i < d; i++ { 114 | a, b, err := xrs.GetNeedVects(i) 115 | if err != nil { 116 | t.Fatal(err) 117 | } 118 | 119 | a = append(a, i) 120 | expA := xrs.XORSet[b[1]] 121 | if len(a) != len(expA) { 122 | t.Fatal("mismatch len") 123 | } 124 | sort.Ints(a) 125 | for j, k := range a { 126 | if k != expA[j] { 127 | t.Fatal("element mismatch") 128 | } 129 | } 130 | } 131 | } 132 | } 133 | } 134 | 135 | func TestXRS_ReconstOne(t *testing.T) { 136 | testReconstOne(t, testDataNum, testParityNum, 2) 137 | } 138 | 139 | func testReconstOne(t *testing.T, d, p, size int) { 140 | rand.Seed(time.Now().UnixNano()) 141 | 142 | for lost := 0; lost < d; lost++ { 143 | 144 | // init expect, result 145 | expect := make([][]byte, d+p) 146 | result := make([][]byte, d+p) 147 | for j := 0; j < d+p; j++ { 148 | expect[j] = make([]byte, size) 149 | result[j] = make([]byte, size) 150 | } 151 | for j := 0; j < d; j++ { 152 | fillRandom(expect[j]) 153 | } 154 | x, err := New(d, p) 155 | if err != nil { 156 | t.Fatal(err) 157 | } 158 | err = x.Encode(expect) 159 | if err != nil { 160 | t.Fatal(err) 161 | } 162 | for j := 0; j < d+p; j++ { 163 | copy(result[j], expect[j]) 164 | } 165 | 166 | // Clean all data except needed. 167 | // Clean needReconst. 168 | needReconst := lost 169 | result[needReconst] = make([]byte, size) 170 | // Clean A & B. 171 | aVects, bVects := make([][]byte, d+p), make([][]byte, d+p) 172 | half := size / 2 173 | for j := range result { 174 | aVects[j], bVects[j] = result[j][:half], result[j][half:] 175 | } 176 | aNeed, bNeed, err := x.GetNeedVects(needReconst) 177 | if err != nil { 178 | t.Fatal(err) 179 | } 180 | // Clean A. 181 | for j := range result { 182 | if !isIn(j, aNeed) { 183 | aVects[j] = make([]byte, half) 184 | } 185 | } 186 | // Clean B. 187 | bVects[needReconst] = make([]byte, size) 188 | for j := d; j < d+p; j++ { 189 | if !isIn(j, bNeed) { 190 | bVects[j] = make([]byte, half) 191 | } 192 | } 193 | 194 | for j := range result { 195 | copy(result[j][:half], aVects[j]) 196 | copy(result[j][half:], bVects[j]) 197 | 198 | } 199 | err = x.ReconstOne(result, needReconst) 200 | if err != nil { 201 | t.Fatal(err) 202 | } 203 | 204 | if !bytes.Equal(result[needReconst], expect[needReconst]) { 205 | t.Fatalf("mismatch reconstOne; vect: %d; size: %d", needReconst, size) 206 | } 207 | } 208 | } 209 | 210 | func fillRandom(p []byte) { 211 | rand.Read(p) 212 | } 213 | 214 | func TestXRS_retrieveRS(t *testing.T) { 215 | d, p := testDataNum, testParityNum 216 | x, err := New(d, p) 217 | if err != nil { 218 | t.Fatal(err) 219 | } 220 | 221 | rand.Seed(time.Now().UnixNano()) 222 | 223 | vects := make([][]byte, d+p) 224 | results := make([][]byte, d+p) 225 | for i := range vects { 226 | vects[i] = make([]byte, testSize) 227 | results[i] = make([]byte, testSize) 228 | fillRandom(vects[i]) 229 | copy(results[i], vects[i]) 230 | } 231 | 232 | err = x.retrieveRS(results, rand.Perm(d+p)) 233 | if err != nil { 234 | t.Fatal(err) 235 | } 236 | err = x.retrieveRS(results, rand.Perm(d+p)) 237 | if err != nil { 238 | t.Fatal(err) 239 | } 240 | 241 | for i := range vects { 242 | if !bytes.Equal(vects[i], results[i]) { 243 | t.Fatalf("mismatch retrieveRS; vect: %d", i) 244 | } 245 | } 246 | } 247 | 248 | func TestXRS_Reconst(t *testing.T) { 249 | testReconst(t, testDataNum, testParityNum, testSize, 128) 250 | } 251 | 252 | func testReconst(t *testing.T, d, p, size, loop int) { 253 | 254 | rand.Seed(time.Now().UnixNano()) 255 | 256 | for i := 0; i < loop; i++ { 257 | exp := make([][]byte, d+p) 258 | act := make([][]byte, d+p) 259 | for j := 0; j < d+p; j++ { 260 | exp[j], act[j] = make([]byte, size), make([]byte, size) 261 | } 262 | for j := 0; j < d; j++ { 263 | fillRandom(exp[j]) 264 | } 265 | 266 | x, err := New(d, p) 267 | if err != nil { 268 | t.Fatal(err) 269 | } 270 | err = x.Encode(exp) 271 | if err != nil { 272 | t.Fatal(err) 273 | } 274 | 275 | lost := makeLostRandom(d+p, rand.Intn(p+1)) 276 | needReconst := lost[:rand.Intn(len(lost)+1)] 277 | if len(needReconst) == 1 { 278 | lost = needReconst // Make sure to have correct data for reconstOne. 279 | } 280 | dpHas := makeHasFromLost(d+p, lost) 281 | for _, h := range dpHas { 282 | copy(act[h], exp[h]) 283 | } 284 | 285 | // Try to reconstruct some health vectors. 286 | // Although we want to reconstruct these vectors, 287 | // but it maybe a mistake. 288 | for _, nr := range needReconst { 289 | if rand.Intn(4) == 0 { // 1/4 chance. 290 | copy(act[nr], exp[nr]) 291 | } 292 | } 293 | 294 | err = x.Reconst(act, dpHas, needReconst) 295 | if err != nil { 296 | t.Fatal(err) 297 | } 298 | 299 | for _, n := range needReconst { 300 | if !bytes.Equal(exp[n], act[n]) { 301 | t.Fatalf("reconst failed: vect: %d, size: %d", n, size) 302 | } 303 | } 304 | } 305 | } 306 | 307 | func TestXRS_Update(t *testing.T) { 308 | testUpdate(t, testDataNum, testParityNum, testSize) 309 | } 310 | 311 | func testUpdate(t *testing.T, d, p, size int) { 312 | 313 | rand.Seed(time.Now().UnixNano()) 314 | 315 | for i := 0; i < d; i++ { 316 | act := make([][]byte, d+p) 317 | exp := make([][]byte, d+p) 318 | for j := 0; j < d+p; j++ { 319 | act[j], exp[j] = make([]byte, size), make([]byte, size) 320 | } 321 | for j := 0; j < d; j++ { 322 | fillRandom(exp[j]) 323 | copy(act[j], exp[j]) 324 | } 325 | 326 | x, err := New(d, p) 327 | if err != nil { 328 | t.Fatal(err) 329 | } 330 | err = x.Encode(act) 331 | if err != nil { 332 | t.Fatal(err) 333 | } 334 | 335 | newData := make([]byte, size) 336 | fillRandom(newData) 337 | updateRow := i 338 | err = x.Update(act[updateRow], newData, updateRow, act[d:d+p]) 339 | if err != nil { 340 | t.Fatal(err) 341 | } 342 | 343 | copy(exp[updateRow], newData) 344 | err = x.Encode(exp) 345 | if err != nil { 346 | t.Fatal(err) 347 | } 348 | for j := d; j < d+p; j++ { 349 | if !bytes.Equal(act[j], exp[j]) { 350 | t.Fatalf("update failed: vect: %d, size: %d", j, size) 351 | } 352 | } 353 | } 354 | } 355 | 356 | func TestXRS_Replace(t *testing.T) { 357 | testReplace(t, testDataNum, testParityNum, testSize, 1024, true) 358 | testReplace(t, testDataNum, testParityNum, testSize, 1024, false) 359 | } 360 | 361 | func testReplace(t *testing.T, d, p, size, loop int, toZero bool) { 362 | 363 | rand.Seed(time.Now().UnixNano()) 364 | 365 | for i := 0; i < loop; i++ { 366 | replaceRows := makeReplaceRowRandom(d) 367 | act := make([][]byte, d+p) 368 | exp := make([][]byte, d+p) 369 | for j := 0; j < d+p; j++ { 370 | act[j], exp[j] = make([]byte, size), make([]byte, size) 371 | } 372 | for j := 0; j < d; j++ { 373 | fillRandom(exp[j]) 374 | copy(act[j], exp[j]) 375 | } 376 | 377 | data := make([][]byte, len(replaceRows)) 378 | for i, rr := range replaceRows { 379 | data[i] = make([]byte, size) 380 | copy(data[i], exp[rr]) 381 | } 382 | 383 | if toZero { 384 | for _, rr := range replaceRows { 385 | exp[rr] = make([]byte, size) 386 | } 387 | } 388 | 389 | x, err := New(d, p) 390 | if err != nil { 391 | t.Fatal(err) 392 | } 393 | err = x.Encode(exp) 394 | if err != nil { 395 | t.Fatal(err) 396 | } 397 | 398 | if !toZero { 399 | for _, rr := range replaceRows { 400 | act[rr] = make([]byte, size) 401 | } 402 | } 403 | err = x.Encode(act) 404 | if err != nil { 405 | t.Fatal(err) 406 | } 407 | 408 | err = x.Replace(data, replaceRows, act[d:]) 409 | if err != nil { 410 | t.Fatal(err) 411 | } 412 | 413 | for j := d; j < d+p; j++ { 414 | if !bytes.Equal(act[j], exp[j]) { 415 | fmt.Println(replaceRows) 416 | t.Fatalf("replace failed: vect: %d, size: %d", j, size) 417 | } 418 | } 419 | 420 | } 421 | } 422 | 423 | func makeReplaceRowRandom(d int) []int { 424 | rand.Seed(time.Now().UnixNano()) 425 | 426 | n := rand.Intn(d + 1) 427 | s := make([]int, 0) 428 | c := 0 429 | for i := 0; i < 64; i++ { 430 | if c == n { 431 | break 432 | } 433 | v := rand.Intn(d) 434 | if !isIn(v, s) { 435 | s = append(s, v) 436 | c++ 437 | } 438 | } 439 | if c == 0 { 440 | s = []int{0} 441 | } 442 | return s 443 | } 444 | 445 | func makeLostRandom(n, lostN int) []int { 446 | l := make([]int, lostN) 447 | rand.Seed(time.Now().UnixNano()) 448 | c := 0 449 | for { 450 | if c == lostN { 451 | break 452 | } 453 | v := rand.Intn(n) 454 | if !isIn(v, l) { 455 | l[c] = v 456 | c++ 457 | } 458 | } 459 | return l 460 | } 461 | 462 | func makeHasFromLost(n int, lost []int) []int { 463 | s := make([]int, n-len(lost)) 464 | c := 0 465 | for i := 0; i < n; i++ { 466 | if !isIn(i, lost) { 467 | s[c] = i 468 | c++ 469 | } 470 | } 471 | return s 472 | } 473 | 474 | func BenchmarkXRS_Encode(b *testing.B) { 475 | dps := [][]int{ 476 | []int{12, 4}, 477 | } 478 | 479 | sizes := []int{ 480 | 4 * kb, 481 | mb, 482 | 8 * mb, 483 | } 484 | 485 | b.Run("", benchmarkEncode(benchEnc, dps, sizes)) 486 | } 487 | 488 | func benchmarkEncode(f func(*testing.B, int, int, int), dps [][]int, sizes []int) func(*testing.B) { 489 | return func(b *testing.B) { 490 | for _, dp := range dps { 491 | d, p := dp[0], dp[1] 492 | for _, size := range sizes { 493 | b.Run(fmt.Sprintf("(%d+%d)-%s", d, p, byteToStr(size)), func(b *testing.B) { 494 | f(b, d, p, size) 495 | }) 496 | } 497 | } 498 | } 499 | } 500 | 501 | func benchEnc(b *testing.B, d, p, size int) { 502 | 503 | vects := make([][]byte, d+p) 504 | for j := 0; j < d+p; j++ { 505 | vects[j] = make([]byte, size) 506 | } 507 | for j := 0; j < d; j++ { 508 | fillRandom(vects[j]) 509 | } 510 | x, err := New(d, p) 511 | if err != nil { 512 | b.Fatal(err) 513 | } 514 | 515 | b.SetBytes(int64((d + p) * size)) 516 | b.ResetTimer() 517 | for i := 0; i < b.N; i++ { 518 | err = x.Encode(vects) 519 | if err != nil { 520 | b.Fatal(err) 521 | } 522 | } 523 | } 524 | 525 | func BenchmarkXRS_Reconst(b *testing.B) { 526 | d, p := 12, 4 527 | size := 4 * kb 528 | 529 | b.Run("", benchmarkReconst(benchReconst, d, p, size)) 530 | } 531 | 532 | func benchmarkReconst(f func(*testing.B, int, int, int, []int, []int), d, p, size int) func(*testing.B) { 533 | 534 | datas := make([]int, d) 535 | for i := range datas { 536 | datas[i] = i 537 | } 538 | return func(b *testing.B) { 539 | for i := 1; i <= p; i++ { 540 | lost := datas[:i] 541 | dpHas := makeHasFromLost(d+p, lost) 542 | b.Run(fmt.Sprintf("(%d+%d)-%s-reconst_%d_data_vects", 543 | d, p, byteToStr(size), i), 544 | func(b *testing.B) { f(b, d, p, size, dpHas, lost) }) 545 | } 546 | } 547 | } 548 | 549 | func benchReconst(b *testing.B, d, p, size int, dpHas, needReconst []int) { 550 | vects := make([][]byte, d+p) 551 | for j := 0; j < d+p; j++ { 552 | vects[j] = make([]byte, size) 553 | } 554 | for j := 0; j < d; j++ { 555 | fillRandom(vects[j]) 556 | } 557 | x, err := New(d, p) 558 | if err != nil { 559 | b.Fatal(err) 560 | } 561 | err = x.Encode(vects) 562 | if err != nil { 563 | b.Fatal(err) 564 | } 565 | 566 | bs := (d + len(needReconst)) * size 567 | if len(needReconst) == 1 { 568 | aNeed, _, err := x.GetNeedVects(needReconst[0]) 569 | if err != nil { 570 | b.Fatal(err) 571 | } 572 | bs = (d-1+2+len(aNeed))*size/2 + size 573 | } 574 | 575 | b.SetBytes(int64(bs)) 576 | b.ResetTimer() 577 | for i := 0; i < b.N; i++ { 578 | err = x.Reconst(vects, dpHas, needReconst) 579 | if err != nil { 580 | b.Fatal(err) 581 | } 582 | } 583 | } 584 | 585 | func BenchmarkXRS_Update(b *testing.B) { 586 | d, p := 12, 4 587 | size := 4 * kb 588 | 589 | b.Run("", benchmarkUpdate(benchUpdate, d, p, size)) 590 | } 591 | 592 | func benchmarkUpdate(f func(*testing.B, int, int, int, int), d, p, size int) func(*testing.B) { 593 | 594 | return func(b *testing.B) { 595 | updateRow := rand.Intn(d) 596 | b.Run(fmt.Sprintf("(%d+%d)-%s", 597 | d, p, byteToStr(size)), 598 | func(b *testing.B) { f(b, d, p, size, updateRow) }) 599 | } 600 | } 601 | 602 | func benchUpdate(b *testing.B, d, p, size, updateRow int) { 603 | vects := make([][]byte, d+p) 604 | for j := 0; j < d+p; j++ { 605 | vects[j] = make([]byte, size) 606 | } 607 | for j := 0; j < d; j++ { 608 | fillRandom(vects[j]) 609 | } 610 | x, err := New(d, p) 611 | if err != nil { 612 | b.Fatal(err) 613 | } 614 | err = x.Encode(vects) 615 | if err != nil { 616 | b.Fatal(err) 617 | } 618 | 619 | newData := make([]byte, size) 620 | fillRandom(newData) 621 | 622 | b.SetBytes(int64((p + 2 + p) * size)) 623 | b.ResetTimer() 624 | for i := 0; i < b.N; i++ { 625 | err = x.Update(vects[updateRow], newData, updateRow, vects[d:]) 626 | if err != nil { 627 | b.Fatal(err) 628 | } 629 | } 630 | } 631 | 632 | func BenchmarkXRS_Replace(b *testing.B) { 633 | d, p := 12, 4 634 | size := 4 * kb 635 | 636 | b.Run("", benchmarkReplace(benchReplace, d, p, size)) 637 | } 638 | 639 | func benchmarkReplace(f func(*testing.B, int, int, int, int), d, p, size int) func(*testing.B) { 640 | 641 | return func(b *testing.B) { 642 | for i := 1; i <= d-p; i++ { 643 | b.Run(fmt.Sprintf("(%d+%d)-%s-replace_%d_data_vects", 644 | d, p, byteToStr(size), i), 645 | func(b *testing.B) { f(b, d, p, size, i) }) 646 | } 647 | } 648 | } 649 | 650 | func benchReplace(b *testing.B, d, p, size, n int) { 651 | vects := make([][]byte, d+p) 652 | for j := 0; j < d+p; j++ { 653 | vects[j] = make([]byte, size) 654 | } 655 | for j := 0; j < d; j++ { 656 | fillRandom(vects[j]) 657 | } 658 | x, err := New(d, p) 659 | if err != nil { 660 | b.Fatal(err) 661 | } 662 | err = x.Encode(vects) 663 | if err != nil { 664 | b.Fatal(err) 665 | } 666 | 667 | updateRows := make([]int, n) 668 | for i := range updateRows { 669 | updateRows[i] = i 670 | } 671 | b.SetBytes(int64((n + p + p) * size)) 672 | b.ResetTimer() 673 | for i := 0; i < b.N; i++ { 674 | err = x.Replace(vects[:n], updateRows, vects[d:]) 675 | if err != nil { 676 | b.Fatal(err) 677 | } 678 | } 679 | } 680 | 681 | func byteToStr(n int) string { 682 | if n >= mb { 683 | return fmt.Sprintf("%dMB", n/mb) 684 | } 685 | 686 | return fmt.Sprintf("%dKB", n/kb) 687 | } 688 | --------------------------------------------------------------------------------