├── invertible.jpg ├── mathtool ├── cntinverse │ ├── combi.jpg │ └── cntinverse.go └── gentbls │ └── gentbls.go ├── go.mod ├── gmu_generic.go ├── go.sum ├── .gitignore ├── gmu.go ├── .github └── workflows │ └── unit-test.yml ├── gmu_amd64.go ├── LICENSE ├── gmu_test.go ├── helper_test.go ├── matrix.go ├── README.md ├── matrix_test.go ├── gmu_amd64.s ├── rs_test.go └── rs.go /invertible.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/templexxx/reedsolomon/HEAD/invertible.jpg -------------------------------------------------------------------------------- /mathtool/cntinverse/combi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/templexxx/reedsolomon/HEAD/mathtool/cntinverse/combi.jpg -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/templexxx/reedsolomon 2 | 3 | require ( 4 | github.com/templexxx/cpu v0.0.1 5 | github.com/templexxx/xorsimd v0.1.1 6 | ) 7 | 8 | go 1.13 9 | -------------------------------------------------------------------------------- /gmu_generic.go: -------------------------------------------------------------------------------- 1 | //go:build !amd64 2 | // +build !amd64 3 | 4 | package reedsolomon 5 | 6 | func (g *gmu) initFunc(feat int) { 7 | g.mulVect = mulVectNoSIMD 8 | g.mulVectXOR = mulVectXORNoSIMD 9 | } 10 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/templexxx/cpu v0.0.1 h1:hY4WdLOgKdc8y13EYklu9OUTXik80BkxHoWvTO6MQQY= 2 | github.com/templexxx/cpu v0.0.1/go.mod h1:w7Tb+7qgcAlIyX4NhLuDKt78AHA5SzPmq0Wj6HiEnnk= 3 | github.com/templexxx/xorsimd v0.1.1 h1:Y4e8YgMx/4xRJO4G6lq0bSswfDCxbIrGu7KqM2ET524= 4 | 
github.com/templexxx/xorsimd v0.1.1/go.mod h1:W+ffZz8jJMH2SXwuKu9WhygqBMbFnp14G2fqEr8qaNo= 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | /.idea 26 | /backup 27 | /loopunroll/ 28 | cpu.out 29 | mathtool/galois/ 30 | mathtool/matrix/ 31 | mem.out 32 | /examples/ 33 | /.DS_Store 34 | /mathtool/cntinverse 35 | /invert 36 | /bakcup 37 | /buf.svg 38 | *.svg 39 | *.out 40 | /escape 41 | -------------------------------------------------------------------------------- /gmu.go: -------------------------------------------------------------------------------- 1 | package reedsolomon 2 | 3 | // galois field multiplying unit 4 | type gmu struct { 5 | // output = c * input 6 | mulVect func(c byte, input, output []byte) 7 | // output ^= c * input 8 | mulVectXOR func(c byte, input, output []byte) 9 | } 10 | 11 | func mulVectNoSIMD(c byte, input, output []byte) { 12 | t := mulTbl[c][:256] 13 | for i := 0; i < len(input); i++ { 14 | output[i] = t[input[i]] 15 | } 16 | } 17 | 18 | func mulVectXORNoSIMD(c byte, input, output []byte) { 19 | t := mulTbl[c][:256] 20 | for i := 0; i < len(input); i++ { 21 | output[i] ^= t[input[i]] 22 | } 23 | } 24 | 25 | // a * b 26 | func gfMul(a, b uint8) uint8 { 27 | return mulTbl[a][b] 28 | } 29 | -------------------------------------------------------------------------------- /.github/workflows/unit-test.yml: -------------------------------------------------------------------------------- 1 | name: unit-test 2 | on: 3 | push: 4 | pull_request: 5 | 6 
| jobs: 7 | test: 8 | strategy: 9 | matrix: 10 | go-version: 11 | - 1.18.x 12 | os: 13 | - ubuntu-latest 14 | 15 | runs-on: ${{ matrix.os }} 16 | 17 | steps: 18 | - name: Install Go 19 | uses: actions/setup-go@v2 20 | with: 21 | go-version: ${{ matrix.go-version }} 22 | 23 | - name: checkout 24 | uses: actions/checkout@v2 25 | 26 | - name: cache 27 | uses: actions/cache@v2 28 | with: 29 | path: ~/go/pkg/mod 30 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 31 | restore-keys: | 32 | ${{ runner.os }}-go- 33 | - name: test 34 | run: | 35 | go test -v -race 36 | -------------------------------------------------------------------------------- /gmu_amd64.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | package reedsolomon 7 | 8 | func (g *gmu) initFunc(feat int) { 9 | switch feat { 10 | case featAVX2: 11 | g.mulVect = mulVectAVX2C 12 | g.mulVectXOR = mulVectXORAVX2C 13 | default: 14 | g.mulVect = mulVectNoSIMD 15 | g.mulVectXOR = mulVectXORNoSIMD 16 | } 17 | } 18 | 19 | func mulVectAVX2C(c byte, input, output []byte) { 20 | tbl := lowHighTbl[int(c)*32 : int(c)*32+32] 21 | mulVectAVX2(tbl, input, output) 22 | } 23 | 24 | func mulVectXORAVX2C(c byte, input, output []byte) { 25 | tbl := lowHighTbl[int(c)*32 : int(c)*32+32] 26 | mulVectXORAVX2(tbl, input, output) 27 | } 28 | 29 | //go:noescape 30 | func mulVectAVX2(tbl, input, output []byte) 31 | 32 | //go:noescape 33 | func mulVectXORAVX2(tbl, input, output []byte) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Temple3x (temple3x@gmail.com) 4 | Copyright (c) 2015 Klaus Post 5 | 6 | Permission is hereby granted, free of charge, to any 
person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /gmu_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | package reedsolomon 7 | 8 | import ( 9 | "bytes" 10 | "math/rand" 11 | "testing" 12 | "time" 13 | ) 14 | 15 | func TestGMU(t *testing.T) { 16 | max := testSize 17 | 18 | switch getCPUFeature() { 19 | case featAVX2: 20 | testGMU(t, max, featAVX2, featNoSIMD) 21 | } 22 | } 23 | 24 | func testGMU(t *testing.T, maxSize, feat, cmpFeat int) { 25 | 26 | rand.Seed(time.Now().UnixNano()) 27 | 28 | fs := featToStr(feat) 29 | 30 | start, n := 1, 1 31 | if feat != featNoSIMD { 32 | start, n = 16, 16 // The min size for SIMD instructions. 
33 | } 34 | 35 | g := new(gmu) 36 | g.initFunc(feat) 37 | 38 | cg := new(gmu) 39 | cg.initFunc(cmpFeat) 40 | 41 | for size := start; size <= maxSize; size += n { 42 | for c := 0; c <= 255; c++ { 43 | input := make([]byte, size) 44 | act := make([]byte, size) 45 | fillRandom(input) 46 | 47 | g.mulVect(byte(c), input, act) 48 | exp := make([]byte, size) 49 | cg.mulVect(byte(c), input, exp) 50 | if !bytes.Equal(act, exp) { 51 | t.Fatalf("%s mismatched with %s, size: %d", 52 | fs, featToStr(cmpFeat), size) 53 | } 54 | 55 | g.mulVectXOR(byte(c), input, act) 56 | cg.mulVectXOR(byte(c), input, exp) 57 | if !bytes.Equal(act, exp) { 58 | t.Fatalf("%s mismatched with %s, size: %d", 59 | fs, featToStr(cmpFeat), size) 60 | } 61 | } 62 | } 63 | 64 | t.Logf("%s passed, size: [%d, %d), size = i * %d", 65 | fs, start, maxSize+1, n) 66 | } 67 | -------------------------------------------------------------------------------- /mathtool/cntinverse/cntinverse.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | // 6 | // Copyright ©2016 The Gonum Authors. All rights reserved. 7 | // Use of this source code is governed by a BSD-style 8 | // license that can be found in the LICENSE file. 9 | 10 | // This tools will calculate the number of inverse matrices 11 | // with specific data & parity number. 
12 | package main 13 | 14 | import ( 15 | "flag" 16 | "fmt" 17 | "math" 18 | "os" 19 | ) 20 | 21 | var vects = flag.Uint64("vects", 20, "number of vectors (data+parity)") 22 | var data = flag.Uint64("data", 0, "number of data vectors; keep it empty if you want to "+ 23 | "get the max num of inverse matrix") 24 | 25 | func init() { 26 | flag.Usage = func() { 27 | fmt.Printf("Usage of %s:\n", os.Args[0]) 28 | fmt.Println(" cntinverse [-flags]") 29 | fmt.Println(" Valid flags:") 30 | flag.PrintDefaults() 31 | } 32 | } 33 | 34 | func main() { 35 | flag.Parse() 36 | 37 | n := float64(*vects) 38 | k := float64(*data) 39 | 40 | if k == 0 { 41 | k = n / 2 42 | } 43 | fmt.Printf("num of inverse matrices for vectors ≈ %d, data: %d: %.f \n", 44 | uint64(n), 45 | uint64(k), 46 | generalizedBinomial(n, k)) 47 | } 48 | 49 | const ( 50 | errNegInput = "combination: negative input" 51 | badSetSize = "combination: n < k" 52 | ) 53 | 54 | // generalizedBinomial returns the generalized binomial coefficient of (n, k), 55 | // defined as 56 | // 57 | // Γ(n+1) / (Γ(k+1) Γ(n-k+1)) 58 | // 59 | // where Γ is the Gamma function. generalizedBinomial is useful for continuous 60 | // relaxations of the binomial coefficient, or when the binomial coefficient value 61 | // may overflow int. In the latter case, one may use math/big for an exact 62 | // computation. 63 | // 64 | // n and k must be non-negative with n >= k, otherwise generalizedBinomial will panic. 65 | func generalizedBinomial(n, k float64) float64 { 66 | return math.Exp(logGeneralizedBinomial(n, k)) 67 | } 68 | 69 | // logGeneralizedBinomial returns the log of the generalized binomial coefficient. 70 | // See generalizedBinomial for more information. 
71 | func logGeneralizedBinomial(n, k float64) float64 { 72 | if n < 0 || k < 0 { 73 | panic(errNegInput) 74 | } 75 | if n < k { 76 | panic(badSetSize) 77 | } 78 | a, _ := math.Lgamma(n + 1) 79 | b, _ := math.Lgamma(k + 1) 80 | c, _ := math.Lgamma(n - k + 1) 81 | return a - b - c 82 | } 83 | -------------------------------------------------------------------------------- /helper_test.go: -------------------------------------------------------------------------------- 1 | package reedsolomon 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "sort" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestDedup(t *testing.T) { 12 | 13 | rand.Seed(time.Now().UnixNano()) 14 | 15 | round := 1024 16 | minN := 4 17 | maxN := 4096 18 | s := make([]int, maxN) 19 | 20 | for i := 0; i < round; i++ { 21 | n := rand.Intn(maxN + 1) 22 | if n < minN { 23 | n = minN 24 | } 25 | for j := 0; j < n/minN; j++ { 26 | copy(s[j*4:j*4+4], []int{0, 1, 2, 3}) 27 | } 28 | s2 := s[:n] 29 | s2 = dedup(s2) 30 | if len(s2) != minN { 31 | t.Fatal("failed to dedup: wrong length") 32 | } 33 | for j := range s2 { 34 | if s2[j] != j { 35 | t.Fatal("failed to dedup: wrong result") 36 | } 37 | } 38 | } 39 | } 40 | 41 | // dedup removes duplicates from a given slice 42 | func dedup(s []int) []int { 43 | 44 | sort.Ints(s) 45 | 46 | cnt := len(s) 47 | cntDup := 0 48 | for i := 1; i < cnt; i++ { 49 | if s[i] == s[i-1] { 50 | cntDup++ 51 | } else { 52 | s[i-cntDup] = s[i] 53 | } 54 | } 55 | 56 | return s[:cnt-cntDup] 57 | } 58 | 59 | // generates survived & needReconst sorted indexes for testing randomly. 
60 | func genIdxForTest(d, p, survivedN, needReconstN int) ([]int, []int) { 61 | if survivedN < d { 62 | survivedN = d 63 | } 64 | if needReconstN > p { 65 | needReconstN = p 66 | } 67 | if survivedN+needReconstN > d+p { 68 | survivedN = d 69 | } 70 | 71 | needReconst := randPermK(d+p, needReconstN) 72 | 73 | survived := make([]int, 0, survivedN) 74 | 75 | fullIdx := make([]int, d+p) 76 | for i := range fullIdx { 77 | fullIdx[i] = i 78 | } 79 | rand.Shuffle(d+p, func(i, j int) { // More chance to get balanced survived index 80 | fullIdx[i], fullIdx[j] = fullIdx[j], fullIdx[i] 81 | }) 82 | 83 | for i := 0; i < d+p; i++ { 84 | if len(survived) == survivedN { 85 | break 86 | } 87 | if !isIn(fullIdx[i], needReconst) { 88 | survived = append(survived, fullIdx[i]) 89 | } 90 | } 91 | 92 | sort.Ints(survived) 93 | sort.Ints(needReconst) 94 | 95 | return survived, needReconst 96 | } 97 | 98 | func TestGenIdxForTest(t *testing.T) { 99 | 100 | d, p := 10, 4 101 | 102 | ret := make([]int, 0, d+p) 103 | 104 | for i := 0; i < d+p; i++ { 105 | for j := 0; j < d+p; j++ { 106 | is, ir := genIdxForTest(d, p, 10, 4) 107 | checkGenIdxForTest(t, d, p, is, ir, ret) 108 | ret = ret[:0] 109 | } 110 | } 111 | } 112 | 113 | func checkGenIdxForTest(t *testing.T, d, p int, is, ir, all []int) { 114 | 115 | for _, v := range is { 116 | if v < 0 || v >= d+p { 117 | t.Fatal(ErrIllegalVectIndex) 118 | } 119 | all = append(all, v) 120 | } 121 | for _, v := range ir { 122 | if v < 0 || v >= d+p { 123 | t.Fatal(ErrIllegalVectIndex) 124 | } 125 | all = append(all, v) 126 | } 127 | if len(is) < d { 128 | t.Fatal("too few survived") 129 | } 130 | da := dedup(all) 131 | if len(da) != len(all) { 132 | t.Fatal("survived & needReconst conflicting") 133 | } 134 | if !sort.IsSorted(sort.IntSlice(is)) || !sort.IsSorted(sort.IntSlice(ir)) { 135 | t.Fatal("idx unsorted") 136 | } 137 | } 138 | 139 | // generates first k integers from a pseudo-random permutation in [0,n) 140 | func randPermK(n, k int) []int { 141 
| rand.Seed(time.Now().UnixNano()) 142 | 143 | return rand.Perm(n)[:k] 144 | } 145 | 146 | func featToStr(f int) string { 147 | switch f { 148 | case featAVX2: 149 | return "AVX2" 150 | case featNoSIMD: 151 | return "No-SIMD" 152 | default: 153 | return "Tested" 154 | } 155 | } 156 | 157 | func fillRandom(p []byte) { 158 | rand.Read(p) 159 | } 160 | 161 | func byteToStr(n int) string { 162 | if n >= mib { 163 | return fmt.Sprintf("%dMiB", n/mib) 164 | } 165 | 166 | return fmt.Sprintf("%dKiB", n/kib) 167 | } 168 | 169 | func isIn(e int, s []int) bool { 170 | for _, v := range s { 171 | if e == v { 172 | return true 173 | } 174 | } 175 | return false 176 | } 177 | -------------------------------------------------------------------------------- /matrix.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | package reedsolomon 7 | 8 | import ( 9 | "errors" 10 | ) 11 | 12 | // matrix: row*column bytes, 13 | // I use one slice but not 2D slice, 14 | // because type matrix is only as encoding/generator matrix's 15 | // container, hundreds bytes at most, 16 | // so it maybe more cache-friendly and GC-friendly. 17 | type matrix []byte 18 | 19 | // makeEncodeMatrix makes an encoding matrix. 20 | // High portion: Identity Matrix; 21 | // Lower portion: Cauchy Matrix. 22 | // The Encoding Matrix is as same as Intel ISA-L's gf_gen_cauchy1_matrix: 23 | // https://github.com/intel/isa-l/blob/master/erasure_code/ec_base.c 24 | // 25 | // Warn: 26 | // It maybe not the common way to make an encoding matrix, 27 | // so it may corrupt when mix this lib with other erasure codes libs.
28 | // 29 | // The common way to make an encoding matrix is using a 30 | // Vandermonde Matrix, then use elementary transformation 31 | // to make an identity matrix in the high portion of the matrix. 32 | // But it's a little complicated. 33 | // 34 | // And there is a wrong way to use Vandermonde Matrix 35 | // (see Intel ISA-L, and this lib's document warn the issue), 36 | // in the wrong way, they use an identity matrix in the high portion, 37 | // and a Vandermonde matrix in the lower directly, 38 | // and this encoding matrix's sub-matrix maybe singular. 39 | // You can find a proof in invertible.jpg. 40 | func makeEncodeMatrix(d, p int) matrix { 41 | r := d + p 42 | m := make([]byte, r*d) 43 | // Create identity matrix upper. 44 | for i := 0; i < d; i++ { 45 | m[i*d+i] = 1 46 | } 47 | 48 | // Create cauchy matrix below. (1/(i + j), 0 <= j < d, d <= i < 2*d) 49 | off := d * d // Skip the identity matrix. 50 | for i := d; i < r; i++ { 51 | for j := 0; j < d; j++ { 52 | m[off] = inverseTbl[i^j] 53 | off++ 54 | } 55 | } 56 | return m 57 | } 58 | 59 | func (m matrix) makeReconstMatrix(survived, needReconst []int) (rm matrix, err error) { 60 | 61 | d, nn := len(survived), len(needReconst) 62 | rm = make([]byte, nn*d) 63 | for i, l := range needReconst { 64 | copy(rm[i*d:i*d+d], m[l*d:l*d+d]) 65 | } 66 | return 67 | } 68 | 69 | // makeEncMatrixForReconst makes an encoding matrix by calculating 70 | // the inverse matrix of survived encoding matrix.
71 | func (m matrix) makeEncMatrixForReconst(survived []int) (em matrix, err error) { 72 | d := len(survived) 73 | m2 := make([]byte, d*d) 74 | for i, l := range survived { 75 | copy(m2[i*d:i*d+d], m[l*d:l*d+d]) 76 | } 77 | em, err = matrix(m2).invert(len(survived)) 78 | if err != nil { 79 | return 80 | } 81 | return 82 | } 83 | 84 | var ErrNotSquare = errors.New("not a square matrix") 85 | var ErrSingularMatrix = errors.New("matrix is singular") 86 | 87 | // invert calculates m's inverse matrix, 88 | // and return it or any error. 89 | func (m matrix) invert(n int) (inv matrix, err error) { 90 | if n*n != len(m) { 91 | err = ErrNotSquare 92 | return 93 | } 94 | 95 | mm := make([]byte, 2*n*n) 96 | left := mm[:n*n] 97 | copy(left, m) // Copy m, avoiding side effect. 98 | 99 | // Make an identity matrix. 100 | inv = mm[n*n:] 101 | for i := 0; i < n; i++ { 102 | inv[i*n+i] = 1 103 | } 104 | 105 | for i := 0; i < n; i++ { 106 | // Pivoting. 107 | if left[i*n+i] == 0 { 108 | // Find a row with non-zero in current column and swap. 109 | // If there is no one, means it's a singular matrix. 110 | var j int 111 | for j = i + 1; j < n; j++ { 112 | if left[j*n+i] != 0 { 113 | break 114 | } 115 | } 116 | if j == n { 117 | return nil, ErrSingularMatrix 118 | } 119 | 120 | matrix(left).swap(i, j, n) 121 | inv.swap(i, j, n) 122 | } 123 | 124 | if left[i*n+i] != 1 { 125 | v := inverseTbl[left[i*n+i]] // 1/pivot 126 | // Scale row. 127 | for j := 0; j < n; j++ { 128 | left[i*n+j] = gfMul(left[i*n+j], v) 129 | inv[i*n+j] = gfMul(inv[i*n+j], v) 130 | } 131 | } 132 | 133 | // Use elementary transformation to 134 | // make all elements(except pivot) in the left matrix 135 | // become 0. 
136 | for j := 0; j < n; j++ { 137 | if j == i { 138 | continue 139 | } 140 | 141 | v := left[j*n+i] 142 | if v != 0 { 143 | for k := 0; k < n; k++ { 144 | left[j*n+k] ^= gfMul(v, left[i*n+k]) 145 | inv[j*n+k] ^= gfMul(v, inv[i*n+k]) 146 | } 147 | } 148 | } 149 | } 150 | 151 | return 152 | } 153 | 154 | // swap square matrix row[i] & row[j], col = n 155 | func (m matrix) swap(i, j, n int) { 156 | for k := 0; k < n; k++ { 157 | m[i*n+k], m[j*n+k] = m[j*n+k], m[i*n+k] 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reed-Solomon 2 | 3 | [![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8] [![Sourcegraph][9]][10] 4 | 5 | [1]: https://godoc.org/github.com/templexxx/reedsolomon?status.svg 6 | [2]: https://godoc.org/github.com/templexxx/reedsolomon 7 | [3]: https://img.shields.io/badge/license-MIT-blue.svg 8 | [4]: LICENSE 9 | [5]: https://github.com/templexxx/reedsolomon/workflows/unit-test/badge.svg 10 | [6]: https://github.com/templexxx/reedsolomon 11 | [7]: https://goreportcard.com/badge/github.com/templexxx/reedsolomon 12 | [8]: https://goreportcard.com/report/github.com/templexxx/reedsolomon 13 | [9]: https://sourcegraph.com/github.com/templexxx/reedsolomon/-/badge.svg 14 | [10]: https://sourcegraph.com/github.com/templexxx/reedsolomon?badge 15 | 16 | 17 | ## Introduction: 18 | 19 | >- Erasure Codes(based on Reed-Solomon Codes) engine in pure Go. 20 | > 21 | >- It's a kind of [Systematic Codes](https://en.wikipedia.org/wiki/Systematic_code), which means 22 | the input data is embedded in the encoded output . 23 | > 24 | >- [High Performance](https://github.com/templexxx/reedsolomon#performance): dozens GiB/s per physics core. 25 | > 26 | >- High Reliability: 27 | > 1. At least two companies are using this library in their storage system. 
28 | (More than dozens PB data) 29 | > 2. Full test of galois field calculation and invertible matrices 30 | > (You can also find the [mathematical proof](invertible.jpg) in this repo). 31 | > 32 | >- Based on [Klauspost ReedSolomon](https://github.com/klauspost/reedsolomon) 33 | & [Intel ISA-L](https://github.com/01org/isa-l) with some additional changes/optimizations. 34 | > 35 | >- It's the backend of [XRS](https://github.com/templexxx/xrs) (Erasure Codes 36 | which can save about 30% I/O in reconstruction process). 37 | 38 | ## Specification 39 | ### Math 40 | 41 | >- Coding over in GF(2^8). 42 | > 43 | >- Primitive Polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x1d). 44 | > 45 | >- [Cauchy Matrix](matrix.go) is the generator matrix. 46 | > >- Any sub-matrix of encoding matrix is invertible (See the proof [here](invertible.jpg)). 47 | > 48 | >- [Galois Field Tool](mathtool/gentbls/gentbls.go): Generate primitive polynomial, 49 | and it's log, exponent, multiply and inverse tables etc. 50 | > 51 | >- [Inverse Matrices Tool](mathtool/cntinverse/cntinverse.go): Calculate the number of inverse matrices 52 | with specific data & parity number. 53 | > 54 | 55 | [XP](https://github.com/drmingdrmer) has written an excellent article ([Here, in Chinese](http://drmingdrmer.github.io/tech/distributed/2017/02/01/ec.html)) about how 56 | Erasure Codes works and the math behind it. It's a good start to read it. 57 | 58 | ### Accelerate 59 | 60 | >- SIMD: [Screaming Fast Galois Field Arithmetic Using Intel SIMD Instructions](http://web.eecs.utk.edu/~jplank/plank/papers/FAST-2013-GF.html) 61 | > 62 | >- Reduce memory I/O: Write cache-friendly code. In the process of two matrices multiply, we will have to 63 | read data times, and keep the temporary results, then write to memory. If we could put more data into 64 | CPU's Cache but not read/write memory again and again, the performance should 65 | improve a lot. 
66 | > 67 | >- Cache inverse matrices: It'll save thousands ns, not much, but it's still meaningful 68 | for small data. 69 | > 70 | >- ... 71 | 72 | [Here](http://www.templex.xyz/blog/101/reedsolomon.html) (in Chinese) is an article about 73 | how to write a fast Erasure Codes engine. 74 | (Written by me years ago, need update, but the main ideas still work) 75 | 76 | ## Performance 77 | 78 | Performance depends mainly on: 79 | 80 | >- CPU instruction extension. 81 | > 82 | >- Number of data/parity row vectors. 83 | 84 | **Platform:** 85 | 86 | ``` 87 | goos: linux 88 | goarch: amd64 89 | pkg: github.com/templexxx/reedsolomon 90 | cpu: 12th Gen Intel(R) Core(TM) i7-12700K 91 | ``` 92 | 93 | **All test run on a single Core.** 94 | 95 | ### Encode: 96 | 97 | `I/O = (data + parity) * vector_size / cost` 98 | 99 | | Data | Parity | Vector size | AVX2 I/O (MiB/S) | no SIMD I/O (MiB/S) | 100 | |------|--------|-------------|------------------|---------------------| 101 | | 10 | 2 | 8KiB | 35640.29 | 2226.84 | 102 | | 10 | 2 | 1MiB | 30136.69 | 2214.45 | 103 | | 10 | 4 | 8KiB | 19936.79 | 1294.25 | 104 | | 10 | 4 | 1MiB | 17845.68 | 1284.02 | 105 | | 12 | 4 | 8KiB | 19072.93 | 1229.14 | 106 | | 12 | 4 | 1MiB | 16851.19 | 1219.29 | 107 | 108 | ### Reconstruct: 109 | 110 | `I/O = (data + reconstruct_data_num) * vector_size / cost` 111 | 112 | | Data | Parity | Vector size | Reconstruct Data Num | AVX2 I/O (MiB/s) | 113 | |------|--------|-------------|----------------------|------------------| 114 | | 10 | 4 | 8KiB | 1 | 55775.91 | 115 | | 10 | 4 | 8KiB | 2 | 33037.90 | 116 | | 10 | 4 | 8KiB | 3 | 23917.16 | 117 | | 10 | 4 | 8KiB | 4 | 19363.26 | 118 | 119 | ### Update: 120 | 121 | `I/O = (2 + parity_num + parity_num) * vector_size / cost` 122 | 123 | | Data | Parity | Vector size | AVX2 I/O (MiB/S) | 124 | |------|--------|-------------|------------------| 125 | | 10 | 4 | 8KiB | 55710.83 | 126 | 127 | ### Replace: 128 | 129 | `I/O = (parity_num + parity_num + 
replace_data_num) * vector_size / cost` 130 | 131 | | Data | Parity | Vector size | Replace Data Num | AVX2 I/O (MiB/S) | 132 | |------|--------|-------------|------------------|------------------| 133 | | 10 | 4 | 8KiB | 1 | 116193.04 | 134 | | 10 | 4 | 8KiB | 2 | 65375.73 | 135 | | 10 | 4 | 8KiB | 3 | 48775.47 | 136 | | 10 | 4 | 8KiB | 4 | 40398.79 | 137 | | 10 | 4 | 8KiB | 5 | 35262.89 | 138 | | 10 | 4 | 8KiB | 6 | 31881.60 | 139 | 140 | **PS:** 141 | 142 | *We must know the benchmark test is quite different with encoding/decoding in practice. 143 | Because in benchmark test loops, the CPU Cache may help a lot.* 144 | 145 | ## Acknowledgements 146 | >- [Klauspost ReedSolomon](https://github.com/klauspost/reedsolomon): It's the 147 | most commonly used Erasure Codes library in Go. Impressive performance, friendly API, 148 | and it can support multi platforms(with fast Galois Field Arithmetic). Inspired me a lot. 149 | > 150 | >- [Intel ISA-L](https://github.com/01org/isa-l): The ideas of Cauchy matrix and saving memory 151 | I/O are from it. 152 | -------------------------------------------------------------------------------- /mathtool/gentbls/gentbls.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | // This tool generates primitive polynomial, 7 | // and it's log, exponent, multiply and inverse tables etc. 
8 | package main 9 | 10 | import ( 11 | "bufio" 12 | "fmt" 13 | "log" 14 | "os" 15 | "strconv" 16 | "strings" 17 | ) 18 | 19 | const deg = 8 20 | 21 | type polynomial [deg + 1]byte 22 | 23 | func main() { 24 | f, err := os.OpenFile("gf_tables", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) 25 | if err != nil { 26 | log.Fatalln(err) 27 | } 28 | defer f.Close() 29 | 30 | w := bufio.NewWriter(f) 31 | ps := genPrimitivePolynomial() 32 | title := strconv.FormatInt(int64(deg), 10) + " degree primitive polynomial:\n" 33 | var pss string 34 | for i, p := range ps { 35 | pf := formatPolynomial(p) 36 | pf = strconv.FormatInt(int64(i+1), 10) + ". " + pf + ";\n" 37 | pss = pss + pf 38 | } 39 | body := fmt.Sprintf(title+"%v", pss) 40 | w.WriteString(body) 41 | 42 | // Set primitive polynomial here to generator tables. 43 | // Default: x^8+x^4+x^3+x^2+1 44 | var primitivePolynomial polynomial 45 | primitivePolynomial[0] = 1 46 | primitivePolynomial[2] = 1 47 | primitivePolynomial[3] = 1 48 | primitivePolynomial[4] = 1 49 | primitivePolynomial[8] = 1 50 | 51 | lenExpTable := (1 << deg) - 1 52 | expTable := genExpTable(primitivePolynomial, lenExpTable) 53 | body = fmt.Sprintf("expTbl: %#v\n", expTable) 54 | w.WriteString(body) 55 | 56 | logTable := genLogTable(expTable) 57 | body = fmt.Sprintf("logTbl: %#v\n", logTable) 58 | w.WriteString(body) 59 | 60 | mulTable := genMulTable(expTable, logTable) 61 | body = fmt.Sprintf("mulTbl: %#v\n", mulTable) 62 | w.WriteString(body) 63 | 64 | lowTable, highTable := genMulTableHalf(mulTable) 65 | body = fmt.Sprintf("lowTbl: %#v\n", lowTable) 66 | w.WriteString(body) 67 | body = fmt.Sprintf("highTbl: %#v\n", highTable) 68 | w.WriteString(body) 69 | 70 | var lowHighTbl [256 * 32]byte 71 | for i := 0; i < 256; i++ { 72 | copy(lowHighTbl[i*32:i*32+16], lowTable[i]) 73 | copy(lowHighTbl[i*32+16:i*32+32], highTable[i]) 74 | } 75 | 76 | body = fmt.Sprintf("lowHighTbl: %#v\n", lowHighTbl) 77 | w.WriteString(body) 78 | 79 | inverseTable := 
genInverseTable(mulTable) 80 | body = fmt.Sprintf("inverseTbl: %#v\n", inverseTable) 81 | w.WriteString(body) 82 | w.Flush() 83 | } 84 | 85 | // Generate primitive Polynomial. 86 | func genPrimitivePolynomial() []polynomial { 87 | // Drop Polynomial x,so the constant term must be 1, 88 | // so there are 2^(deg-1) Polynomials. 89 | cnt := 1 << (deg - 1) 90 | var polynomials []polynomial 91 | var p polynomial 92 | p[0] = 1 93 | p[deg] = 1 94 | // Generate all Polynomials. 95 | for i := 0; i < cnt; i++ { 96 | p = genPolynomial(p, 1) 97 | polynomials = append(polynomials, p) 98 | } 99 | // Drop Polynomial x+1, so the cnt of Polynomials is odd. 100 | var psRaw []polynomial 101 | for _, p := range polynomials { 102 | var n int 103 | for _, v := range p { 104 | if v == 1 { 105 | n++ 106 | } 107 | } 108 | if n&1 != 0 { 109 | psRaw = append(psRaw, p) 110 | } 111 | } 112 | // Order of primitive element == 2^deg -1 113 | var ps []polynomial 114 | for _, p := range psRaw { 115 | lenTable := (1 << deg) - 1 116 | table := genExpTable(p, lenTable) 117 | var numOf1 int 118 | for _, v := range table { 119 | // Cnt 1 in ExpTable. 
120 | if int(v) == 1 { 121 | numOf1++ 122 | } 123 | } 124 | if numOf1 == 1 { 125 | ps = append(ps, p) 126 | } 127 | } 128 | return ps 129 | } 130 | 131 | func genPolynomial(p polynomial, i int) polynomial { 132 | if p[i] == 0 { 133 | p[i] = 1 134 | } else { 135 | p[i] = 0 136 | i++ 137 | if i == deg { 138 | return p 139 | } 140 | p = genPolynomial(p, i) 141 | } 142 | return p 143 | } 144 | 145 | func genExpTable(primitivePolynomial polynomial, exp int) []byte { 146 | table := make([]byte, exp) 147 | var rawPolynomial polynomial 148 | rawPolynomial[1] = 1 149 | table[0] = byte(1) 150 | table[1] = byte(2) 151 | for i := 2; i < exp; i++ { 152 | rawPolynomial = expGrowPolynomial(rawPolynomial, primitivePolynomial) 153 | table[i] = getValueOfPolynomial(rawPolynomial) 154 | } 155 | return table 156 | } 157 | 158 | func expGrowPolynomial(raw, primitivePolynomial polynomial) polynomial { 159 | var newP polynomial 160 | for i, v := range raw[:deg] { 161 | if v == 1 { 162 | newP[i+1] = 1 163 | } 164 | } 165 | if newP[deg] == 1 { 166 | for i, v := range primitivePolynomial[:deg] { 167 | if v == 1 { 168 | if newP[i] == 1 { 169 | newP[i] = 0 170 | } else { 171 | newP[i] = 1 172 | } 173 | } 174 | } 175 | } 176 | newP[deg] = 0 177 | return newP 178 | } 179 | 180 | func getValueOfPolynomial(p polynomial) uint8 { 181 | var v uint8 182 | for i, coefficient := range p[:deg] { 183 | if coefficient != 0 { 184 | add := 1 << uint8(i) 185 | v += uint8(add) 186 | } 187 | } 188 | return v 189 | } 190 | 191 | func genLogTable(expTable []byte) []byte { 192 | table := make([]byte, 1<= 255 { 212 | logSum -= 255 213 | } 214 | result[a][b] = expTable[logSum] 215 | } 216 | } 217 | return result 218 | } 219 | 220 | func genMulTableHalf(mulTable [256][256]byte) (low [][]byte, high [][]byte) { 221 | low = make([][]byte, 256) 222 | high = make([][]byte, 256) 223 | for i := range low { 224 | low[i] = make([]byte, 16) 225 | high[i] = make([]byte, 16) 226 | } 227 | for i := range low { 228 | for j := 
range low { 229 | //result := 0 230 | var result byte 231 | if !(i == 0 || j == 0) { 232 | //result = int(mulTable[i][j]) 233 | result = mulTable[i][j] 234 | 235 | } 236 | // j & 00001111, [0,15] 237 | if (j & 0xf) == j { 238 | low[i][j] = result 239 | } 240 | // j & 11110000, [240,255] 241 | if (j & 0xf0) == j { 242 | high[i][j>>4] = result 243 | } 244 | } 245 | } 246 | return 247 | } 248 | 249 | func genInverseTable(mulTable [256][256]byte) [256]byte { 250 | 251 | var inverseTable [256]byte 252 | for i, t := range mulTable { 253 | for j, v := range t { 254 | if int(v) == 1 { 255 | inverseTable[i] = byte(j) 256 | } 257 | } 258 | } 259 | return inverseTable 260 | } 261 | 262 | func formatPolynomial(p polynomial) string { 263 | var ps string 264 | for i := deg; i > 1; i-- { 265 | if p[i] == 1 { 266 | ps = ps + "x^" + strconv.FormatInt(int64(i), 10) + "+" 267 | } 268 | } 269 | if p[1] == 1 { 270 | ps = ps + "x+" 271 | } 272 | if p[0] == 1 { 273 | ps = ps + "1" 274 | } else { 275 | strings.TrimSuffix(ps, "+") 276 | } 277 | return ps 278 | } 279 | -------------------------------------------------------------------------------- /matrix_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 
5 | 6 | package reedsolomon 7 | 8 | import ( 9 | "bytes" 10 | "flag" 11 | "fmt" 12 | "math/bits" 13 | "math/rand" 14 | "testing" 15 | "time" 16 | ) 17 | 18 | func TestMakeEncodeMatrix(t *testing.T) { 19 | act := makeEncodeMatrix(4, 4) 20 | exp := []byte{ 21 | 1, 0, 0, 0, 22 | 0, 1, 0, 0, 23 | 0, 0, 1, 0, 24 | 0, 0, 0, 1, 25 | 71, 167, 122, 186, 26 | 167, 71, 186, 122, 27 | 122, 186, 71, 167, 28 | 186, 122, 167, 71} 29 | if !bytes.Equal(act, exp) { 30 | t.Fatal("mismatch") 31 | } 32 | } 33 | 34 | func TestMatrixSwap(t *testing.T) { 35 | n := 7 36 | m := make([]byte, n*n) 37 | rand.Seed(time.Now().UnixNano()) 38 | fillRandom(m) 39 | exp := make([]byte, n*n) 40 | copy(exp, m) 41 | matrix(exp).swap(0, 1, n) 42 | matrix(exp).swap(0, 1, n) 43 | if !bytes.Equal(exp, m) { 44 | t.Fatalf("swap mismatch") 45 | } 46 | } 47 | 48 | func TestMatrixInvert(t *testing.T) { 49 | testCases := []struct { 50 | matrixData []byte 51 | n int 52 | expect []byte 53 | ok bool 54 | expectedErr error 55 | }{ 56 | { 57 | []byte{ 58 | 56, 23, 98, 59 | 3, 100, 200, 60 | 45, 201, 123}, 61 | 3, 62 | []byte{ 63 | 175, 133, 33, 64 | 130, 13, 245, 65 | 112, 35, 126}, 66 | true, 67 | nil, 68 | }, 69 | { 70 | []byte{ 71 | 0, 23, 98, 72 | 3, 100, 200, 73 | 45, 201, 123}, 74 | 3, 75 | []byte{ 76 | 245, 128, 152, 77 | 188, 64, 135, 78 | 231, 81, 239}, 79 | true, 80 | nil, 81 | }, 82 | { 83 | []byte{ 84 | 1, 0, 0, 0, 0, 85 | 0, 1, 0, 0, 0, 86 | 0, 0, 0, 1, 0, 87 | 0, 0, 0, 0, 1, 88 | 7, 7, 6, 6, 1}, 89 | 5, 90 | []byte{ 91 | 1, 0, 0, 0, 0, 92 | 0, 1, 0, 0, 0, 93 | 123, 123, 1, 122, 122, 94 | 0, 0, 1, 0, 0, 95 | 0, 0, 0, 1, 0}, 96 | true, 97 | nil, 98 | }, 99 | { 100 | []byte{ 101 | 4, 2, 102 | 12, 6}, 103 | 2, 104 | nil, 105 | false, 106 | ErrSingularMatrix, 107 | }, 108 | { 109 | []byte{7, 8, 9}, 110 | 2, 111 | nil, 112 | false, 113 | ErrNotSquare, 114 | }, 115 | } 116 | 117 | for i, c := range testCases { 118 | m := matrix(c.matrixData) 119 | actual, actualErr := m.invert(c.n) 120 | if actualErr != nil && 
c.ok { 121 | t.Errorf("case.%d, expected to pass, but failed with: %s", i+1, actualErr.Error()) 122 | } 123 | if actualErr == nil && !c.ok { 124 | t.Errorf("case.%d, expected to fail with \"%s\", but passed", i+1, c.expectedErr) 125 | } 126 | if actualErr != nil && !c.ok { 127 | if c.expectedErr != actualErr { 128 | t.Errorf("case.%d, expected to fail with error \"%s\", but instead failed with error \"%s\"", i+1, c.expectedErr, actualErr) 129 | } 130 | } 131 | if actualErr == nil && c.ok { 132 | if !bytes.Equal(c.expect, actual) { 133 | t.Errorf("case.%d, mismatch", i+1) 134 | } 135 | } 136 | } 137 | } 138 | 139 | func TestMakeEncMatrixForReconst(t *testing.T) { 140 | d, p := 4, 4 141 | em := makeEncodeMatrix(d, p) 142 | survivied, _ := genIdxForTest(d, p, d, p) 143 | emr, err := em.makeEncMatrixForReconst(survivied) 144 | if err != nil { 145 | t.Fatal(err) 146 | } 147 | hasM := make([]byte, d*d) 148 | for i, h := range survivied { 149 | copy(hasM[i*d:i*d+d], em[h*d:h*d+d]) 150 | } 151 | if !mul(emr, hasM, d).isIdentity(d) { 152 | t.Fatal("make wrong encoding matrix for reconstruction") 153 | } 154 | } 155 | 156 | // Check all sub matrices when there is a lost. 157 | // Warn: 158 | // Don't set too big numbers, 159 | // it may have too many combinations, the test will never finish. 160 | func TestEncMatrixInvertibleAll(t *testing.T) { 161 | testEncMatrixInvertible(t, 10, 4) 162 | testEncMatrixInvertible(t, 15, 4) 163 | } 164 | 165 | func testEncMatrixInvertible(t *testing.T, d, p int) { 166 | encMatrix := makeEncodeMatrix(d, p) 167 | var bitmap uint64 168 | cnt := 0 169 | // Lost more, bitmap bigger. 170 | var min uint64 = (1 << (d + 1)) - 1 ^ (1 << (d - 1)) // Min value when lost one data row vector. 171 | var max uint64 = ((1 << d) - 1) << p // Max value when lost parity-num data row vectors. 
172 | for bitmap = min; bitmap <= max; bitmap++ { 173 | if bits.OnesCount64(bitmap) != d { 174 | continue 175 | } 176 | cnt++ 177 | v := bitmap 178 | dpHas := make([]int, d) 179 | c := 0 180 | for i := 0; i < d+p; i++ { 181 | var j uint64 = 1 << i 182 | if j&v == j { 183 | dpHas[c] = i 184 | c++ 185 | } 186 | } 187 | 188 | m := make([]byte, d*d) 189 | for i := 0; i < d; i++ { 190 | copy(m[i*d:i*d+d], encMatrix[dpHas[i]*d:dpHas[i]*d+d]) 191 | } 192 | im, err := matrix(m).invert(d) 193 | if err != nil { 194 | t.Fatalf("encode matrix is singular, d:%d, p:%d, dpHas:%#v", d, p, dpHas) 195 | } 196 | 197 | // Check A * A' = I or not, 198 | // ensure nothing wrong in the invert process. 199 | if !mul(im, m, d).isIdentity(d) { 200 | t.Fatalf("matrix invert wrong, d:%d, p:%d, dpHas:%#v", d, p, dpHas) 201 | } 202 | } 203 | t.Logf("%d+%d pass invertible test, total submatrix(with lost): %d", d, p, cnt) 204 | } 205 | 206 | var Invertible = flag.Bool("invert-test", false, 207 | "checking encoding matrices' sub-matrices are invertible or not by pick up sub-matrix randomly") 208 | 209 | // Check Encoding Matrices' sub-matrices are invertible. 210 | // Randomly pick up sub-matrix every data+parity pair. 211 | // 212 | // This test may cost about 100s, unless modify codes about 213 | // galois field or matrix, there is no need to run it every time, 214 | // so skip the test by default, avoiding waste time in develop process. 
215 | func TestEncMatrixInvertibleRandom(t *testing.T) { 216 | 217 | if !*Invertible { 218 | t.Skip("skip the test, because it may cost too much time") 219 | } 220 | 221 | for d := 1; d < 256; d++ { 222 | for p := 1; p < 256; p++ { 223 | if d+p > 256 { 224 | continue 225 | } 226 | 227 | encMatrix := makeEncodeMatrix(d, p) 228 | survived, _ := genIdxForTest(d, p, d, p) 229 | m := make([]byte, d*d) 230 | for i := 0; i < d; i++ { 231 | copy(m[i*d:i*d+d], encMatrix[survived[i]*d:survived[i]*d+d]) 232 | } 233 | 234 | im, err := matrix(m).invert(d) 235 | if err != nil { 236 | t.Fatalf("encode matrix is singular, d:%d, p:%d, dpHas:%#v", d, p, survived) 237 | } 238 | 239 | // Check A * A' = I or not, 240 | // ensure nothing wrong in the invert process. 241 | if !mul(im, m, d).isIdentity(d) { 242 | t.Fatalf("matrix invert wrong, d:%d, p:%d, dpHas:%#v", d, p, survived) 243 | } 244 | } 245 | } 246 | } 247 | 248 | // square matrix a * square matrix b = out 249 | func mul(a, b matrix, n int) (out matrix) { 250 | 251 | out = make([]byte, n*n) 252 | for i := 0; i < n; i++ { 253 | for j := 0; j < n; j++ { 254 | d := byte(0) 255 | for k := 0; k < n; k++ { 256 | d ^= gfMul(a[n*i+k], b[n*k+j]) 257 | 258 | } 259 | out[i*n+j] = d 260 | } 261 | } 262 | return 263 | } 264 | 265 | func (m matrix) isIdentity(n int) bool { 266 | im := make([]byte, n*n) 267 | for i := 0; i < n; i++ { 268 | im[i*n+i] = 1 269 | } 270 | return bytes.Equal(m, im) 271 | } 272 | 273 | func BenchmarkMakeEncMatrixForReconst(b *testing.B) { 274 | dps := [][2]int{ // data, parity 275 | {4, 4}, 276 | {10, 4}, 277 | {16, 16}, 278 | {64, 64}, 279 | {128, 128}, 280 | {255, 1}, 281 | {256, 0}, 282 | } 283 | benchMatrixInvertRun(b, dps) 284 | } 285 | 286 | func benchMatrixInvertRun(b *testing.B, dps [][2]int) { 287 | for _, dp := range dps { 288 | d, p := dp[0], dp[1] 289 | b.Run(fmt.Sprintf("(%d+%d)", d, p), func(b *testing.B) { 290 | m := makeEncodeMatrix(d, p) 291 | survived, _ := genIdxForTest(d, p, d, p) 292 | 
b.ResetTimer() 293 | for i := 0; i < b.N; i++ { 294 | _, err := m.makeEncMatrixForReconst(survived) 295 | if err != nil { 296 | b.Fatal(err) 297 | } 298 | } 299 | }) 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /gmu_amd64.s: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | #define low_tbl Y0 7 | #define high_tbl Y1 8 | #define mask Y2 9 | #define in0 Y3 10 | #define in1 Y4 11 | #define in2 Y5 12 | #define in3 Y6 13 | #define in4 Y7 14 | #define in5 Y8 15 | #define in0_h Y10 16 | #define in1_h Y11 17 | #define in2_h Y12 18 | #define in3_h Y13 19 | #define in4_h Y14 20 | #define in5_h Y15 21 | 22 | #define in BX 23 | #define out DI 24 | #define len R8 25 | #define pos R9 26 | 27 | #define tmp0 R10 28 | 29 | #define low_tblx X0 30 | #define high_tblx X1 31 | #define maskx X2 32 | #define in0x X3 33 | #define in0_hx X10 34 | #define tmp0x X9 35 | #define tmp1x X11 36 | #define tmp2x X12 37 | #define tmp3x X13 38 | 39 | // func mulVectAVX2(tbl, input, ouput []byte) 40 | TEXT ·mulVectAVX2(SB), 4, $0 41 | MOVQ i+24(FP), in 42 | MOVQ o+48(FP), out 43 | MOVQ tbl+0(FP), tmp0 44 | VMOVDQU (tmp0), low_tblx 45 | VMOVDQU 16(tmp0), high_tblx 46 | MOVB $0x0f, DX 47 | LONG $0x2069e3c4; WORD $0x00d2 // VPINSRB $0x00, EDX, XMM2, XMM2 48 | VPBROADCASTB maskx, maskx 49 | MOVQ in_len+32(FP), len 50 | TESTQ $31, len 51 | JNZ one16b 52 | 53 | ymm: 54 | VINSERTI128 $1, low_tblx, low_tbl, low_tbl 55 | VINSERTI128 $1, high_tblx, high_tbl, high_tbl 56 | VINSERTI128 $1, maskx, mask, mask 57 | TESTQ $255, len 58 | JNZ not_aligned 59 | 60 | // 256bytes/loop 61 | aligned: 62 | MOVQ $0, pos 63 | 64 | loop256b: 65 | // split low/high part(every byte get 4 low/high bit) 66 | VMOVDQU (in)(pos*1), in0 67 | VPSRLQ $4, in0, in0_h 68 | 
VPAND mask, in0_h, in0_h 69 | VPAND mask, in0, in0 70 | 71 | // according low/high part shuffle table, store result in second dst register 72 | VPSHUFB in0_h, high_tbl, in0_h 73 | VPSHUFB in0, low_tbl, in0 74 | VPXOR in0, in0_h, in0 75 | 76 | // store result in memory 77 | VMOVDQU in0, (out)(pos*1) 78 | 79 | VMOVDQU 32(in)(pos*1), in1 80 | VPSRLQ $4, in1, in1_h 81 | VPAND mask, in1_h, in1_h 82 | VPAND mask, in1, in1 83 | VPSHUFB in1_h, high_tbl, in1_h 84 | VPSHUFB in1, low_tbl, in1 85 | VPXOR in1, in1_h, in1 86 | VMOVDQU in1, 32(out)(pos*1) 87 | 88 | VMOVDQU 64(in)(pos*1), in2 89 | VPSRLQ $4, in2, in2_h 90 | VPAND mask, in2_h, in2_h 91 | VPAND mask, in2, in2 92 | VPSHUFB in2_h, high_tbl, in2_h 93 | VPSHUFB in2, low_tbl, in2 94 | VPXOR in2, in2_h, in2 95 | VMOVDQU in2, 64(out)(pos*1) 96 | 97 | VMOVDQU 96(in)(pos*1), in3 98 | VPSRLQ $4, in3, in3_h 99 | VPAND mask, in3_h, in3_h 100 | VPAND mask, in3, in3 101 | VPSHUFB in3_h, high_tbl, in3_h 102 | VPSHUFB in3, low_tbl, in3 103 | VPXOR in3, in3_h, in3 104 | VMOVDQU in3, 96(out)(pos*1) 105 | 106 | VMOVDQU 128(in)(pos*1), in4 107 | VPSRLQ $4, in4, in4_h 108 | VPAND mask, in4_h, in4_h 109 | VPAND mask, in4, in4 110 | VPSHUFB in4_h, high_tbl, in4_h 111 | VPSHUFB in4, low_tbl, in4 112 | VPXOR in4, in4_h, in4 113 | VMOVDQU in4, 128(out)(pos*1) 114 | 115 | VMOVDQU 160(in)(pos*1), in5 116 | VPSRLQ $4, in5, in5_h 117 | VPAND mask, in5_h, in5_h 118 | VPAND mask, in5, in5 119 | VPSHUFB in5_h, high_tbl, in5_h 120 | VPSHUFB in5, low_tbl, in5 121 | VPXOR in5, in5_h, in5 122 | VMOVDQU in5, 160(out)(pos*1) 123 | 124 | VMOVDQU 192(in)(pos*1), in0 125 | VPSRLQ $4, in0, in0_h 126 | VPAND mask, in0_h, in0_h 127 | VPAND mask, in0, in0 128 | VPSHUFB in0_h, high_tbl, in0_h 129 | VPSHUFB in0, low_tbl, in0 130 | VPXOR in0, in0_h, in0 131 | VMOVDQU in0, 192(out)(pos*1) 132 | 133 | VMOVDQU 224(in)(pos*1), in1 134 | VPSRLQ $4, in1, in1_h 135 | VPAND mask, in1_h, in1_h 136 | VPAND mask, in1, in1 137 | VPSHUFB in1_h, high_tbl, in1_h 138 | VPSHUFB 
in1, low_tbl, in1 139 | VPXOR in1, in1_h, in1 140 | VMOVDQU in1, 224(out)(pos*1) 141 | 142 | ADDQ $256, pos 143 | CMPQ len, pos 144 | JNE loop256b 145 | VZEROUPPER 146 | RET 147 | 148 | not_aligned: 149 | MOVQ len, tmp0 150 | ANDQ $255, tmp0 151 | 152 | loop32b: 153 | VMOVDQU -32(in)(len*1), in0 154 | VPSRLQ $4, in0, in0_h 155 | VPAND mask, in0_h, in0_h 156 | VPAND mask, in0, in0 157 | VPSHUFB in0_h, high_tbl, in0_h 158 | VPSHUFB in0, low_tbl, in0 159 | VPXOR in0, in0_h, in0 160 | VMOVDQU in0, -32(out)(len*1) 161 | SUBQ $32, len 162 | SUBQ $32, tmp0 163 | JG loop32b 164 | CMPQ len, $256 165 | JGE aligned 166 | VZEROUPPER 167 | RET 168 | 169 | one16b: 170 | VMOVDQU -16(in)(len*1), in0x 171 | VPSRLQ $4, in0x, in0_hx 172 | VPAND maskx, in0x, in0x 173 | VPAND maskx, in0_hx, in0_hx 174 | VPSHUFB in0_hx, high_tblx, in0_hx 175 | VPSHUFB in0x, low_tblx, in0x 176 | VPXOR in0x, in0_hx, in0x 177 | VMOVDQU in0x, -16(out)(len*1) 178 | SUBQ $16, len 179 | CMPQ len, $0 180 | JNE ymm 181 | RET 182 | 183 | // func mulVectXORAVX2(tbl, input, output []byte) 184 | TEXT ·mulVectXORAVX2(SB), 4, $0 185 | MOVQ i+24(FP), in 186 | MOVQ o+48(FP), out 187 | MOVQ tbl+0(FP), tmp0 188 | VMOVDQU (tmp0), low_tblx 189 | VMOVDQU 16(tmp0), high_tblx 190 | MOVB $0x0f, DX 191 | LONG $0x2069e3c4; WORD $0x00d2 192 | VPBROADCASTB maskx, maskx 193 | MOVQ in_len+32(FP), len 194 | TESTQ $31, len 195 | JNZ one16b 196 | 197 | ymm: 198 | VINSERTI128 $1, low_tblx, low_tbl, low_tbl 199 | VINSERTI128 $1, high_tblx, high_tbl, high_tbl 200 | VINSERTI128 $1, maskx, mask, mask 201 | TESTQ $255, len 202 | JNZ not_aligned 203 | 204 | aligned: 205 | MOVQ $0, pos 206 | 207 | loop256b: 208 | VMOVDQU (in)(pos*1), in0 209 | VPSRLQ $4, in0, in0_h 210 | VPAND mask, in0_h, in0_h 211 | VPAND mask, in0, in0 212 | VPSHUFB in0_h, high_tbl, in0_h 213 | VPSHUFB in0, low_tbl, in0 214 | VPXOR in0, in0_h, in0 215 | VPXOR (out)(pos*1), in0, in0 216 | VMOVDQU in0, (out)(pos*1) 217 | 218 | VMOVDQU 32(in)(pos*1), in1 219 | VPSRLQ $4, in1, 
in1_h 220 | VPAND mask, in1_h, in1_h 221 | VPAND mask, in1, in1 222 | VPSHUFB in1_h, high_tbl, in1_h 223 | VPSHUFB in1, low_tbl, in1 224 | VPXOR in1, in1_h, in1 225 | VPXOR 32(out)(pos*1), in1, in1 226 | VMOVDQU in1, 32(out)(pos*1) 227 | 228 | VMOVDQU 64(in)(pos*1), in2 229 | VPSRLQ $4, in2, in2_h 230 | VPAND mask, in2_h, in2_h 231 | VPAND mask, in2, in2 232 | VPSHUFB in2_h, high_tbl, in2_h 233 | VPSHUFB in2, low_tbl, in2 234 | VPXOR in2, in2_h, in2 235 | VPXOR 64(out)(pos*1), in2, in2 236 | VMOVDQU in2, 64(out)(pos*1) 237 | 238 | VMOVDQU 96(in)(pos*1), in3 239 | VPSRLQ $4, in3, in3_h 240 | VPAND mask, in3_h, in3_h 241 | VPAND mask, in3, in3 242 | VPSHUFB in3_h, high_tbl, in3_h 243 | VPSHUFB in3, low_tbl, in3 244 | VPXOR in3, in3_h, in3 245 | VPXOR 96(out)(pos*1), in3, in3 246 | VMOVDQU in3, 96(out)(pos*1) 247 | 248 | VMOVDQU 128(in)(pos*1), in4 249 | VPSRLQ $4, in4, in4_h 250 | VPAND mask, in4_h, in4_h 251 | VPAND mask, in4, in4 252 | VPSHUFB in4_h, high_tbl, in4_h 253 | VPSHUFB in4, low_tbl, in4 254 | VPXOR in4, in4_h, in4 255 | VPXOR 128(out)(pos*1), in4, in4 256 | VMOVDQU in4, 128(out)(pos*1) 257 | 258 | VMOVDQU 160(in)(pos*1), in5 259 | VPSRLQ $4, in5, in5_h 260 | VPAND mask, in5_h, in5_h 261 | VPAND mask, in5, in5 262 | VPSHUFB in5_h, high_tbl, in5_h 263 | VPSHUFB in5, low_tbl, in5 264 | VPXOR in5, in5_h, in5 265 | VPXOR 160(out)(pos*1), in5, in5 266 | VMOVDQU in5, 160(out)(pos*1) 267 | 268 | VMOVDQU 192(in)(pos*1), in0 269 | VPSRLQ $4, in0, in0_h 270 | VPAND mask, in0_h, in0_h 271 | VPAND mask, in0, in0 272 | VPSHUFB in0_h, high_tbl, in0_h 273 | VPSHUFB in0, low_tbl, in0 274 | VPXOR in0, in0_h, in0 275 | VPXOR 192(out)(pos*1), in0, in0 276 | VMOVDQU in0, 192(out)(pos*1) 277 | 278 | VMOVDQU 224(in)(pos*1), in1 279 | VPSRLQ $4, in1, in1_h 280 | VPAND mask, in1_h, in1_h 281 | VPAND mask, in1, in1 282 | VPSHUFB in1_h, high_tbl, in1_h 283 | VPSHUFB in1, low_tbl, in1 284 | VPXOR in1, in1_h, in1 285 | VPXOR 224(out)(pos*1), in1, in1 286 | VMOVDQU in1, 
224(out)(pos*1) 287 | 288 | ADDQ $256, pos 289 | CMPQ len, pos 290 | JNE loop256b 291 | VZEROUPPER 292 | RET 293 | 294 | not_aligned: 295 | MOVQ len, tmp0 296 | ANDQ $255, tmp0 297 | 298 | loop32b: 299 | VMOVDQU -32(in)(len*1), in0 300 | VPSRLQ $4, in0, in0_h 301 | VPAND mask, in0_h, in0_h 302 | VPAND mask, in0, in0 303 | VPSHUFB in0_h, high_tbl, in0_h 304 | VPSHUFB in0, low_tbl, in0 305 | VPXOR in0, in0_h, in0 306 | VPXOR -32(out)(len*1), in0, in0 307 | VMOVDQU in0, -32(out)(len*1) 308 | SUBQ $32, len 309 | SUBQ $32, tmp0 310 | JG loop32b 311 | CMPQ len, $256 312 | JGE aligned 313 | VZEROUPPER 314 | RET 315 | 316 | one16b: 317 | VMOVDQU -16(in)(len*1), in0x 318 | VPSRLQ $4, in0x, in0_hx 319 | VPAND maskx, in0x, in0x 320 | VPAND maskx, in0_hx, in0_hx 321 | VPSHUFB in0_hx, high_tblx, in0_hx 322 | VPSHUFB in0x, low_tblx, in0x 323 | VPXOR in0x, in0_hx, in0x 324 | VPXOR -16(out)(len*1), in0x, in0x 325 | VMOVDQU in0x, -16(out)(len*1) 326 | SUBQ $16, len 327 | CMPQ len, $0 328 | JNE ymm 329 | RET 330 | -------------------------------------------------------------------------------- /rs_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 5 | 6 | package reedsolomon 7 | 8 | import ( 9 | "bytes" 10 | "fmt" 11 | "math" 12 | "math/rand" 13 | "sync" 14 | "testing" 15 | "time" 16 | ) 17 | 18 | const ( 19 | testDataNum = 10 20 | testParityNum = 4 21 | testSize = kib // enough for covering branches when using SIMD & GF8 22 | ) 23 | 24 | // Check basic matrix multiply. 25 | // Powered by MATLAB. 
26 | func TestRS_mul(t *testing.T) { 27 | d, p := 5, 5 28 | r, err := New(d, p) 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | vects := [][]byte{{0}, {4}, {2}, {6}, {8}, {0}, {0}, {0}, {0}, {0}} 33 | _ = r.mul(vects) 34 | if vects[5][0] != 97 { 35 | t.Fatal("vect 5 mismatch") 36 | } 37 | if vects[6][0] != 173 { 38 | t.Fatal("vect 6 mismatch") 39 | } 40 | if vects[7][0] != 218 { 41 | t.Fatal("vect 7 mismatch") 42 | } 43 | if vects[8][0] != 107 { 44 | t.Fatal("vect 8 mismatch") 45 | } 46 | if vects[9][0] != 110 { 47 | t.Fatal("vect 9 mismatch") 48 | } 49 | } 50 | 51 | // generate_matrix * vects, basic matrix multiply. 52 | // For verifying encoding. 53 | func (r *RS) mul(vects [][]byte) error { 54 | r.GenMatrix.mul(vects, r.DataNum, r.ParityNum, len(vects[0])) 55 | return nil 56 | } 57 | 58 | func (m matrix) mul(vects [][]byte, input, output, n int) { 59 | src := vects[:input] 60 | out := vects[input:] 61 | for i := 0; i < output; i++ { 62 | for j := 0; j < n; j++ { 63 | var s uint8 64 | for k := 0; k < input; k++ { 65 | s ^= gfMul(src[k][j], m[i*input+k]) 66 | } 67 | out[i][j] = s 68 | } 69 | } 70 | } 71 | 72 | func TestRS_Encode(t *testing.T) { 73 | rand.Seed(time.Now().UnixNano()) 74 | 75 | d, p := testDataNum, testParityNum 76 | max := testSize 77 | 78 | testEncode(t, d, p, max, featNoSIMD, featUnknown) 79 | 80 | switch getCPUFeature() { 81 | case featAVX2: 82 | testEncode(t, d, p, max, featAVX2, featNoSIMD) // comparing with verified feature for speeding up. 
83 | } 84 | } 85 | 86 | func testEncode(t *testing.T, d, p, maxSize, feat, cmpFeat int) { 87 | 88 | fs := featToStr(feat) 89 | cmpfs := featToStr(cmpFeat) 90 | 91 | for size := 1; size <= maxSize; size++ { 92 | exp := make([][]byte, d+p) 93 | act := make([][]byte, d+p) 94 | for j := 0; j < d+p; j++ { 95 | exp[j], act[j] = make([]byte, size), make([]byte, size) 96 | } 97 | for j := 0; j < d; j++ { 98 | fillRandom(exp[j]) 99 | copy(act[j], exp[j]) 100 | } 101 | r, err := newWithFeature(d, p, feat) 102 | if err != nil { 103 | t.Fatal(err) 104 | } 105 | err = r.Encode(act) 106 | if err != nil { 107 | t.Fatal(err) 108 | } 109 | 110 | var f func(vects [][]byte) error 111 | if cmpFeat == featUnknown { 112 | f = r.mul 113 | } else { 114 | r2, err := newWithFeature(d, p, cmpFeat) 115 | if err != nil { 116 | t.Fatal(err) 117 | } 118 | f = r2.Encode 119 | } 120 | err = f(exp) 121 | if err != nil { 122 | t.Fatal(err) 123 | } 124 | 125 | for j := range exp { 126 | if !bytes.Equal(exp[j], act[j]) { 127 | t.Fatalf("%s mismatched with %s: %d+%d, vect: %d, size: %d", 128 | fs, cmpfs, d, p, j, size) 129 | } 130 | } 131 | } 132 | 133 | t.Logf("%s matched %s: %d+%d, size: [1, %d)", 134 | fs, cmpfs, d, p, maxSize+1) 135 | } 136 | 137 | func TestMakeInverseCacheKey(t *testing.T) { 138 | 139 | type tc struct { 140 | survived []int 141 | exp uint64 142 | } 143 | cases := []tc{ 144 | {[]int{0}, 1}, 145 | {[]int{1}, 2}, 146 | {[]int{0, 1}, 3}, 147 | {[]int{0, 1, 2}, 7}, 148 | {[]int{0, 2}, 5}, 149 | } 150 | survived := make([]int, 64) 151 | for i := range survived { 152 | survived[i] = i 153 | } 154 | cases = append(cases, tc{survived, math.MaxUint64}) 155 | for i, c := range cases { 156 | got := makeInverseCacheKey(c.survived) 157 | if got != c.exp { 158 | t.Fatalf("case: %d, exp: %d, got: %d, survived: %#v", i, c.exp, got, c.survived) 159 | } 160 | } 161 | } 162 | 163 | func TestRS_Reconst(t *testing.T) { 164 | rand.Seed(time.Now().UnixNano()) 165 | 166 | testReconst(t, testDataNum, 
testParityNum, testSize, 128) 167 | } 168 | 169 | func testReconst(t *testing.T, d, p, size, loop int) { 170 | 171 | r, err := New(d, p) 172 | if err != nil { 173 | t.Fatal(err) 174 | } 175 | 176 | for i := 0; i < loop; i++ { 177 | 178 | exp := make([][]byte, d+p) 179 | act := make([][]byte, d+p) 180 | for j := 0; j < d+p; j++ { 181 | exp[j], act[j] = make([]byte, size), make([]byte, size) 182 | } 183 | for j := 0; j < d; j++ { 184 | fillRandom(exp[j]) 185 | } 186 | 187 | err = r.Encode(exp) 188 | if err != nil { 189 | t.Fatal(err) 190 | } 191 | 192 | survived, needReconst := genIdxForTest(d, p, rand.Intn(d+p), rand.Intn(p+1)) 193 | for _, i := range survived { 194 | copy(act[i], exp[i]) 195 | } 196 | 197 | // Pollute vectors need to be reconstructed. 198 | for _, nr := range needReconst { 199 | if rand.Intn(4) == 1 { // 1/4 chance. 200 | fillRandom(act[nr]) 201 | } 202 | } 203 | 204 | err = r.Reconst(act, survived, needReconst) 205 | if err != nil { 206 | t.Fatal(err) 207 | } 208 | 209 | for _, n := range needReconst { 210 | if !bytes.Equal(exp[n], act[n]) { 211 | t.Fatalf("mismatched vect: %d, size: %d", n, size) 212 | } 213 | } 214 | } 215 | } 216 | 217 | func TestRS_Update(t *testing.T) { 218 | rand.Seed(time.Now().UnixNano()) 219 | 220 | testUpdate(t, testDataNum, testParityNum, testSize) 221 | } 222 | 223 | func testUpdate(t *testing.T, d, p, size int) { 224 | 225 | for i := 0; i < d; i++ { 226 | act := make([][]byte, d+p) 227 | exp := make([][]byte, d+p) 228 | for j := 0; j < d+p; j++ { 229 | act[j], exp[j] = make([]byte, size), make([]byte, size) 230 | } 231 | for j := 0; j < d; j++ { 232 | fillRandom(exp[j]) 233 | copy(act[j], exp[j]) 234 | } 235 | 236 | r, err := New(d, p) 237 | if err != nil { 238 | t.Fatal(err) 239 | } 240 | err = r.Encode(act) 241 | if err != nil { 242 | t.Fatal(err) 243 | } 244 | 245 | newData := make([]byte, size) 246 | fillRandom(newData) 247 | updateRow := i 248 | err = r.Update(act[updateRow], newData, updateRow, act[d:d+p]) 249 | 
if err != nil { 250 | t.Fatal(err) 251 | } 252 | 253 | copy(exp[updateRow], newData) 254 | err = r.Encode(exp) 255 | if err != nil { 256 | t.Fatal(err) 257 | } 258 | for j := d; j < d+p; j++ { 259 | if !bytes.Equal(act[j], exp[j]) { 260 | t.Fatalf("update failed: vect: %d, size: %d", j, size) 261 | } 262 | } 263 | } 264 | } 265 | 266 | func TestRS_Replace(t *testing.T) { 267 | rand.Seed(time.Now().UnixNano()) 268 | 269 | testReplace(t, testDataNum, testParityNum, testSize, 128, true) 270 | testReplace(t, testDataNum, testParityNum, testSize, 128, false) 271 | } 272 | 273 | func testReplace(t *testing.T, d, p, size, loop int, toZero bool) { 274 | 275 | for i := 0; i < loop; i++ { 276 | replaceRows := makeReplaceRowRandom(d) 277 | act := make([][]byte, d+p) 278 | exp := make([][]byte, d+p) 279 | for j := 0; j < d+p; j++ { 280 | act[j], exp[j] = make([]byte, size), make([]byte, size) 281 | } 282 | for j := 0; j < d; j++ { 283 | fillRandom(exp[j]) 284 | copy(act[j], exp[j]) 285 | } 286 | 287 | data := make([][]byte, len(replaceRows)) 288 | for i, rr := range replaceRows { 289 | data[i] = make([]byte, size) 290 | copy(data[i], exp[rr]) 291 | } 292 | 293 | if toZero { 294 | for _, rr := range replaceRows { 295 | exp[rr] = make([]byte, size) 296 | } 297 | } 298 | 299 | r, err := New(d, p) 300 | if err != nil { 301 | t.Fatal(err) 302 | } 303 | err = r.Encode(exp) 304 | if err != nil { 305 | t.Fatal(err) 306 | } 307 | 308 | if !toZero { 309 | for _, rr := range replaceRows { 310 | act[rr] = make([]byte, size) 311 | } 312 | } 313 | err = r.Encode(act) 314 | if err != nil { 315 | t.Fatal(err) 316 | } 317 | 318 | err = r.Replace(data, replaceRows, act[d:]) 319 | if err != nil { 320 | t.Fatal(err) 321 | } 322 | 323 | for j := d; j < d+p; j++ { 324 | if !bytes.Equal(act[j], exp[j]) { 325 | t.Fatalf("replace failed: vect: %d, size: %d", j, size) 326 | } 327 | } 328 | } 329 | } 330 | 331 | func makeReplaceRowRandom(d int) []int { 332 | rand.Seed(time.Now().UnixNano()) 333 | 334 | 
n := rand.Intn(d + 1) 335 | s := make([]int, 0) 336 | c := 0 337 | for i := 0; i < 64; i++ { 338 | if c == n { 339 | break 340 | } 341 | v := rand.Intn(d) 342 | if !isIn(v, s) { 343 | s = append(s, v) 344 | c++ 345 | } 346 | } 347 | if c == 0 { 348 | s = []int{0} 349 | } 350 | return s 351 | } 352 | 353 | func TestRS_getReconstMatrixFromCache(t *testing.T) { 354 | d, p := 64, 64 // Big enough for showing cache effects. 355 | r, err := New(d, p) 356 | if err != nil { 357 | t.Fatal(err) 358 | } 359 | // Enable Cache. 360 | r.inverseCacheEnabled = true 361 | r.inverseCache = new(sync.Map) 362 | r.inverseCacheMax = 1 363 | 364 | rand.Seed(time.Now().UnixNano()) 365 | 366 | var survived, needReconst []int // genReconstMatrix needs survived vectors & data vectors need to be reconstructed. 367 | for { 368 | var needReconstData int 369 | survived, needReconst = genIdxForTest(d, p, d, p) 370 | survived, needReconst, needReconstData, err = r.checkReconst(survived, needReconst) 371 | if err != nil { 372 | t.Fatal(err) 373 | } 374 | if needReconstData != 0 { // At least has one. 
375 | needReconst = needReconst[:needReconstData] 376 | break 377 | } 378 | } 379 | 380 | start1 := time.Now() 381 | exp, err := r.getReconstMatrix(survived, needReconst) 382 | if err != nil { 383 | t.Fatal(err) 384 | } 385 | cost1 := time.Now().Sub(start1) 386 | 387 | start2 := time.Now() 388 | act, err := r.getReconstMatrix(survived, needReconst) 389 | if err != nil { 390 | t.Fatal(err) 391 | } 392 | cost2 := time.Now().Sub(start2) 393 | 394 | if cost2 >= cost1 { 395 | t.Fatal("cache is much slower than expect") 396 | } 397 | 398 | if !bytes.Equal(act, exp) { 399 | t.Fatal("cache matrix mismatched") 400 | } 401 | } 402 | 403 | func BenchmarkRS_Encode(b *testing.B) { 404 | dps := [][]int{ 405 | {10, 2}, 406 | {10, 4}, 407 | {12, 4}, 408 | } 409 | 410 | sizes := []int{ 411 | 8 * kib, // Hot 412 | mib, // Cold 413 | } 414 | 415 | var feats []int 416 | switch getCPUFeature() { 417 | case featAVX2: 418 | feats = append(feats, featAVX2) 419 | } 420 | feats = append(feats, featNoSIMD) 421 | 422 | for _, feat := range feats { 423 | for _, dp := range dps { 424 | d, p := dp[0], dp[1] 425 | for _, size := range sizes { 426 | b.Run(fmt.Sprintf("(%d+%d)-%s-%s", d, p, byteToStr(size), featToStr(feat)), func(b *testing.B) { 427 | benchEnc(b, d, p, size, feat) 428 | }) 429 | } 430 | } 431 | } 432 | } 433 | 434 | func benchEnc(b *testing.B, d, p, size, feat int) { 435 | 436 | vects := make([][]byte, d+p) 437 | for j := 0; j < d+p; j++ { 438 | vects[j] = make([]byte, size) 439 | } 440 | for j := 0; j < d; j++ { 441 | fillRandom(vects[j]) 442 | } 443 | r, err := newWithFeature(d, p, feat) 444 | if err != nil { 445 | b.Fatal(err) 446 | } 447 | 448 | b.SetBytes(int64((d + p) * size)) 449 | b.ResetTimer() 450 | for i := 0; i < b.N; i++ { 451 | err = r.Encode(vects) 452 | if err != nil { 453 | b.Fatal(err) 454 | } 455 | } 456 | } 457 | 458 | func BenchmarkRS_Reconst(b *testing.B) { 459 | d, p := 10, 4 460 | size := 8 * kib 461 | 462 | for i := 1; i <= p; i++ { 463 | survived, 
needReconst := genIdxForTest(d, p, d+p-i, i) 464 | b.Run(fmt.Sprintf("(%d+%d)-%s-reconst_%d_data_vects-%s", 465 | d, p, byteToStr(size), i, featToStr(getCPUFeature())), 466 | func(b *testing.B) { benchReconst(b, d, p, size, survived, needReconst) }) 467 | } 468 | } 469 | 470 | func benchReconst(b *testing.B, d, p, size int, survived, needReconst []int) { 471 | vects := make([][]byte, d+p) 472 | for j := 0; j < d+p; j++ { 473 | vects[j] = make([]byte, size) 474 | } 475 | for j := 0; j < d; j++ { 476 | fillRandom(vects[j]) 477 | } 478 | r, err := New(d, p) 479 | if err != nil { 480 | b.Fatal(err) 481 | } 482 | err = r.Encode(vects) 483 | if err != nil { 484 | b.Fatal(err) 485 | } 486 | 487 | b.SetBytes(int64((d + len(needReconst)) * size)) 488 | b.ResetTimer() 489 | for i := 0; i < b.N; i++ { 490 | err = r.Reconst(vects, survived, needReconst) 491 | if err != nil { 492 | b.Fatal(err) 493 | } 494 | } 495 | } 496 | 497 | func BenchmarkRS_checkReconst(b *testing.B) { 498 | dps := [][2]int{ 499 | {10, 4}, 500 | } 501 | for _, dp := range dps { 502 | d := dp[0] 503 | p := dp[1] 504 | r, err := New(d, p) 505 | if err != nil { 506 | b.Fatal(err) 507 | } 508 | for i := 1; i <= p; i++ { 509 | is, ir := genIdxForTest(d, p, d, i) 510 | b.Run(fmt.Sprintf("d:%d,p:%d,survived:%d,need_reconst:%d", d, p, len(is), len(ir)), 511 | func(b *testing.B) { 512 | b.ResetTimer() 513 | for j := 0; j < b.N; j++ { 514 | _, _, _, err = r.checkReconst(is, ir) 515 | if err != nil { 516 | b.Fatal(err) 517 | } 518 | } 519 | }) 520 | } 521 | } 522 | } 523 | 524 | func BenchmarkRS_Update(b *testing.B) { 525 | d, p := 10, 4 526 | size := 8 * kib 527 | 528 | updateRow := rand.Intn(d) 529 | b.Run(fmt.Sprintf("(%d+%d)-%s-%s", 530 | d, p, byteToStr(size), featToStr(getCPUFeature())), 531 | func(b *testing.B) { benchUpdate(b, d, p, size, updateRow) }) 532 | } 533 | 534 | func benchUpdate(b *testing.B, d, p, size, updateRow int) { 535 | vects := make([][]byte, d+p) 536 | for j := 0; j < d+p; j++ { 537 | 
vects[j] = make([]byte, size) 538 | } 539 | for j := 0; j < d; j++ { 540 | fillRandom(vects[j]) 541 | } 542 | r, err := New(d, p) 543 | if err != nil { 544 | b.Fatal(err) 545 | } 546 | err = r.Encode(vects) 547 | if err != nil { 548 | b.Fatal(err) 549 | } 550 | 551 | newData := make([]byte, size) 552 | fillRandom(newData) 553 | 554 | b.SetBytes(int64((p + 2 + p) * size)) 555 | b.ResetTimer() 556 | for i := 0; i < b.N; i++ { 557 | err = r.Update(vects[updateRow], newData, updateRow, vects[d:]) 558 | if err != nil { 559 | b.Fatal(err) 560 | } 561 | } 562 | } 563 | 564 | func BenchmarkRS_Replace(b *testing.B) { 565 | d, p := 10, 4 566 | size := 8 * kib 567 | 568 | for i := 1; i <= d-p; i++ { 569 | b.Run(fmt.Sprintf("(%d+%d)-%s-replace_%d_data_vects-%s", 570 | d, p, byteToStr(size), i, featToStr(getCPUFeature())), 571 | func(b *testing.B) { benchReplace(b, d, p, size, i) }) 572 | } 573 | } 574 | 575 | func benchReplace(b *testing.B, d, p, size, n int) { 576 | vects := make([][]byte, d+p) 577 | for j := 0; j < d+p; j++ { 578 | vects[j] = make([]byte, size) 579 | } 580 | for j := 0; j < d; j++ { 581 | fillRandom(vects[j]) 582 | } 583 | r, err := New(d, p) 584 | if err != nil { 585 | b.Fatal(err) 586 | } 587 | err = r.Encode(vects) 588 | if err != nil { 589 | b.Fatal(err) 590 | } 591 | 592 | updateRows := make([]int, n) 593 | for i := range updateRows { 594 | updateRows[i] = i 595 | } 596 | b.SetBytes(int64((n + p + p) * size)) 597 | b.ResetTimer() 598 | for i := 0; i < b.N; i++ { 599 | err = r.Replace(vects[:n], updateRows, vects[d:]) 600 | if err != nil { 601 | b.Fatal(err) 602 | } 603 | } 604 | } 605 | -------------------------------------------------------------------------------- /rs.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Temple3x (temple3x@gmail.com) 2 | // 3 | // Use of this source code is governed by the MIT License 4 | // that can be found in the LICENSE file. 

// Package reedsolomon implements Erasure Codes (systematic codes),
// it's based on:
// Reed-Solomon Codes over GF(2^8).
// Primitive Polynomial: x^8+x^4+x^3+x^2+1.
//
// Galois Field arithmetic using Intel SIMD instructions (AVX512 or AVX2).
package reedsolomon

import (
	"errors"
	"sync"
	"sync/atomic"

	"github.com/templexxx/cpu"
	xor "github.com/templexxx/xorsimd"
)

// RS Reed-Solomon Codes receiver.
type RS struct {
	DataNum   int // DataNum is the number of data row vectors.
	ParityNum int // ParityNum is the number of parity row vectors.

	// CPU's feature.
	// With SIMD feature, performance will be much better.
	cpuFeat int

	encMatrix matrix // Encoding matrix.
	GenMatrix matrix // Generator matrix (the rows of encMatrix below the first DataNum*DataNum entries).

	// inverseCacheEnabled is true only when DataNum+ParityNum <= 64,
	// because the cache key is a 64-bit bitmap of survived indexes.
	inverseCacheEnabled bool
	inverseCache        *sync.Map // Inverse matrix's cache.
	// Limitation of cache, total inverse matrix = C(DataNum+ParityNum, DataNum)
	// = (DataNum+ParityNum)! / ParityNum!DataNum!
	// If there is no limitation, memory will explode. See mathtool/cntinverse for details.
	inverseCacheMax uint64
	inverseCacheN   uint64 // Count of cached inverse matrices; incremented atomically.

	// Galois field multiplying unit: provides mulVect (output = c*input)
	// and mulVectXOR (output ^= c*input), selected per CPU feature.
	*gmu
}

var ErrIllegalVects = errors.New("illegal data/parity number: <= 0 or data+parity > 256")

const (
	maxVects = 256
	kib      = 1024
	mib      = 1024 * kib
	maxInverseMatrixCapInCache = 16 * mib // Keeping inverse matrix cache small, 16 MiB is enough for most cases.
)

// New creates an RS with specific data & parity numbers.
func New(dataNum, parityNum int) (r *RS, err error) {

	return newWithFeature(dataNum, parityNum, featUnknown)
}

// newWithFeature creates an RS with an explicit CPU feature
// (featUnknown means auto-detect). It validates the counts, builds the
// encoding & generator matrices, and enables the inverse-matrix cache
// when dataNum+parityNum fits a 64-bit bitmap key.
// Returns ErrIllegalVects for non-positive counts or dataNum+parityNum > maxVects.
func newWithFeature(dataNum, parityNum, feat int) (r *RS, err error) {
	d, p := dataNum, parityNum
	if d <= 0 || p <= 0 || d+p > maxVects {
		return nil, ErrIllegalVects
	}

	e := makeEncodeMatrix(d, p)
	// The generator matrix is the part of the encoding matrix after its
	// first d*d entries.
	g := e[d*d:]
	r = &RS{DataNum: d, ParityNum: p,
		encMatrix: e, GenMatrix: g}

	if r.DataNum+r.ParityNum <= 64 { // I'm using 64bit bitmap as inverse matrix cache's key.
		r.inverseCacheEnabled = true
		r.inverseCache = new(sync.Map)
		// Each cached inverse matrix is DataNum*DataNum bytes; cap the count
		// so the cache stays within maxInverseMatrixCapInCache bytes total.
		r.inverseCacheMax = maxInverseMatrixCapInCache / uint64(r.DataNum) / uint64(r.DataNum)
	}

	r.cpuFeat = feat
	if r.cpuFeat == featUnknown {
		r.cpuFeat = getCPUFeature()
	}

	// Bind the multiply implementations (SIMD or fallback) for this feature.
	r.gmu = new(gmu)
	r.initFunc(r.cpuFeat)

	return
}

// CPU Features.
const (
	featUnknown = iota
	featAVX2
	featNoSIMD
)

// getCPUFeature reports featAVX2 when the CPU supports AVX2,
// featNoSIMD otherwise.
func getCPUFeature() int {
	if cpu.X86.HasAVX2 {
		return featAVX2
	}
	return featNoSIMD
}

// Encode encodes data for generating parity.
// It multiplies generator matrix by vects[:r.DataNum] to get parity vectors,
// and write into vects[r.DataNum:].
func (r *RS) Encode(vects [][]byte) (err error) {
	err = r.checkEncode(vects)
	if err != nil {
		return
	}
	r.encode(vects, false)
	return
}

var (
	ErrMismatchVects    = errors.New("too few/many vectors given")
	ErrZeroVectSize     = errors.New("vector size is 0")
	ErrMismatchVectSize = errors.New("vectors size mismatched")
)

// checkEncode validates the vectors passed to Encode:
// exactly DataNum+ParityNum vectors, all non-empty and of equal length.
func (r *RS) checkEncode(vects [][]byte) (err error) {
	rows := len(vects)
	if r.DataNum+r.ParityNum != rows {
		return ErrMismatchVects
	}
	size := len(vects[0])
	if size == 0 {
		return ErrZeroVectSize
	}
	for i := 1; i < rows; i++ {
		if len(vects[i]) != size {
			return ErrMismatchVectSize
		}
	}
	return
}

// encode data piece by piece.
// Split vectors for cache-friendly (see func getSplitSize(n int) int for details).
//
// updateOnly: means update old results by XOR new results, but not write new results directly.
// You can see Methods Encode and Update to figure out difference.
func (r *RS) encode(vects [][]byte, updateOnly bool) {
	dv, pv := vects[:r.DataNum], vects[r.DataNum:]
	size := len(vects[0])
	splitSize := getSplitSize(size)
	start := 0
	for start < size {
		end := start + splitSize
		if end > size {
			end = size // Last piece may be shorter than splitSize.
		}
		r.encodePart(start, end, dv, pv, updateOnly)
		start = end
	}
}

// size must be divisible by 16,
// it's the smallest size for SIMD instructions,
// see code block one16b in *_amd64.s for more details.
func getSplitSize(n int) int {
	l1d := cpu.X86.Cache.L1D
	if l1d <= 0 { // Cannot detect cache size(-1) or CPU is not X86(0).
		l1d = 32 * 1024
	}

	if n < 16 {
		return 16
	}
	// Half of L1 Data Cache Size is an empirical data.
	// Fit L1 Data Cache Size, but won't pollute too much in the next round.
	if n < l1d/2 {
		return (n >> 4) << 4 // Round down to a multiple of 16.
	}
	return l1d / 2
}

// encodePart encodes the byte range [start, end) of the data vectors dv into
// the parity vectors pv. The bulk (a multiple of 16 bytes) goes through the
// feature-selected mulVect/mulVectXOR; the <16-byte remainder always uses the
// no-SIMD fallback. For each parity row j, the first data column (i == 0,
// updateOnly false) overwrites pv[j]; every later column XOR-accumulates.
func (r *RS) encodePart(start, end int, dv, pv [][]byte, updateOnly bool) {
	undone := end - start
	do := (undone >> 4) << 4 // do could be 0(when undone < 16)
	d, p, g := r.DataNum, r.ParityNum, r.GenMatrix
	if do >= 16 {
		end2 := start + do
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				if i != 0 || updateOnly {
					r.mulVectXOR(g[j*d+i], dv[i][start:end2], pv[j][start:end2])
				} else {
					// This branch only runs when i == 0, so dv[0] == dv[i].
					r.mulVect(g[j*d+i], dv[0][start:end2], pv[j][start:end2])
				}
			}
		}
	}

	if undone > do { // 0 < undone-do < 16
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				if i != 0 || updateOnly {
					mulVectXORNoSIMD(g[j*d+i], dv[i][start:end], pv[j][start:end])
				} else {
					// NOTE(review): g[j*d] and dv[0] are equivalent to
					// g[j*d+i] and dv[i] here because this branch only
					// runs when i == 0; kept as-is, just inconsistent style.
					mulVectNoSIMD(g[j*d], dv[0][start:end], pv[j][start:end])
				}
			}
		}
	}
}

// Reconst reconstructs missing vectors,
// vects: All vectors, len(vects) = dataNum + parityNum.
// survived: Survived data & parity indexes, len(survived) must >= dataNum.
// needReconst: Vectors index which need to be reconstructed.
// needReconst has higher priority than survived:
// e.g., survived: [1,2,3] needReconst [0,1] -> survived: [2,3] needReconst [0,1]
// When len(survived) == 0, assuming all vectors survived, will be refreshed by needReconst later:
// survived vectors must have correct data.
//
// e.g.,:
// in 3+2, the whole index: [0,1,2,3,4],
// if vects[0,4] are lost & they need to be reconstructed
// (Maybe you only need to reconstruct vects[0] when lost vects[0,4], so the needReconst should be [0], but not [0,4]).
// the survived will be [1,2,3] ,and you must be sure that vects[1,2,3] have correct data,
// results will be written into vects[needReconst] directly.
func (r *RS) Reconst(vects [][]byte, survived, needReconst []int) (err error) {

	var dataNeedReconstN int
	// checkReconst returns survived & needReconst normalized and sorted;
	// needReconst[:dataNeedReconstN] are data rows, the rest parity rows.
	survived, needReconst, dataNeedReconstN, err = r.checkReconst(survived, needReconst)
	if err != nil {
		if err == ErrNoNeedReconst {
			return nil // Nothing to do is not an error for the caller.
		}
		return
	}

	// Data must be rebuilt first: parity reconstruction reads all data rows.
	err = r.reconstData(vects, survived, needReconst[:dataNeedReconstN])
	if err != nil {
		return
	}
	return r.reconstParity(vects, needReconst[dataNeedReconstN:])
}

var (
	ErrNoNeedReconst = errors.New("no need reconst")
	ErrTooManyLost   = errors.New("too many lost")
)

// Per-vector status used while normalizing reconstruct arguments.
const (
	vectUnknown     = uint8(0)
	vectSurvived    = uint8(1)
	vectNeedReconst = uint8(2)
)

// checkVectIdx verifies every index is within [0, d+p).
func checkVectIdx(idx []int, d, p int) error {
	n := d + p
	for _, i := range idx {
		if i < 0 || i >= n {
			return ErrIllegalVects
		}
	}
	return nil
}

// check arguments, return:
// 1. survived index
// 2. data & parity indexes which needed to be reconstructed (sorted after return)
// 3. cnt of data vectors needed to be reconstructed.
func (r *RS) checkReconst(survived, needReconst []int) (vs, nr []int, dn int, err error) {
	if len(needReconst) == 0 {
		err = ErrNoNeedReconst
		return
	}

	d, p := r.DataNum, r.ParityNum

	if err = checkVectIdx(survived, d, p); err != nil {
		return
	}
	if err = checkVectIdx(needReconst, d, p); err != nil {
		return
	}

	status := make([]uint8, d+p)

	if len(survived) == 0 { // Set all survived if no given survived index.
		for i := range status {
			status[i] = vectSurvived
		}
	}
	for _, v := range survived {
		status[v] = vectSurvived
	}

	fullDataRequired := false
	for _, v := range needReconst {
		status[v] = vectNeedReconst // Origin survived status will be replaced if they're conflicting.
		if !fullDataRequired && v >= d {
			fullDataRequired = true // Need to reconstruct parity, full data vectors required.
		}
	}
	if fullDataRequired {
		// Any data row not explicitly survived must be rebuilt too,
		// since parity rows are computed from all data rows.
		for i, v := range status[:d] {
			if v == vectUnknown {
				status[i] = vectNeedReconst
			}
		}
	}

	// vs and nr share one backing array. vs holds at most d+p elements and
	// nr at most p, so vs never grows into nr's region.
	ints := make([]int, d+2*p)
	vs = ints[:d+p][:0]
	nr = ints[d+p:][:0]
	for i, s := range status {
		switch s {
		case vectSurvived:
			vs = append(vs, i)
		case vectNeedReconst:
			if i < d {
				dn++ // Data rows sort before parity rows, so nr[:dn] are data.
			}
			nr = append(nr, i)
		}
	}

	if len(vs) < d || len(nr) > p {
		err = ErrTooManyLost
		return
	}
	return
}

// reconstData rebuilds the lost data vectors listed in needReconst from the
// first DataNum survived vectors, writing results into vects[needReconst].
func (r *RS) reconstData(vects [][]byte, survived, needReconst []int) (err error) {

	nn := len(needReconst)
	if nn == 0 {
		return nil
	}

	d := r.DataNum
	survived = survived[:d] // Only need dataNum vectors for reconstruction.

	gm, err := r.getReconstMatrix(survived, needReconst)
	if err != nil {
		return
	}
	// Layout expected by reconst: d "data" rows (survivors) then nn outputs.
	vs := make([][]byte, d+nn)
	for i, row := range survived {
		vs[i] = vects[row]
	}
	for i, row := range needReconst {
		vs[i+d] = vects[row]
	}
	return r.reconst(vs, gm, nn)
}

// reconstParity recomputes the parity vectors listed in needReconst from the
// (now complete) data vectors, using the matching rows of the encoding matrix.
func (r *RS) reconstParity(vects [][]byte, needReconst []int) (err error) {

	nn := len(needReconst)
	if nn == 0 {
		return nil
	}

	d := r.DataNum
	gm := make([]byte, nn*d)
	for i, l := range needReconst {
		copy(gm[i*d:i*d+d], r.encMatrix[l*d:l*d+d])
	}

	vs := make([][]byte, d+nn)
	for i := 0; i < d; i++ {
		vs[i] = vects[i]
	}
	for i, p := range needReconst {
		vs[i+d] = vects[p]
	}

	return r.reconst(vs, gm, nn)
}

// reconst runs a throwaway RS whose generator matrix is gm:
// vects[:DataNum] are the inputs, vects[DataNum:DataNum+pn] the outputs.
func (r *RS) reconst(vects [][]byte, gm matrix, pn int) error {

	rTmp := &RS{DataNum: r.DataNum, ParityNum: pn, GenMatrix: gm, cpuFeat: r.cpuFeat, gmu: r.gmu}
	return rTmp.Encode(vects)

}

// getReconstMatrix returns the matrix that maps the survived vectors to the
// needReconst vectors, using the inverse-matrix cache when it is enabled.
func (r *RS) getReconstMatrix(survived, needReconst []int) (rm []byte, err error) {

	if !r.inverseCacheEnabled {
		em, err2 := r.encMatrix.makeEncMatrixForReconst(survived)
		if err2 != nil {
			return nil, err2
		}
		return em.makeReconstMatrix(survived, needReconst)
	}
	return r.getReconstMatrixFromCache(survived, needReconst)
}

// getReconstMatrixFromCache is getReconstMatrix with the sync.Map cache:
// the key is a 64-bit bitmap of the survived indexes (see makeInverseCacheKey).
// NOTE(review): concurrent misses on the same key each bump inverseCacheN,
// so the counter may overcount distinct entries — the cap is approximate.
func (r *RS) getReconstMatrixFromCache(survived, needReconst []int) (rm matrix, err error) {

	key := makeInverseCacheKey(survived)

	emRaw, ok := r.inverseCache.Load(key)
	if ok {
		em := emRaw.(matrix)
		return em.makeReconstMatrix(survived, needReconst)
	}

	em, err := r.encMatrix.makeEncMatrixForReconst(survived)
	if err != nil {
		return
	}
	if atomic.AddUint64(&r.inverseCacheN, 1) <= r.inverseCacheMax {
r.inverseCache.Store(key, em) 412 | } 413 | return em.makeReconstMatrix(survived, needReconst) 414 | } 415 | 416 | func makeInverseCacheKey(survived []int) uint64 { 417 | var key uint64 418 | for _, i := range survived { 419 | key += 1 << uint8(i) // elements in survived are unique and sorted, okay to use add. 420 | } 421 | return key 422 | } 423 | 424 | // Update updates parity_data when one data_vect changes. 425 | // row: It's the new data's index in the whole vectors. 426 | func (r *RS) Update(oldData []byte, newData []byte, row int, parity [][]byte) (err error) { 427 | 428 | err = r.checkUpdate(oldData, newData, row, parity) 429 | if err != nil { 430 | return 431 | } 432 | 433 | // Step1: old_data xor new_data. 434 | buf := make([]byte, len(oldData)) 435 | xor.Encode(buf, [][]byte{oldData, newData}) 436 | 437 | // Step2: recalculate parity. 438 | vects := make([][]byte, 1+r.ParityNum) 439 | vects[0] = buf 440 | gm := make([]byte, r.ParityNum) 441 | for i := 0; i < r.ParityNum; i++ { 442 | col := row 443 | off := i*r.DataNum + col 444 | c := r.GenMatrix[off] 445 | gm[i] = c 446 | vects[i+1] = parity[i] 447 | } 448 | rs := &RS{DataNum: 1, ParityNum: r.ParityNum, GenMatrix: gm, cpuFeat: r.cpuFeat, gmu: r.gmu} 449 | rs.encode(vects, true) 450 | return nil 451 | } 452 | 453 | var ( 454 | ErrMismatchParityNum = errors.New("parity number mismatched") 455 | ErrIllegalVectIndex = errors.New("illegal vect index") 456 | ) 457 | 458 | func (r *RS) checkUpdate(oldData []byte, newData []byte, row int, parity [][]byte) (err error) { 459 | if len(parity) != r.ParityNum { 460 | return ErrMismatchParityNum 461 | } 462 | size := len(newData) 463 | if size == 0 { 464 | return ErrZeroVectSize 465 | } 466 | if size != len(oldData) { 467 | return ErrMismatchVectSize 468 | } 469 | 470 | for i := range parity { 471 | if len(parity[i]) != size { 472 | return ErrMismatchVectSize 473 | } 474 | } 475 | if row >= r.DataNum || row < 0 { 476 | return ErrIllegalVectIndex 477 | } 478 | return 
479 | } 480 | 481 | // Replace replaces oldData vectors with 0 or replaces 0 with newData vectors. 482 | // 483 | // It's used in two situations: 484 | // 1. We didn't have enough data for filling in a stripe, but still did ec encode, 485 | // we need replace several zero vectors with new vectors which have data after we get enough data finally. 486 | // 2. After compact, we may have several useless vectors in a stripe, 487 | // we need replaces these useless vectors with zero vectors for free space. 488 | // 489 | // In practice, 490 | // If len(replaceRows) > dataNum-parityNum, it's better to use Encode, 491 | // because Replace need to read len(replaceRows) + parityNum vectors, 492 | // if replaceRows are too many, the cost maybe larger than Encode 493 | // (Encode only need read dataNum). 494 | // 495 | // Warn: 496 | // data's index & replaceRows must have the same sort. 497 | func (r *RS) Replace(data [][]byte, replaceRows []int, parity [][]byte) (err error) { 498 | 499 | err = r.checkReplace(data, replaceRows, parity) 500 | if err != nil { 501 | return 502 | } 503 | 504 | d, p := r.DataNum, r.ParityNum 505 | rn := len(replaceRows) 506 | 507 | // Make generator matrix for replacing. 
508 | // 509 | // Values in replaceRows are row index of data, 510 | // and also the column index of generator matrix 511 | gm := make([]byte, p*rn) 512 | off := 0 513 | for i := 0; i < p; i++ { 514 | for j := 0; j < rn; j++ { 515 | k := i*d + replaceRows[j] 516 | gm[off] = r.GenMatrix[k] 517 | off++ 518 | } 519 | } 520 | 521 | vects := make([][]byte, p+rn) 522 | for i := range data { 523 | vects[i] = data[i] 524 | } 525 | 526 | for i := range parity { 527 | vects[rn+i] = parity[i] 528 | } 529 | 530 | updateRS := &RS{DataNum: rn, ParityNum: p, 531 | GenMatrix: gm, cpuFeat: r.cpuFeat, gmu: r.gmu} 532 | updateRS.encode(vects, true) 533 | return nil 534 | } 535 | 536 | var ( 537 | ErrTooManyReplace = errors.New("too many data for replacing") 538 | ErrMismatchReplace = errors.New("number of replaceRows and data mismatch") 539 | ) 540 | 541 | func (r *RS) checkReplace(data [][]byte, replaceRows []int, parity [][]byte) (err error) { 542 | if len(data) > r.DataNum { 543 | return ErrTooManyReplace 544 | } 545 | 546 | if len(replaceRows) != len(data) { 547 | return ErrMismatchReplace 548 | } 549 | 550 | if len(parity) != r.ParityNum { 551 | return ErrMismatchParityNum 552 | } 553 | 554 | size := len(data[0]) 555 | if size == 0 { 556 | return ErrZeroVectSize 557 | } 558 | for i := range data { 559 | if size != len(data[i]) { 560 | return ErrMismatchVectSize 561 | } 562 | } 563 | for i := range parity { 564 | if size != len(parity[i]) { 565 | return ErrMismatchVectSize 566 | } 567 | } 568 | 569 | for _, rr := range replaceRows { 570 | if rr >= r.DataNum || rr < 0 { 571 | return ErrIllegalVectIndex 572 | } 573 | } 574 | return 575 | } 576 | --------------------------------------------------------------------------------