├── .gitignore
├── go.mod
├── .github
    └── workflows
    │   └── go.yml
├── LICENSE
├── integer_test.go
├── integer.go
├── default.go
├── benchmark_test.go
├── base62_test.go
├── base62.go
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/jxskiss/base62
2 | 
3 | go 1.13
4 | 


--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 |     name: Build
12 |     runs-on: ubuntu-latest
13 |     steps:
14 | 
15 |       - name: Set up Go 1.x
16 |         uses: actions/setup-go@v2
17 |         with:
18 |           go-version: ^1.11
19 |         id: go
20 | 
21 |       - name: Check out code into the Go module directory
22 |         uses: actions/checkout@v2
23 | 
24 |       - name: Build
25 |         run: go build -v ./...
26 | 
27 |       - name: Test
28 |         run: go test -v ./...
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Shawn Wang <jxskiss@126.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/integer_test.go:
--------------------------------------------------------------------------------
 1 | package base62
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"math"
 6 | 	"testing"
 7 | )
 8 | 
 9 | func Test_FormatUint_ParseUint(t *testing.T) {
10 | 	x := uint64(math.MaxUint64)
11 | 	dst := FormatUint(x)
12 | 
13 | 	got, err := ParseUint(dst)
14 | 	if err != nil {
15 | 		t.Fatalf("failed parse uint, err = %v", err)
16 | 	}
17 | 	if got != x {
18 | 		t.Fatalf("failed parse uint, got = %v, want = %v", got, x)
19 | 	}
20 | }
21 | 
22 | func Test_FormatUint_ParseInt(t *testing.T) {
23 | 	x := int64(math.MaxInt64)
24 | 	dst := FormatInt(x)
25 | 
26 | 	got, err := ParseInt(dst)
27 | 	if err != nil {
28 | 		t.Fatalf("failed parse int, err = %v", err)
29 | 	}
30 | 	if got != x {
31 | 		t.Fatalf("failed parse int, got = %v, want = %v", got, x)
32 | 	}
33 | }
34 | 
35 | func Test_AppendInt_AppendUint(t *testing.T) {
36 | 	x := int64(math.MaxInt64)
37 | 
38 | 	dst1 := AppendInt(nil, x)
39 | 	dst2 := AppendUint(nil, uint64(x))
40 | 
41 | 	if !bytes.Equal(dst1, dst2) {
42 | 		t.Fatal("integer append result not equal")
43 | 	}
44 | }
45 | 
46 | func Test_FormatInt_Zero(t *testing.T) {
47 | 	dst := FormatInt(0)
48 | 	if len(dst) != 1 || dst[0] != encodeStd[0] {
49 | 		t.Fatalf("failed format zero int, got = %v", string(dst))
50 | 	}
51 | 
52 | 	got, err := ParseInt(dst)
53 | 	if err != nil {
54 | 		t.Fatalf("failed parse zero int, err = %v", err)
55 | 	}
56 | 	if got != 0 {
57 | 		t.Fatalf("failed parse zero int, got = %v, want = 0", got)
58 | 	}
59 | }
60 | 


--------------------------------------------------------------------------------
/integer.go:
--------------------------------------------------------------------------------
 1 | package base62
 2 | 
 3 | // FormatInt encodes an integer num to base62 using the encoding enc.
 4 | func (enc *Encoding) FormatInt(num int64) []byte {
 5 | 	dst := make([]byte, 0)
 6 | 	return enc.AppendUint(dst, uint64(num))
 7 | }
 8 | 
 9 | // FormatUint encodes an unsigned integer num to base62 using the encoding enc.
10 | func (enc *Encoding) FormatUint(num uint64) []byte {
11 | 	dst := make([]byte, 0)
12 | 	return enc.AppendUint(dst, num)
13 | }
14 | 
15 | // AppendInt appends the base62 representation of the integer num,
16 | // as generated by FormatInt, to dst and returns the extended buffer.
17 | func (enc *Encoding) AppendInt(dst []byte, num int64) []byte {
18 | 	return enc.AppendUint(dst, uint64(num))
19 | }
20 | 
21 | // AppendUint appends the base62 representation of the unsigned integer num,
22 | // as generated by FormatUint, to dst and returns the extended buffer.
23 | func (enc *Encoding) AppendUint(dst []byte, num uint64) []byte {
24 | 	if num == 0 {
25 | 		dst = append(dst, enc.encode[0])
26 | 		return dst
27 | 	}
28 | 
29 | 	var buf [11]byte
30 | 	var i = 11
31 | 	for num > 0 {
32 | 		r := num % base
33 | 		num /= base
34 | 		i--
35 | 		buf[i] = enc.encode[r]
36 | 	}
37 | 	dst = append(dst, buf[i:]...)
38 | 	return dst
39 | }
40 | 
41 | // ParseInt returns an integer from its base62 representation.
42 | //
43 | // If src contains invalid base62 data, it returns 0 and CorruptInputError.
44 | func (enc *Encoding) ParseInt(src []byte) (int64, error) {
45 | 	num, err := enc.ParseUint(src)
46 | 	if err != nil {
47 | 		return 0, err
48 | 	}
49 | 	return int64(num), nil
50 | }
51 | 
52 | // ParseUint returns an unsigned integer from its base62 representation.
53 | //
54 | // If src contains invalid base62 data, it returns 0 and CorruptInputError.
55 | func (enc *Encoding) ParseUint(src []byte) (uint64, error) {
56 | 	var num uint64
57 | 	for i, c := range src {
58 | 		x := enc.decodeMap[c]
59 | 		if x == 0xFF {
60 | 			return 0, CorruptInputError(i)
61 | 		}
62 | 		num = num*base + uint64(x)
63 | 	}
64 | 	return num, nil
65 | }
66 | 


--------------------------------------------------------------------------------
/default.go:
--------------------------------------------------------------------------------
 1 | package base62
 2 | 
// encodeStd is the alphabet used by StdEncoding: upper-case letters,
// followed by lower-case letters, followed by digits.
const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"

// StdEncoding is the default base62 encoding using alphabet [A-Za-z0-9].
// The package-level functions in this file all delegate to it.
var StdEncoding = NewEncoding(encodeStd)
 7 | 
// Encode encodes src using StdEncoding and returns the encoded bytes.
func Encode(src []byte) []byte {
	return StdEncoding.Encode(src)
}
12 | 
// EncodeToString returns the base62 string representation of src
// using StdEncoding.
func EncodeToString(src []byte) string {
	return StdEncoding.EncodeToString(src)
}
18 | 
// EncodeToBuf encodes src using StdEncoding, appending the encoded
// bytes to dst. If dst does not have enough capacity, the data is copied
// into a larger buffer, which is returned.
func EncodeToBuf(dst []byte, src []byte) []byte {
	return StdEncoding.EncodeToBuf(dst, src)
}
25 | 
// Decode decodes src using StdEncoding and returns the decoded bytes.
//
// If src contains invalid base62 data, it returns nil and CorruptInputError.
func Decode(src []byte) ([]byte, error) {
	return StdEncoding.Decode(src)
}
32 | 
// DecodeString returns the bytes represented by the base62 string src
// using StdEncoding.
//
// If src contains invalid base62 data, it returns nil and CorruptInputError.
func DecodeString(src string) ([]byte, error) {
	return StdEncoding.DecodeString(src)
}
38 | 
// DecodeToBuf decodes src using StdEncoding, appending the decoded
// bytes to dst. If dst does not have enough capacity, the data is copied
// into a larger buffer, which is returned.
//
// If src contains invalid base62 data, it returns nil and CorruptInputError.
func DecodeToBuf(dst []byte, src []byte) ([]byte, error) {
	return StdEncoding.DecodeToBuf(dst, src)
}
47 | 
// FormatInt encodes the integer num to base62 using StdEncoding.
func FormatInt(num int64) []byte {
	return StdEncoding.FormatInt(num)
}
52 | 
// FormatUint encodes the unsigned integer num to base62 using StdEncoding.
func FormatUint(num uint64) []byte {
	return StdEncoding.FormatUint(num)
}
57 | 
// AppendInt appends the base62 representation of the integer num,
// encoded with StdEncoding, to dst and returns the extended buffer.
func AppendInt(dst []byte, num int64) []byte {
	return StdEncoding.AppendInt(dst, num)
}
63 | 
// AppendUint appends the base62 representation of the unsigned integer num,
// encoded with StdEncoding, to dst and returns the extended buffer.
func AppendUint(dst []byte, num uint64) []byte {
	return StdEncoding.AppendUint(dst, num)
}
69 | 
// ParseInt parses the base62 representation src using StdEncoding and
// returns the value as an int64.
//
// If src contains invalid base62 data, it returns 0 and CorruptInputError.
func ParseInt(src []byte) (int64, error) {
	return StdEncoding.ParseInt(src)
}
77 | 
// ParseUint parses the base62 representation src using StdEncoding and
// returns the value as a uint64.
//
// If src contains invalid base62 data, it returns 0 and CorruptInputError.
func ParseUint(src []byte) (uint64, error) {
	return StdEncoding.ParseUint(src)
}
85 | 


--------------------------------------------------------------------------------
/benchmark_test.go:
--------------------------------------------------------------------------------
  1 | package base62
  2 | 
  3 | import (
  4 | 	"crypto/rand"
  5 | 	"encoding/base64"
  6 | 	"math"
  7 | 	"math/big"
  8 | 	"testing"
  9 | )
 10 | 
// testRandBytes is the 16-byte input shared by the byte-slice benchmarks;
// init fills it with random data.
var testRandBytes = make([]byte, 16)

// testEncodedBytes is the base62 encoding of testRandBytes, set in init.
var testEncodedBytes []byte

// testEncodedBase64 is the unpadded base64 encoding of testRandBytes,
// set in init.
var testEncodedBase64 []byte

// testInteger is the input of the integer encoding benchmark.
var testInteger = uint64(math.MaxInt64)

// testEncodedInteger is the input of the integer decoding benchmark.
var testEncodedInteger = []byte("V8qRkBGKRiP")
 16 | 
 17 | func init() {
 18 | 	if _, err := rand.Read(testRandBytes); err != nil {
 19 | 		panic(err)
 20 | 	}
 21 | 	testEncodedBytes = Encode(testRandBytes)
 22 | 
 23 | 	testEncodedBase64 = make([]byte, base64.RawStdEncoding.EncodedLen(len(testRandBytes)))
 24 | 	base64.RawStdEncoding.Encode(testEncodedBase64, testRandBytes)
 25 | }
 26 | 
 27 | func encodeWithBigInt(b []byte) []byte {
 28 | 	base := big.NewInt(base)
 29 | 	num := new(big.Int).SetBytes(b)
 30 | 	mod := new(big.Int)
 31 | 
 32 | 	ret := make([]byte, 0, len(b)*8/5+1)
 33 | 	for num.BitLen() > 0 {
 34 | 		num.DivMod(num, base, mod)
 35 | 		ret = append(ret, encodeStd[mod.Int64()])
 36 | 	}
 37 | 	return ret
 38 | }
 39 | 
 40 | func Benchmark_Encode_V2(bb *testing.B) {
 41 | 	for i := 0; i < bb.N; i++ {
 42 | 		_ = StdEncoding._encodeV2(testRandBytes)
 43 | 	}
 44 | }
 45 | 
 46 | func Benchmark_Encode_legacyV1(bb *testing.B) {
 47 | 	for i := 0; i < bb.N; i++ {
 48 | 		_ = StdEncoding._encodeV1(testRandBytes)
 49 | 	}
 50 | }
 51 | 
 52 | func Benchmark_EncodeToString(bb *testing.B) {
 53 | 	for i := 0; i < bb.N; i++ {
 54 | 		_ = EncodeToString(testRandBytes)
 55 | 	}
 56 | }
 57 | 
 58 | func Benchmark_EncodeToBuf(bb *testing.B) {
 59 | 	buf := make([]byte, 0, 1000)
 60 | 	for i := 0; i < bb.N; i++ {
 61 | 		_ = EncodeToBuf(buf, testRandBytes)
 62 | 	}
 63 | }
 64 | 
 65 | func Benchmark_Decode(bb *testing.B) {
 66 | 	for i := 0; i < bb.N; i++ {
 67 | 		_, _ = Decode(testEncodedBytes)
 68 | 	}
 69 | }
 70 | 
 71 | func Benchmark_DecodeString(bb *testing.B) {
 72 | 	s := string(testEncodedBytes)
 73 | 	for i := 0; i < bb.N; i++ {
 74 | 		_, _ = DecodeString(s)
 75 | 	}
 76 | }
 77 | 
 78 | func Benchmark_DecodeToBuf(bb *testing.B) {
 79 | 	buf := make([]byte, 0, 1000)
 80 | 	for i := 0; i < bb.N; i++ {
 81 | 		_, _ = DecodeToBuf(buf, testRandBytes)
 82 | 	}
 83 | }
 84 | 
 85 | func Benchmark_Encode_BigInt(bb *testing.B) {
 86 | 	for i := 0; i < bb.N; i++ {
 87 | 		_ = encodeWithBigInt(testRandBytes)
 88 | 	}
 89 | }
 90 | 
 91 | func Benchmark_Base64_EncodeToString(bb *testing.B) {
 92 | 	for i := 0; i < bb.N; i++ {
 93 | 		_ = base64.RawStdEncoding.EncodeToString(testRandBytes)
 94 | 	}
 95 | }
 96 | 
 97 | func Benchmark_Base64_Encode(bb *testing.B) {
 98 | 	buf := make([]byte, 1000)
 99 | 	for i := 0; i < bb.N; i++ {
100 | 		base64.RawStdEncoding.Encode(buf, testRandBytes)
101 | 	}
102 | }
103 | 
104 | func Benchmark_Base64_DecodeString(bb *testing.B) {
105 | 	s := string(testEncodedBase64)
106 | 	for i := 0; i < bb.N; i++ {
107 | 		_, _ = base64.RawStdEncoding.DecodeString(s)
108 | 	}
109 | }
110 | 
111 | func Benchmark_Base64_Decode(bb *testing.B) {
112 | 	buf := make([]byte, 1000)
113 | 	for i := 0; i < bb.N; i++ {
114 | 		_, _ = base64.RawStdEncoding.Decode(buf, testEncodedBase64)
115 | 	}
116 | }
117 | 
118 | func Benchmark_EncodeInteger(bb *testing.B) {
119 | 	for i := 0; i < bb.N; i++ {
120 | 		_ = FormatUint(testInteger)
121 | 	}
122 | }
123 | 
124 | func Benchmark_DecodeInteger(bb *testing.B) {
125 | 	for i := 0; i < bb.N; i++ {
126 | 		_, _ = ParseUint(testEncodedInteger)
127 | 	}
128 | }
129 | 


--------------------------------------------------------------------------------
/base62_test.go:
--------------------------------------------------------------------------------
  1 | package base62
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"crypto/rand"
  6 | 	mathrand "math/rand"
  7 | 	"strings"
  8 | 	"testing"
  9 | )
 10 | 
 11 | func Test_EncodeDecode(t *testing.T) {
 12 | 	src := []byte("Hello, 世界！")
 13 | 	dst := Encode(src)
 14 | 	got, err := Decode(dst)
 15 | 	if err != nil {
 16 | 		t.Fatalf("failed decode, err = %v", err)
 17 | 	}
 18 | 	if !bytes.Equal(src, got) {
 19 | 		t.Fatalf("failed decode, got = %v, want = %v", got, src)
 20 | 	}
 21 | 
 22 | 	dstStr := EncodeToString(src)
 23 | 	got, _ = DecodeString(dstStr)
 24 | 	if !bytes.Equal(src, got) {
 25 | 		t.Fatalf("failed decode string, got = %v, want = %v", got, src)
 26 | 	}
 27 | }
 28 | 
 29 | func Test_EncodeDecode_Zeros(t *testing.T) {
 30 | 	for i := 0; i < 1000; i++ {
 31 | 		src := make([]byte, i)
 32 | 		dst := StdEncoding._encodeV1(src)
 33 | 		got, err := Decode(dst)
 34 | 		if err != nil {
 35 | 			t.Fatalf("failed decode: err = %v", err)
 36 | 		}
 37 | 		if !bytes.Equal(src, got) {
 38 | 			t.Fatalf("failed decode, got = %v, want = %v", got, src)
 39 | 		}
 40 | 
 41 | 		// Make sure the new implementation is compatible with the old.
 42 | 		v2Dst := StdEncoding._encodeV2(src)
 43 | 		if !bytes.Equal(dst, v2Dst) {
 44 | 			t.Logf("src= %v\n  v1= %v\n  v2= %v", src, dst, v2Dst)
 45 | 			t.Fatalf("encode new implementation not equal to v1")
 46 | 		}
 47 | 	}
 48 | }
 49 | 
 50 | func Test_EncodeDecode_0xFF(t *testing.T) {
 51 | 	for i := 0; i < 1000; i++ {
 52 | 		src := make([]byte, i)
 53 | 		for i := range src {
 54 | 			src[i] = 0xff
 55 | 		}
 56 | 		dst := StdEncoding._encodeV1(src)
 57 | 		got, err := Decode(dst)
 58 | 		if err != nil {
 59 | 			t.Fatalf("failed decode: err = %v", err)
 60 | 		}
 61 | 		if !bytes.Equal(src, got) {
 62 | 			t.Fatalf("failed decode, got = %v, want = %v", got, src)
 63 | 		}
 64 | 
 65 | 		// Make sure the new implementation is compatible with the old.
 66 | 		v2Dst := StdEncoding._encodeV2(src)
 67 | 		if !bytes.Equal(dst, v2Dst) {
 68 | 			t.Logf("src= %v\n  v1= %v\n  v2= %v", src, dst, v2Dst)
 69 | 			t.Fatalf("encode new implementation not equal to v1")
 70 | 		}
 71 | 	}
 72 | }
 73 | 
 74 | func Test_EncodeDecode_RandomBytes(t *testing.T) {
 75 | 	for i := 0; i < 1000000; i++ {
 76 | 		src := make([]byte, 32+mathrand.Intn(32))
 77 | 		_, _ = rand.Read(src)
 78 | 		dst := StdEncoding._encodeV1(src)
 79 | 		got, err := Decode(dst)
 80 | 		if err != nil {
 81 | 			t.Fatalf("failed decode, err = %v", err)
 82 | 		}
 83 | 		if !bytes.Equal(src, got) {
 84 | 			t.Fatalf("failed decode, got = %v, want = %v", got, src)
 85 | 		}
 86 | 
 87 | 		// Make sure the new implementation is compatible with the old.
 88 | 		v2Dst := StdEncoding._encodeV2(src)
 89 | 		if !bytes.Equal(dst, v2Dst) {
 90 | 			t.Logf("src= %v\n  v1= %v\n  v2= %v", src, dst, v2Dst)
 91 | 			t.Fatalf("encode new implementation not equal to v1")
 92 | 		}
 93 | 	}
 94 | }
 95 | 
 96 | func Test_EncodeToBuf(t *testing.T) {
 97 | 	buf := make([]byte, 0, 1000)
 98 | 	for i := 0; i < 10000; i++ {
 99 | 		src := make([]byte, 32+mathrand.Intn(100))
100 | 		_, _ = rand.Read(src)
101 | 		want := Encode(src)
102 | 
103 | 		got1 := EncodeToBuf(make([]byte, 0, 2), src)
104 | 		if !bytes.Equal(want, got1) {
105 | 			t.Fatal("incorrect result from EncodeToBuf")
106 | 		}
107 | 
108 | 		got2 := EncodeToBuf(buf, src)
109 | 		if !bytes.Equal(want, got2) {
110 | 			t.Fatal("incorrect result from EncodeToBuf")
111 | 		}
112 | 	}
113 | }
114 | 
115 | func TestDecodeToBuf(t *testing.T) {
116 | 	buf := make([]byte, 0, 1000)
117 | 	for i := 0; i < 10000; i++ {
118 | 		src := make([]byte, 32+mathrand.Intn(100))
119 | 		_, _ = rand.Read(src)
120 | 		encoded := Encode(src)
121 | 
122 | 		got1, err := DecodeToBuf(make([]byte, 0, 2), encoded)
123 | 		if err != nil {
124 | 			t.Fatalf("failed DecodeToBuf, err = %v", err)
125 | 		}
126 | 		if !bytes.Equal(src, got1) {
127 | 			t.Fatalf("incorrect result from DecodeToBuf, encoded = %v", encoded)
128 | 		}
129 | 
130 | 		got2, err := DecodeToBuf(buf, encoded)
131 | 		if err != nil {
132 | 			t.Fatalf("failed DecodeToBuf, err = %v", err)
133 | 		}
134 | 		if !bytes.Equal(src, got2) {
135 | 			t.Fatalf("incorrect result from DecodeToBuf, encoded = %v", encoded)
136 | 		}
137 | 	}
138 | }
139 | 
140 | // ----------
141 | 
142 | func Test_encoder_next(t *testing.T) {
143 | 	src := []byte{123, 234, 255}
144 | 	enc := newEncoder(src)
145 | 
146 | 	//for _, w := range src {
147 | 	//	fmt.Printf("%08b", w)
148 | 	//}
149 | 	//fmt.Println()
150 | 
151 | 	x, hasMore := enc.next()
152 | 	for {
153 | 		_ = x
154 | 		if !hasMore {
155 | 			break
156 | 		}
157 | 		x, hasMore = enc.next()
158 | 	}
159 | }
160 | 
161 | // ----------
162 | 
163 | func Test_NewEncoding_panic(t *testing.T) {
164 | 	func() {
165 | 		encoder := "abcdef"
166 | 		defer func() {
167 | 			if r := recover(); r == nil {
168 | 				t.Errorf("NewEncoding did not panic with encoder %q", encoder)
169 | 			}
170 | 		}()
171 | 		_ = NewEncoding(encoder)
172 | 	}()
173 | 
174 | 	func() {
175 | 		encoder := []byte(encodeStd)
176 | 		encoder[1] = '\n'
177 | 		defer func() {
178 | 			if r := recover(); r == nil {
179 | 				t.Error("NewEncoding did not panic with encoder contains \\n")
180 | 			}
181 | 		}()
182 | 		_ = NewEncoding(string(encoder))
183 | 	}()
184 | 
185 | 	func() {
186 | 		encoder := []byte(encodeStd)
187 | 		encoder[1] = '\r'
188 | 		defer func() {
189 | 			if r := recover(); r == nil {
190 | 				t.Error("NewEncoding did not panic with encoder contains \\r")
191 | 			}
192 | 		}()
193 | 		_ = NewEncoding(string(encoder))
194 | 	}()
195 | }
196 | 
197 | func Test_Decode_CorruptInputError(t *testing.T) {
198 | 	src := make([]byte, 256)
199 | 	for i := range src {
200 | 		src[i] = byte(i)
201 | 	}
202 | 	_, err := StdEncoding.Decode(src)
203 | 	if err == nil || !strings.Contains(err.Error(), "illegal base62 data at input byte") {
204 | 		t.Fatal("decoding invalid data did not return CorruptInputError")
205 | 	}
206 | }
207 | 


--------------------------------------------------------------------------------
/base62.go:
--------------------------------------------------------------------------------
  1 | package base62
  2 | 
  3 | import (
  4 | 	"math/bits"
  5 | 	"reflect"
  6 | 	"strconv"
  7 | 	"unsafe"
  8 | )
  9 | 
const (
	// base is the radix of the encoding and the length of an alphabet.
	base = 62
	// compactMask selects bits 1-4 of a 6-bit group. The encoder treats a
	// group with all four set specially (it is masked down to 5 bits), and
	// the decoder reads a non-final digit with all four set as a 5-bit group.
	compactMask = 0x1E // 00011110
	// mask5bits keeps the low 5 bits of a group.
	mask5bits = 0x1F // 00011111
	// mask6bits keeps the low 6 bits of a group.
	mask6bits = 0x3F // 00111111
)
 16 | 
// An Encoding is a radix 62 encoding/decoding scheme, defined by a
// 62-character alphabet.
type Encoding struct {
	// encode maps a digit value (0..61) to its alphabet byte.
	encode [base]byte
	// decodeMap maps an input byte back to its digit value; bytes that are
	// not part of the alphabet map to 0xFF.
	decodeMap [256]byte
}
 23 | 
 24 | // NewEncoding returns a new Encoding defined by the given alphabet,
 25 | // which must be a 62-byte string that does not contain CR / LF ('\r', '\n').
 26 | func NewEncoding(encoder string) *Encoding {
 27 | 	if len(encoder) != base {
 28 | 		panic("encoding alphabet is not 62-bytes long")
 29 | 	}
 30 | 	for i := 0; i < len(encoder); i++ {
 31 | 		if encoder[i] == '\n' || encoder[i] == '\r' {
 32 | 			panic("encoding alphabet contains newline character")
 33 | 		}
 34 | 	}
 35 | 
 36 | 	e := new(Encoding)
 37 | 	copy(e.encode[:], encoder)
 38 | 	for i := 0; i < len(e.decodeMap); i++ {
 39 | 		e.decodeMap[i] = 0xFF
 40 | 	}
 41 | 	for i := 0; i < len(encoder); i++ {
 42 | 		e.decodeMap[encoder[i]] = byte(i)
 43 | 	}
 44 | 	return e
 45 | }
 46 | 
// Encode encodes src using the encoding enc and returns the encoded bytes
// in a newly allocated slice. An empty src yields an empty, non-nil slice.
func (enc *Encoding) Encode(src []byte) []byte {
	return enc._encodeV2(src)
}
 51 | 
 52 | // _encodeV1 is obsolete, it is here to make sure that the new
 53 | // implementation is compatible with the old one.
 54 | //
 55 | // We don't want to break the programs which use this package.
 56 | func (enc *Encoding) _encodeV1(src []byte) []byte {
 57 | 	if len(src) == 0 {
 58 | 		return []byte{}
 59 | 	}
 60 | 	dst := make([]byte, 0, len(src)*9/5)
 61 | 	encoder := newEncoder(src)
 62 | 	return encoder.encode(dst, enc.encode[:])
 63 | }
 64 | 
 65 | func (enc *Encoding) _encodeV2(src []byte) []byte {
 66 | 	if len(src) == 0 {
 67 | 		return []byte{}
 68 | 	}
 69 | 	dst := make([]byte, 0, len(src)*9/5)
 70 | 	encoder := newEncoder(src)
 71 | 	return encoder.encodeV2(dst, enc.encode[:])
 72 | }
 73 | 
 74 | // EncodeToString returns a base62 string representation of src.
 75 | func (enc *Encoding) EncodeToString(src []byte) string {
 76 | 	ret := enc.Encode(src)
 77 | 	return b2s(ret)
 78 | }
 79 | 
 80 | // EncodeToBuf encodes src using the encoding enc, appending the encoded
 81 | // bytes to dst. If dst has not enough capacity, it copies dst and returns
 82 | // the extended buffer.
 83 | func (enc *Encoding) EncodeToBuf(dst []byte, src []byte) []byte {
 84 | 	if len(src) == 0 {
 85 | 		return []byte{}
 86 | 	}
 87 | 	encoder := newEncoder(src)
 88 | 	return encoder.encodeV2(dst, enc.encode[:])
 89 | }
 90 | 
// encoder walks over the bits of src while encoding.
type encoder struct {
	// src is the input being encoded.
	src []byte
	// pos is a bit offset into src. It starts at len(src)*8 and is
	// decreased as bits are consumed from the end of src.
	pos int
}
 95 | 
 96 | func newEncoder(src []byte) *encoder {
 97 | 	return &encoder{
 98 | 		src: src,
 99 | 		pos: len(src) * 8,
100 | 	}
101 | }
102 | 
// next is the legacy iterator used by encode. It consumes up to 6 bits that
// end at enc.pos, moves enc.pos back accordingly, and returns the digit
// value together with a flag telling whether more input remains.
func (enc *encoder) next() (byte, bool) {
	var i, pos int
	var j, blen byte
	pos = enc.pos - 6
	if pos <= 0 {
		// Six or fewer bits remain: take all of them from the first byte.
		pos = 0
		blen = byte(enc.pos)
	} else {
		// i is the byte holding bit offset pos and j the offset inside that
		// byte; blen is how many of the wanted bits that byte provides.
		i = pos / 8
		j = byte(pos % 8)
		blen = byte((i+1)*8 - pos)
		if blen > 6 {
			blen = 6
		}
	}
	shift := 8 - j - blen
	b := enc.src[i] >> shift & (1<<blen - 1)

	if blen < 6 && pos > 0 {
		// The group straddles two bytes: take the rest from the top of the
		// following byte.
		blen1 := 6 - blen
		b = b<<blen1 | enc.src[i+1]>>(8-blen1)
	}
	if b&compactMask == compactMask {
		// Bits 1-4 are all set: keep only the low 5 bits. Unless this is
		// the final group with a value that already fits in 5 bits, hand
		// one bit back so that it becomes part of the next group.
		if pos > 0 || b > mask5bits {
			pos++
		}
		b &= mask5bits
	}
	enc.pos = pos

	return b, pos > 0
}
135 | 
136 | func (enc *encoder) encode(dst []byte, encTable []byte) []byte {
137 | 	x, hasMore := enc.next()
138 | 	for {
139 | 		dst = append(dst, encTable[x])
140 | 		if !hasMore {
141 | 			break
142 | 		}
143 | 		x, hasMore = enc.next()
144 | 	}
145 | 	return dst
146 | }
147 | 
148 | func (enc *encoder) encodeV2(dst []byte, encTable []byte) []byte {
149 | 	for enc.pos > 0 {
150 | 		size := 6
151 | 		b := enc.get6bits()
152 | 		if b&compactMask == compactMask {
153 | 			if enc.pos > 6 || b > mask5bits {
154 | 				size = 5
155 | 			}
156 | 			b &= mask5bits
157 | 		}
158 | 		dst = append(dst, encTable[b])
159 | 		enc.pos -= size
160 | 	}
161 | 	return dst
162 | }
163 | 
// get6bits returns the 6-bit group that ends at bit offset enc.pos, without
// changing enc.pos. When fewer than 6 bits precede enc.pos, the missing
// high bits are zero.
func (enc *encoder) get6bits() byte {
	// r is the number of bits of byte i that lie before enc.pos.
	r := enc.pos & 0x7
	i := enc.pos >> 3
	if r == 0 {
		// enc.pos sits on a byte boundary: the group ends with the whole
		// previous byte.
		i, r = i-1, 8
	}
	b := enc.src[i] >> (8 - r)
	if r < 6 && i > 0 {
		// Not enough bits in this byte: fill the upper part from the low
		// bits of the preceding byte.
		b |= enc.src[i-1] << r
	}
	return b & mask6bits
}
176 | 
// CorruptInputError is the error reported for invalid base62 input. Its
// value is the offset of the offending input byte.
type CorruptInputError int64

// Error implements the error interface.
func (e CorruptInputError) Error() string {
	const prefix = "illegal base62 data at input byte "
	offset := strconv.FormatInt(int64(e), 10)
	return prefix + offset
}
182 | 
183 | // Decode decodes src using the encoding enc, returns the decoded bytes.
184 | //
185 | // If src contains invalid base62 data, it will return nil and CorruptInputError.
186 | func (enc *Encoding) Decode(src []byte) ([]byte, error) {
187 | 	if len(src) == 0 {
188 | 		return []byte{}, nil
189 | 	}
190 | 	dst := make([]byte, len(src)*6/8+1)
191 | 	dec := decoder(src)
192 | 	idx, err := dec.decode(dst, enc.decodeMap[:])
193 | 	if err != nil {
194 | 		return nil, err
195 | 	}
196 | 	return dst[idx:], nil
197 | }
198 | 
199 | // DecodeString returns the bytes represented by the base62 string src.
200 | func (enc *Encoding) DecodeString(src string) ([]byte, error) {
201 | 	b := s2b(src)
202 | 	return enc.Decode(b)
203 | }
204 | 
205 | // DecodeToBuf decodes src using the encoding enc, appending the decoded
206 | // bytes to dst. If dst has not enough capacity, it copies dst and returns
207 | // the extended buffer.
208 | //
209 | // If src contains invalid base62 data, it will return nil and CorruptInputError.
210 | func (enc *Encoding) DecodeToBuf(dst []byte, src []byte) ([]byte, error) {
211 | 	if len(src) == 0 {
212 | 		return []byte{}, nil
213 | 	}
214 | 	oldCap, oldLen := cap(dst), len(dst)
215 | 	possibleLen := len(src)*6/8 + 1
216 | 	if oldCap < oldLen+possibleLen {
217 | 		newBuf := make([]byte, oldLen, oldLen+possibleLen)
218 | 		copy(newBuf, dst)
219 | 		dst = newBuf
220 | 	}
221 | 	dec := decoder(src)
222 | 	idx, err := dec.decode(dst[oldLen:cap(dst)], enc.decodeMap[:])
223 | 	if err != nil {
224 | 		return nil, err
225 | 	}
226 | 	if idx != 0 {
227 | 		copy(dst[oldLen:cap(dst)], dst[oldLen+idx:cap(dst)])
228 | 	}
229 | 	dst = dst[:cap(dst)-idx]
230 | 	return dst, nil
231 | }
232 | 
// decoder is the base62 text to be decoded.
type decoder []byte

// decode decodes dec into dst using decTable, which maps an input byte to
// its digit value (0xFF marks an invalid byte).
//
// dst is filled from its end towards its start; the returned index is the
// position of the first decoded byte, so the result is dst[idx:]. On an
// invalid input byte it returns 0 and a CorruptInputError carrying the
// offset of that byte.
func (dec decoder) decode(dst []byte, decTable []byte) (int, error) {
	idx := len(dst)
	// pos is the number of bits currently pending in b.
	pos := byte(0)
	b := 0
	for i, c := range dec {
		x := decTable[c]
		if x == 0xFF {
			return 0, CorruptInputError(i)
		}
		if i == len(dec)-1 {
			// The final digit contributes only its significant bits.
			b |= int(x) << pos
			pos += byte(bits.Len8(x))
		} else if x&compactMask == compactMask {
			// A digit with bits 1-4 all set stands for a 5-bit group.
			b |= int(x) << pos
			pos += 5
		} else {
			b |= int(x) << pos
			pos += 6
		}
		if pos >= 8 {
			// A full byte is available: store it and keep the remainder.
			idx--
			dst[idx] = byte(b)
			pos %= 8
			b >>= 8
		}
	}
	if pos > 0 {
		// Flush the leftover bits as the leading byte of the result.
		idx--
		dst[idx] = byte(b)
	}
	return idx, nil
}
267 | 
// b2s reinterprets b as a string without copying its data. The string
// shares memory with b, so b must not be modified afterwards.
func b2s(b []byte) string {
	header := unsafe.Pointer(&b)
	return *(*string)(header)
}
271 | 
// s2b returns a byte slice that views the data of s without copying it.
// Length and capacity both equal len(s). The slice shares memory with the
// string, so it must only be read, never written.
func s2b(s string) (b []byte) {
	from := (*reflect.StringHeader)(unsafe.Pointer(&s))
	to := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	to.Data = from.Data
	to.Len, to.Cap = from.Len, from.Len
	return b
}
280 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # base62
  2 | 
  3 | [![GoDoc](https://img.shields.io/badge/api-Godoc-blue.svg)][godoc]
  4 | [![Go Report Card](https://goreportcard.com/badge/github.com/jxskiss/base62)][goreport]
  5 | [![Issues](https://img.shields.io/github/issues/jxskiss/base62.svg)][issues]
  6 | [![GitHub release](http://img.shields.io/github/release/jxskiss/base62.svg)][release]
  7 | [![MIT License](http://img.shields.io/badge/license-MIT-blue.svg)][license]
  8 | 
  9 | [godoc]: https://pkg.go.dev/github.com/jxskiss/base62
 10 | [goreport]: https://goreportcard.com/report/github.com/jxskiss/base62
 11 | [issues]: https://github.com/jxskiss/base62/issues
 12 | [release]: https://github.com/jxskiss/base62/releases
 13 | [license]: https://github.com/jxskiss/base62/blob/master/LICENSE
 14 | 
 15 | base62 is a correct, compact and fast implementation of the `Base62` encoding/decoding algorithm.
 16 | It is inspired by the [java implementation by glowfall](https://github.com/glowfall/base62).
 17 | 
 18 | This `Base62` implementation can encode/decode both integers and bytes of arbitrary length,
 19 | the correctness is tested using a large number of randomly generated bytes.
 20 | 
 21 | It is much faster than `big.Int` based implementation and is not much slower than typical `Base64`
 22 | implementations. See the benchmark results below.
 23 | 
 24 | ## Why Base62
 25 | 
 26 | In comparison with Base64/Base32, Base62 is friendlier to humans:
 27 | 
 28 | - it contains only alpha-numerical symbols, no special characters
 29 | - can be validated by eye and with a simpler regexp
 30 | - can be fully selected by mouse double-click in any text editors and browser address bar
 31 | - it's compact and generates shorter strings than Base32
 32 | - it's the most natural and unambiguous way to encode your data in human-readable form :)
 33 | 
 34 | Variations of the Base62 algorithm can be widely used to represent authentication data in printable and
 35 | easy-copyable form, for example to encode OAuth 2.0 `access_token` data.
 36 | 
 37 | ## Usage
 38 | 
 39 | ```go
 40 | // Basic usage.
 41 | Encode(src []byte) []byte
 42 | EncodeToString(src []byte) string
 43 | Decode(src []byte) ([]byte, error)
 44 | DecodeString(src string) ([]byte, error)
 45 | FormatInt(num int64) []byte
 46 | FormatUint(num uint64) []byte
 47 | ParseInt(src []byte) (int64, error)
 48 | ParseUint(src []byte) (uint64, error)
 49 | 
 50 | // Providing a dst buffer, you may reuse buffers to reduce memory allocation.
 51 | EncodeToBuf(dst []byte, src []byte) []byte
 52 | DecodeToBuf(dst []byte, src []byte) ([]byte, error)
 53 | AppendInt(dst []byte, num int64) []byte
 54 | AppendUint(dst []byte, num uint64) []byte
 55 | 
 56 | // Or you may use a custom encoding alphabet.
 57 | enc := NewEncoding("...my-62-byte-string-alphabet...")
 58 | enc.XXX()
 59 | ```
 60 | 
 61 | ## Benchmark
 62 | 
 63 | ```text
 64 | Benchmark_Encode-12                     11654754                97.41 ns/op
 65 | Benchmark_Decode-12                     15481666                73.60 ns/op
 66 | Benchmark_EncodeToString-12             11950086                99.54 ns/op
 67 | Benchmark_DecodeString-12               16301325                74.36 ns/op
 68 | 
 69 | Benchmark_EncodeToBuf-12                13855840                84.68 ns/op
 70 | Benchmark_DecodeToBuf-12                97695962                12.21 ns/op
 71 | 
 72 | Benchmark_EncodeInteger-12              29119437                41.30 ns/op
 73 | Benchmark_DecodeInteger-12             120328183                 9.917 ns/op
 74 | 
 75 | Benchmark_Encode_BigInt-12               1000000              1048 ns/op
 76 | 
 77 | Benchmark_Base64_EncodeToString-12      17803440                70.12 ns/op
 78 | Benchmark_Base64_DecodeString-12        19884616                55.09 ns/op
 79 | 
 80 | Benchmark_Base64_Encode-12              68163142                17.93 ns/op
 81 | Benchmark_Base64_Decode-12              41990004                28.25 ns/op
 82 | ```
 83 | 
 84 | ## How it works
 85 | 
 86 | Base64 and Base32 are both bit-aligned encodings: 6 bits per character for Base64
 87 | (2^6 = 64) and 5 bits for Base32 (2^5 = 32). Base62 is not bit-aligned, and divmod
 88 | on non-bit-aligned integers is inefficient, so typical Base62 implementations are
 89 | BigInt based and encode the input block by block to get better performance
 90 | (e.g. https://github.com/keybase/saltpack/blob/master/encoding/basex).
 91 | 
 92 | 64 characters can fully represent 6 bits, but 62 characters cannot. If each character
 93 | represents only 5 bits, the result is just the Base32 encoding.
 94 | 
 95 | A naive BigInt based algorithm gives the shortest result, which is roughly like this
 96 | (e.g. https://github.com/eknkc/basex/blob/6baac8ea8b19cc66d125286d213770fec0691867/basex.go#L46):
 97 | 
 98 | ```go
 99 | digits := []int{0}
100 | for i := 0; i < len(src); i++ {
101 |     carry := int(src[i])
102 | 
103 |     for j := 0; j < len(digits); j++ {
104 |         carry += digits[j] << 8
105 |         digits[j] = carry % e.base
106 |         carry = carry / e.base
107 |     }
108 | 
109 |     for carry > 0 {
110 |         digits = append(digits, carry%e.base)
111 |         carry = carry / e.base
112 |     }
113 | }
114 | // map to encoding alphabet
115 | _ = digits
116 | ```
117 | 
118 | This works but the time complexity is O(n^2) where n is the length of src. If the
119 | input can be very large, the cost is unacceptable.
120 | 
121 | An improved algorithm splits the input into blocks, which reduces the time complexity
122 | to O(kn), where k is the block size and n is the length of src. It generates slightly
123 | longer output than the naive BigInt algorithm, but that is worth the reduced time
124 | complexity. When k is n, it degrades to the naive algorithm; when k is 1, the output
125 | is twice as long as the input, which we don't want. 32 is chosen as the block size
126 | in the library [saltpack/encoding/basex](https://github.com/keybase/saltpack/blob/master/encoding/basex).
127 | 
128 | Inspired by the [Java implementation by glowfall](https://github.com/glowfall/base62),
129 | this library is not BigInt based; it encodes and decodes arbitrary bytes in O(n)
130 | time. Here is a brief description of the algorithm.
131 | 
132 | This library uses a variable-length encoding. For each 6 bits, if the value is in range
133 | [0, 62), it can be directly mapped to one of the 62 characters; if the value is 62 or 63,
134 | which exceeds 61, we fall back to encoding just the lower 5 bits. If we find a way to
135 | recognize the 5-bit pattern, then we can correctly decode it back to the source data.
136 | 
137 | The binary representation of 62 and 63 is:
138 | 
139 |     62 - 0b_0011_1110
140 |     63 - 0b_0011_1111
141 | 
142 | They share the common mask `0b_0011_1110`. In range [0, 62), there are two other integers,
143 | 30 and 31, that match a similar mask, `0b_0001_1110`:
144 | 
145 |     30 - 0b_0001_1110
146 |     31 - 0b_0001_1111
147 | 
148 | The four integers 30, 31, 62, 63 share a common mask `0b_0001_1110`, while all other
149 | integers in range [0, 64) don't share the mask, i.e. for all other integers, the
150 | expression `value & 0b_0001_1110 == 0b_0001_1110` evaluates to false.
151 | 
152 | **This is the key point!**
153 | 
154 | We define a `compactMask` as `0b_0001_1110`.
155 | 
156 | When encoding, for each 6-bit integer x, if `x & compactMask == compactMask` is true,
157 | x must be one of 30, 31, 62 or 63. We encode just the lower 5 bits, which are either
158 | `0b_0001_1110` (30) or `0b_0001_1111` (31), and leave the 6th bit to the next byte.
159 | 
160 | When decoding, for each encoded byte x, we check `x & compactMask == compactMask`. When
161 | it is false, the byte represents 6 bits of the raw data and its value is in range
162 | [0, 30) or [32, 62); otherwise it represents 5 bits of the raw data and is 30 or 31.
163 | 
164 | That's it. By using a variable-length encoding, we successfully limit the value range
165 | to [0, 62), and we get a very compact result and a simple O(n) time complexity for
166 | encoding and decoding data of arbitrary length.
167 | 
168 | ## Compatibility
169 | 
170 | This library guarantees that it can correctly decode data encoded by itself.
171 | 
172 | The encoded result is not compatible with BigInt based algorithms
173 | (e.g. [saltpack/encoding/basex, GMP and GnuPG](https://github.com/jxskiss/base62/issues/2)).
174 | 
175 | The algorithm may be ported to other languages in the future (if someone writes a port,
176 | I'll be glad to link it here :-) ).
177 | 
178 | ## Changelog
179 | 
180 | ### v1.1.0 - 2022/1/3
181 | 
182 | 1. Refactor the encoding code to be simpler and cleaner; as a bonus, it gives better
183 |    performance, about 1.5x faster than the old code.
184 | 2. Add a brief description of the algorithm and state the compatibility with other
185 |    Base62 implementations.
186 | 
187 | ### v1.0.0 - 2021/10/23
188 | 
189 | First stable release, the package has been used in several small and medium projects.
190 | 
191 | This release adds new APIs which help to reuse buffers to reduce memory allocation.
192 | 


--------------------------------------------------------------------------------