├── .gitignore ├── go.mod ├── .github └── workflows │ └── go.yml ├── LICENSE ├── integer_test.go ├── integer.go ├── default.go ├── benchmark_test.go ├── base62_test.go ├── base62.go └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/jxskiss/base62 2 | 3 | go 1.13 4 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | name: Build 12 | runs-on: ubuntu-latest 13 | steps: 14 | 15 | - name: Set up Go 1.x 16 | uses: actions/setup-go@v2 17 | with: 18 | go-version: ^1.11 19 | id: go 20 | 21 | - name: Check out code into the Go module directory 22 | uses: actions/checkout@v2 23 | 24 | - name: Build 25 | run: go build -v ./... 26 | 27 | - name: Test 28 | run: go test -v ./... 
29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Shawn Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /integer_test.go: -------------------------------------------------------------------------------- 1 | package base62 2 | 3 | import ( 4 | "bytes" 5 | "math" 6 | "testing" 7 | ) 8 | 9 | func Test_FormatUint_ParseUint(t *testing.T) { 10 | x := uint64(math.MaxUint64) 11 | dst := FormatUint(x) 12 | 13 | got, err := ParseUint(dst) 14 | if err != nil { 15 | t.Fatalf("failed parse uint, err = %v", err) 16 | } 17 | if got != x { 18 | t.Fatalf("failed parse uint, got = %v, want = %v", got, x) 19 | } 20 | } 21 | 22 | func Test_FormatUint_ParseInt(t *testing.T) { 23 | x := int64(math.MaxInt64) 24 | dst := FormatInt(x) 25 | 26 | got, err := ParseInt(dst) 27 | if err != nil { 28 | t.Fatalf("failed parse int, err = %v", err) 29 | } 30 | if got != x { 31 | t.Fatalf("failed parse int, got = %v, want = %v", got, x) 32 | } 33 | } 34 | 35 | func Test_AppendInt_AppendUint(t *testing.T) { 36 | x := int64(math.MaxInt64) 37 | 38 | dst1 := AppendInt(nil, x) 39 | dst2 := AppendUint(nil, uint64(x)) 40 | 41 | if !bytes.Equal(dst1, dst2) { 42 | t.Fatal("integer append result not equal") 43 | } 44 | } 45 | 46 | func Test_FormatInt_Zero(t *testing.T) { 47 | dst := FormatInt(0) 48 | if len(dst) != 1 || dst[0] != encodeStd[0] { 49 | t.Fatalf("failed format zero int, got = %v", string(dst)) 50 | } 51 | 52 | got, err := ParseInt(dst) 53 | if err != nil { 54 | t.Fatalf("failed parse zero int, err = %v", err) 55 | } 56 | if got != 0 { 57 | t.Fatalf("failed parse zero int, got = %v, want = 0", got) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /integer.go: -------------------------------------------------------------------------------- 1 | package base62 2 | 3 | // FormatInt encodes an integer num to base62 using the encoding enc. 
4 | func (enc *Encoding) FormatInt(num int64) []byte { 5 | dst := make([]byte, 0) 6 | return enc.AppendUint(dst, uint64(num)) 7 | } 8 | 9 | // FormatUint encodes an unsigned integer num to base62 using the encoding enc. 10 | func (enc *Encoding) FormatUint(num uint64) []byte { 11 | dst := make([]byte, 0) 12 | return enc.AppendUint(dst, num) 13 | } 14 | 15 | // AppendInt appends the base62 representation of the integer num, 16 | // as generated by FormatInt, to dst and returns the extended buffer. 17 | func (enc *Encoding) AppendInt(dst []byte, num int64) []byte { 18 | return enc.AppendUint(dst, uint64(num)) 19 | } 20 | 21 | // AppendUint appends the base62 representation of the unsigned integer num, 22 | // as generated by FormatUint, to dst and returns the extended buffer. 23 | func (enc *Encoding) AppendUint(dst []byte, num uint64) []byte { 24 | if num == 0 { 25 | dst = append(dst, enc.encode[0]) 26 | return dst 27 | } 28 | 29 | var buf [11]byte 30 | var i = 11 31 | for num > 0 { 32 | r := num % base 33 | num /= base 34 | i-- 35 | buf[i] = enc.encode[r] 36 | } 37 | dst = append(dst, buf[i:]...) 38 | return dst 39 | } 40 | 41 | // ParseInt returns an integer from its base62 representation. 42 | // 43 | // If src contains invalid base62 data, it returns 0 and CorruptInputError. 44 | func (enc *Encoding) ParseInt(src []byte) (int64, error) { 45 | num, err := enc.ParseUint(src) 46 | if err != nil { 47 | return 0, err 48 | } 49 | return int64(num), nil 50 | } 51 | 52 | // ParseUint returns an unsigned integer from its base62 representation. 53 | // 54 | // If src contains invalid base62 data, it returns 0 and CorruptInputError. 
55 | func (enc *Encoding) ParseUint(src []byte) (uint64, error) { 56 | var num uint64 57 | for i, c := range src { 58 | x := enc.decodeMap[c] 59 | if x == 0xFF { 60 | return 0, CorruptInputError(i) 61 | } 62 | num = num*base + uint64(x) 63 | } 64 | return num, nil 65 | } 66 | -------------------------------------------------------------------------------- /default.go: -------------------------------------------------------------------------------- 1 | package base62 2 | 3 | const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" 4 | 5 | // StdEncoding is the default base62 encoding using alphabet [A-Za-z0-9]. 6 | var StdEncoding = NewEncoding(encodeStd) 7 | 8 | // Encode encodes src using StdEncoding, returns the encoded bytes. 9 | func Encode(src []byte) []byte { 10 | return StdEncoding.Encode(src) 11 | } 12 | 13 | // EncodeToString returns a base62 string representation of src 14 | // using StdEncoding. 15 | func EncodeToString(src []byte) string { 16 | return StdEncoding.EncodeToString(src) 17 | } 18 | 19 | // EncodeToBuf encodes src using StdEncoding, appending the encoded 20 | // bytes to dst. If dst has not enough capacity, it copies dst and returns 21 | // the extended buffer. 22 | func EncodeToBuf(dst []byte, src []byte) []byte { 23 | return StdEncoding.EncodeToBuf(dst, src) 24 | } 25 | 26 | // Decode decodes src using StdEncoding, returns the decoded bytes. 27 | // 28 | // If src contains invalid base62 data, it will return nil and CorruptInputError. 29 | func Decode(src []byte) ([]byte, error) { 30 | return StdEncoding.Decode(src) 31 | } 32 | 33 | // DecodeString returns the bytes represented by the base62 string src 34 | // using StdEncoding. 35 | func DecodeString(src string) ([]byte, error) { 36 | return StdEncoding.DecodeString(src) 37 | } 38 | 39 | // DecodeToBuf decodes src using StdEncoding, appending the decoded 40 | // bytes to dst. If dst has not enough capacity, it copies dst and returns 41 | // the extended buffer. 
42 | // 43 | // If src contains invalid base62 data, it will return nil and CorruptInputError. 44 | func DecodeToBuf(dst []byte, src []byte) ([]byte, error) { 45 | return StdEncoding.DecodeToBuf(dst, src) 46 | } 47 | 48 | // FormatInt encodes an integer num to base62 using StdEncoding. 49 | func FormatInt(num int64) []byte { 50 | return StdEncoding.FormatInt(num) 51 | } 52 | 53 | // FormatUint encodes an unsigned integer num to base62 using StdEncoding. 54 | func FormatUint(num uint64) []byte { 55 | return StdEncoding.FormatUint(num) 56 | } 57 | 58 | // AppendInt appends the base62 representation of the integer num 59 | // using StdEncoding, to dst and returns the extended buffer. 60 | func AppendInt(dst []byte, num int64) []byte { 61 | return StdEncoding.AppendInt(dst, num) 62 | } 63 | 64 | // AppendUint appends the base62 representation of the unsigned integer num 65 | // using StdEncoding, to dst and returns the extended buffer. 66 | func AppendUint(dst []byte, num uint64) []byte { 67 | return StdEncoding.AppendUint(dst, num) 68 | } 69 | 70 | // ParseInt returns an integer from its base62 representation 71 | // using StdEncoding. 72 | // 73 | // If src contains invalid base62 data, it returns 0 and CorruptInputError. 74 | func ParseInt(src []byte) (int64, error) { 75 | return StdEncoding.ParseInt(src) 76 | } 77 | 78 | // ParseUint returns an unsigned integer from its base62 representation 79 | // using StdEncoding. 80 | // 81 | // If src contains invalid base62 data, it returns 0 and CorruptInputError. 
82 | func ParseUint(src []byte) (uint64, error) { 83 | return StdEncoding.ParseUint(src) 84 | } 85 | -------------------------------------------------------------------------------- /benchmark_test.go: -------------------------------------------------------------------------------- 1 | package base62 2 | 3 | import ( 4 | "crypto/rand" 5 | "encoding/base64" 6 | "math" 7 | "math/big" 8 | "testing" 9 | ) 10 | 11 | var testRandBytes = make([]byte, 16) 12 | var testEncodedBytes []byte 13 | var testEncodedBase64 []byte 14 | var testInteger = uint64(math.MaxInt64) 15 | var testEncodedInteger = []byte("V8qRkBGKRiP") 16 | 17 | func init() { 18 | if _, err := rand.Read(testRandBytes); err != nil { 19 | panic(err) 20 | } 21 | testEncodedBytes = Encode(testRandBytes) 22 | 23 | testEncodedBase64 = make([]byte, base64.RawStdEncoding.EncodedLen(len(testRandBytes))) 24 | base64.RawStdEncoding.Encode(testEncodedBase64, testRandBytes) 25 | } 26 | 27 | func encodeWithBigInt(b []byte) []byte { 28 | base := big.NewInt(base) 29 | num := new(big.Int).SetBytes(b) 30 | mod := new(big.Int) 31 | 32 | ret := make([]byte, 0, len(b)*8/5+1) 33 | for num.BitLen() > 0 { 34 | num.DivMod(num, base, mod) 35 | ret = append(ret, encodeStd[mod.Int64()]) 36 | } 37 | return ret 38 | } 39 | 40 | func Benchmark_Encode_V2(bb *testing.B) { 41 | for i := 0; i < bb.N; i++ { 42 | _ = StdEncoding._encodeV2(testRandBytes) 43 | } 44 | } 45 | 46 | func Benchmark_Encode_legacyV1(bb *testing.B) { 47 | for i := 0; i < bb.N; i++ { 48 | _ = StdEncoding._encodeV1(testRandBytes) 49 | } 50 | } 51 | 52 | func Benchmark_EncodeToString(bb *testing.B) { 53 | for i := 0; i < bb.N; i++ { 54 | _ = EncodeToString(testRandBytes) 55 | } 56 | } 57 | 58 | func Benchmark_EncodeToBuf(bb *testing.B) { 59 | buf := make([]byte, 0, 1000) 60 | for i := 0; i < bb.N; i++ { 61 | _ = EncodeToBuf(buf, testRandBytes) 62 | } 63 | } 64 | 65 | func Benchmark_Decode(bb *testing.B) { 66 | for i := 0; i < bb.N; i++ { 67 | _, _ = Decode(testEncodedBytes) 
68 | } 69 | } 70 | 71 | func Benchmark_DecodeString(bb *testing.B) { 72 | s := string(testEncodedBytes) 73 | for i := 0; i < bb.N; i++ { 74 | _, _ = DecodeString(s) 75 | } 76 | } 77 | 78 | func Benchmark_DecodeToBuf(bb *testing.B) { 79 | buf := make([]byte, 0, 1000) 80 | for i := 0; i < bb.N; i++ { 81 | _, _ = DecodeToBuf(buf, testRandBytes) 82 | } 83 | } 84 | 85 | func Benchmark_Encode_BigInt(bb *testing.B) { 86 | for i := 0; i < bb.N; i++ { 87 | _ = encodeWithBigInt(testRandBytes) 88 | } 89 | } 90 | 91 | func Benchmark_Base64_EncodeToString(bb *testing.B) { 92 | for i := 0; i < bb.N; i++ { 93 | _ = base64.RawStdEncoding.EncodeToString(testRandBytes) 94 | } 95 | } 96 | 97 | func Benchmark_Base64_Encode(bb *testing.B) { 98 | buf := make([]byte, 1000) 99 | for i := 0; i < bb.N; i++ { 100 | base64.RawStdEncoding.Encode(buf, testRandBytes) 101 | } 102 | } 103 | 104 | func Benchmark_Base64_DecodeString(bb *testing.B) { 105 | s := string(testEncodedBase64) 106 | for i := 0; i < bb.N; i++ { 107 | _, _ = base64.RawStdEncoding.DecodeString(s) 108 | } 109 | } 110 | 111 | func Benchmark_Base64_Decode(bb *testing.B) { 112 | buf := make([]byte, 1000) 113 | for i := 0; i < bb.N; i++ { 114 | _, _ = base64.RawStdEncoding.Decode(buf, testEncodedBase64) 115 | } 116 | } 117 | 118 | func Benchmark_EncodeInteger(bb *testing.B) { 119 | for i := 0; i < bb.N; i++ { 120 | _ = FormatUint(testInteger) 121 | } 122 | } 123 | 124 | func Benchmark_DecodeInteger(bb *testing.B) { 125 | for i := 0; i < bb.N; i++ { 126 | _, _ = ParseUint(testEncodedInteger) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /base62_test.go: -------------------------------------------------------------------------------- 1 | package base62 2 | 3 | import ( 4 | "bytes" 5 | "crypto/rand" 6 | mathrand "math/rand" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | func Test_EncodeDecode(t *testing.T) { 12 | src := []byte("Hello, 世界!") 13 | dst := Encode(src) 14 | got, err := 
Decode(dst) 15 | if err != nil { 16 | t.Fatalf("failed decode, err = %v", err) 17 | } 18 | if !bytes.Equal(src, got) { 19 | t.Fatalf("failed decode, got = %v, want = %v", got, src) 20 | } 21 | 22 | dstStr := EncodeToString(src) 23 | got, _ = DecodeString(dstStr) 24 | if !bytes.Equal(src, got) { 25 | t.Fatalf("failed decode string, got = %v, want = %v", got, src) 26 | } 27 | } 28 | 29 | func Test_EncodeDecode_Zeros(t *testing.T) { 30 | for i := 0; i < 1000; i++ { 31 | src := make([]byte, i) 32 | dst := StdEncoding._encodeV1(src) 33 | got, err := Decode(dst) 34 | if err != nil { 35 | t.Fatalf("failed decode: err = %v", err) 36 | } 37 | if !bytes.Equal(src, got) { 38 | t.Fatalf("failed decode, got = %v, want = %v", got, src) 39 | } 40 | 41 | // Make sure the new implementation is compatible with the old. 42 | v2Dst := StdEncoding._encodeV2(src) 43 | if !bytes.Equal(dst, v2Dst) { 44 | t.Logf("src= %v\n v1= %v\n v2= %v", src, dst, v2Dst) 45 | t.Fatalf("encode new implementation not equal to v1") 46 | } 47 | } 48 | } 49 | 50 | func Test_EncodeDecode_0xFF(t *testing.T) { 51 | for i := 0; i < 1000; i++ { 52 | src := make([]byte, i) 53 | for i := range src { 54 | src[i] = 0xff 55 | } 56 | dst := StdEncoding._encodeV1(src) 57 | got, err := Decode(dst) 58 | if err != nil { 59 | t.Fatalf("failed decode: err = %v", err) 60 | } 61 | if !bytes.Equal(src, got) { 62 | t.Fatalf("failed decode, got = %v, want = %v", got, src) 63 | } 64 | 65 | // Make sure the new implementation is compatible with the old. 
66 | v2Dst := StdEncoding._encodeV2(src) 67 | if !bytes.Equal(dst, v2Dst) { 68 | t.Logf("src= %v\n v1= %v\n v2= %v", src, dst, v2Dst) 69 | t.Fatalf("encode new implementation not equal to v1") 70 | } 71 | } 72 | } 73 | 74 | func Test_EncodeDecode_RandomBytes(t *testing.T) { 75 | for i := 0; i < 1000000; i++ { 76 | src := make([]byte, 32+mathrand.Intn(32)) 77 | _, _ = rand.Read(src) 78 | dst := StdEncoding._encodeV1(src) 79 | got, err := Decode(dst) 80 | if err != nil { 81 | t.Fatalf("failed decode, err = %v", err) 82 | } 83 | if !bytes.Equal(src, got) { 84 | t.Fatalf("failed decode, got = %v, want = %v", got, src) 85 | } 86 | 87 | // Make sure the new implementation is compatible with the old. 88 | v2Dst := StdEncoding._encodeV2(src) 89 | if !bytes.Equal(dst, v2Dst) { 90 | t.Logf("src= %v\n v1= %v\n v2= %v", src, dst, v2Dst) 91 | t.Fatalf("encode new implementation not equal to v1") 92 | } 93 | } 94 | } 95 | 96 | func Test_EncodeToBuf(t *testing.T) { 97 | buf := make([]byte, 0, 1000) 98 | for i := 0; i < 10000; i++ { 99 | src := make([]byte, 32+mathrand.Intn(100)) 100 | _, _ = rand.Read(src) 101 | want := Encode(src) 102 | 103 | got1 := EncodeToBuf(make([]byte, 0, 2), src) 104 | if !bytes.Equal(want, got1) { 105 | t.Fatal("incorrect result from EncodeToBuf") 106 | } 107 | 108 | got2 := EncodeToBuf(buf, src) 109 | if !bytes.Equal(want, got2) { 110 | t.Fatal("incorrect result from EncodeToBuf") 111 | } 112 | } 113 | } 114 | 115 | func TestDecodeToBuf(t *testing.T) { 116 | buf := make([]byte, 0, 1000) 117 | for i := 0; i < 10000; i++ { 118 | src := make([]byte, 32+mathrand.Intn(100)) 119 | _, _ = rand.Read(src) 120 | encoded := Encode(src) 121 | 122 | got1, err := DecodeToBuf(make([]byte, 0, 2), encoded) 123 | if err != nil { 124 | t.Fatalf("failed DecodeToBuf, err = %v", err) 125 | } 126 | if !bytes.Equal(src, got1) { 127 | t.Fatalf("incorrect result from DecodeToBuf, encoded = %v", encoded) 128 | } 129 | 130 | got2, err := DecodeToBuf(buf, encoded) 131 | if err != 
nil { 132 | t.Fatalf("failed DecodeToBuf, err = %v", err) 133 | } 134 | if !bytes.Equal(src, got2) { 135 | t.Fatalf("incorrect result from DecodeToBuf, encoded = %v", encoded) 136 | } 137 | } 138 | } 139 | 140 | // ---------- 141 | 142 | func Test_encoder_next(t *testing.T) { 143 | src := []byte{123, 234, 255} 144 | enc := newEncoder(src) 145 | 146 | //for _, w := range src { 147 | // fmt.Printf("%08b", w) 148 | //} 149 | //fmt.Println() 150 | 151 | x, hasMore := enc.next() 152 | for { 153 | _ = x 154 | if !hasMore { 155 | break 156 | } 157 | x, hasMore = enc.next() 158 | } 159 | } 160 | 161 | // ---------- 162 | 163 | func Test_NewEncoding_panic(t *testing.T) { 164 | func() { 165 | encoder := "abcdef" 166 | defer func() { 167 | if r := recover(); r == nil { 168 | t.Errorf("NewEncoding did not panic with encoder %q", encoder) 169 | } 170 | }() 171 | _ = NewEncoding(encoder) 172 | }() 173 | 174 | func() { 175 | encoder := []byte(encodeStd) 176 | encoder[1] = '\n' 177 | defer func() { 178 | if r := recover(); r == nil { 179 | t.Error("NewEncoding did not panic with encoder contains \\n") 180 | } 181 | }() 182 | _ = NewEncoding(string(encoder)) 183 | }() 184 | 185 | func() { 186 | encoder := []byte(encodeStd) 187 | encoder[1] = '\r' 188 | defer func() { 189 | if r := recover(); r == nil { 190 | t.Error("NewEncoding did not panic with encoder contains \\r") 191 | } 192 | }() 193 | _ = NewEncoding(string(encoder)) 194 | }() 195 | } 196 | 197 | func Test_Decode_CorruptInputError(t *testing.T) { 198 | src := make([]byte, 256) 199 | for i := range src { 200 | src[i] = byte(i) 201 | } 202 | _, err := StdEncoding.Decode(src) 203 | if err == nil || !strings.Contains(err.Error(), "illegal base62 data at input byte") { 204 | t.Fatal("decoding invalid data did not return CorruptInputError") 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /base62.go: 
-------------------------------------------------------------------------------- 1 | package base62 2 | 3 | import ( 4 | "math/bits" 5 | "reflect" 6 | "strconv" 7 | "unsafe" 8 | ) 9 | 10 | const ( 11 | base = 62 12 | compactMask = 0x1E // 00011110 13 | mask5bits = 0x1F // 00011111 14 | mask6bits = 0x3F // 00111111 15 | ) 16 | 17 | // An Encoding is a radix 62 encoding/decoding scheme, defined by a 18 | // 62-character alphabet. 19 | type Encoding struct { 20 | encode [base]byte 21 | decodeMap [256]byte 22 | } 23 | 24 | // NewEncoding returns a new Encoding defined by the given alphabet, 25 | // which must be a 62-byte string that does not contain CR / LF ('\r', '\n'). 26 | func NewEncoding(encoder string) *Encoding { 27 | if len(encoder) != base { 28 | panic("encoding alphabet is not 62-bytes long") 29 | } 30 | for i := 0; i < len(encoder); i++ { 31 | if encoder[i] == '\n' || encoder[i] == '\r' { 32 | panic("encoding alphabet contains newline character") 33 | } 34 | } 35 | 36 | e := new(Encoding) 37 | copy(e.encode[:], encoder) 38 | for i := 0; i < len(e.decodeMap); i++ { 39 | e.decodeMap[i] = 0xFF 40 | } 41 | for i := 0; i < len(encoder); i++ { 42 | e.decodeMap[encoder[i]] = byte(i) 43 | } 44 | return e 45 | } 46 | 47 | // Encode encodes src using the encoding enc, returns the encoded bytes. 48 | func (enc *Encoding) Encode(src []byte) []byte { 49 | return enc._encodeV2(src) 50 | } 51 | 52 | // _encodeV1 is obsolete, it is here to make sure that the new 53 | // implementation is compatible with the old one. 54 | // 55 | // We don't want to break the programs which use this package. 
56 | func (enc *Encoding) _encodeV1(src []byte) []byte { 57 | if len(src) == 0 { 58 | return []byte{} 59 | } 60 | dst := make([]byte, 0, len(src)*9/5) 61 | encoder := newEncoder(src) 62 | return encoder.encode(dst, enc.encode[:]) 63 | } 64 | 65 | func (enc *Encoding) _encodeV2(src []byte) []byte { 66 | if len(src) == 0 { 67 | return []byte{} 68 | } 69 | dst := make([]byte, 0, len(src)*9/5) 70 | encoder := newEncoder(src) 71 | return encoder.encodeV2(dst, enc.encode[:]) 72 | } 73 | 74 | // EncodeToString returns a base62 string representation of src. 75 | func (enc *Encoding) EncodeToString(src []byte) string { 76 | ret := enc.Encode(src) 77 | return b2s(ret) 78 | } 79 | 80 | // EncodeToBuf encodes src using the encoding enc, appending the encoded 81 | // bytes to dst. If dst has not enough capacity, it copies dst and returns 82 | // the extended buffer. 83 | func (enc *Encoding) EncodeToBuf(dst []byte, src []byte) []byte { 84 | if len(src) == 0 { 85 | return []byte{} 86 | } 87 | encoder := newEncoder(src) 88 | return encoder.encodeV2(dst, enc.encode[:]) 89 | } 90 | 91 | type encoder struct { 92 | src []byte 93 | pos int 94 | } 95 | 96 | func newEncoder(src []byte) *encoder { 97 | return &encoder{ 98 | src: src, 99 | pos: len(src) * 8, 100 | } 101 | } 102 | 103 | func (enc *encoder) next() (byte, bool) { 104 | var i, pos int 105 | var j, blen byte 106 | pos = enc.pos - 6 107 | if pos <= 0 { 108 | pos = 0 109 | blen = byte(enc.pos) 110 | } else { 111 | i = pos / 8 112 | j = byte(pos % 8) 113 | blen = byte((i+1)*8 - pos) 114 | if blen > 6 { 115 | blen = 6 116 | } 117 | } 118 | shift := 8 - j - blen 119 | b := enc.src[i] >> shift & (1< 0 { 122 | blen1 := 6 - blen 123 | b = b<>(8-blen1) 124 | } 125 | if b&compactMask == compactMask { 126 | if pos > 0 || b > mask5bits { 127 | pos++ 128 | } 129 | b &= mask5bits 130 | } 131 | enc.pos = pos 132 | 133 | return b, pos > 0 134 | } 135 | 136 | func (enc *encoder) encode(dst []byte, encTable []byte) []byte { 137 | x, hasMore := 
enc.next() 138 | for { 139 | dst = append(dst, encTable[x]) 140 | if !hasMore { 141 | break 142 | } 143 | x, hasMore = enc.next() 144 | } 145 | return dst 146 | } 147 | 148 | func (enc *encoder) encodeV2(dst []byte, encTable []byte) []byte { 149 | for enc.pos > 0 { 150 | size := 6 151 | b := enc.get6bits() 152 | if b&compactMask == compactMask { 153 | if enc.pos > 6 || b > mask5bits { 154 | size = 5 155 | } 156 | b &= mask5bits 157 | } 158 | dst = append(dst, encTable[b]) 159 | enc.pos -= size 160 | } 161 | return dst 162 | } 163 | 164 | func (enc *encoder) get6bits() byte { 165 | r := enc.pos & 0x7 166 | i := enc.pos >> 3 167 | if r == 0 { 168 | i, r = i-1, 8 169 | } 170 | b := enc.src[i] >> (8 - r) 171 | if r < 6 && i > 0 { 172 | b |= enc.src[i-1] << r 173 | } 174 | return b & mask6bits 175 | } 176 | 177 | type CorruptInputError int64 178 | 179 | func (e CorruptInputError) Error() string { 180 | return "illegal base62 data at input byte " + strconv.FormatInt(int64(e), 10) 181 | } 182 | 183 | // Decode decodes src using the encoding enc, returns the decoded bytes. 184 | // 185 | // If src contains invalid base62 data, it will return nil and CorruptInputError. 186 | func (enc *Encoding) Decode(src []byte) ([]byte, error) { 187 | if len(src) == 0 { 188 | return []byte{}, nil 189 | } 190 | dst := make([]byte, len(src)*6/8+1) 191 | dec := decoder(src) 192 | idx, err := dec.decode(dst, enc.decodeMap[:]) 193 | if err != nil { 194 | return nil, err 195 | } 196 | return dst[idx:], nil 197 | } 198 | 199 | // DecodeString returns the bytes represented by the base62 string src. 200 | func (enc *Encoding) DecodeString(src string) ([]byte, error) { 201 | b := s2b(src) 202 | return enc.Decode(b) 203 | } 204 | 205 | // DecodeToBuf decodes src using the encoding enc, appending the decoded 206 | // bytes to dst. If dst has not enough capacity, it copies dst and returns 207 | // the extended buffer. 
208 | // 209 | // If src contains invalid base62 data, it will return nil and CorruptInputError. 210 | func (enc *Encoding) DecodeToBuf(dst []byte, src []byte) ([]byte, error) { 211 | if len(src) == 0 { 212 | return []byte{}, nil 213 | } 214 | oldCap, oldLen := cap(dst), len(dst) 215 | possibleLen := len(src)*6/8 + 1 216 | if oldCap < oldLen+possibleLen { 217 | newBuf := make([]byte, oldLen, oldLen+possibleLen) 218 | copy(newBuf, dst) 219 | dst = newBuf 220 | } 221 | dec := decoder(src) 222 | idx, err := dec.decode(dst[oldLen:cap(dst)], enc.decodeMap[:]) 223 | if err != nil { 224 | return nil, err 225 | } 226 | if idx != 0 { 227 | copy(dst[oldLen:cap(dst)], dst[oldLen+idx:cap(dst)]) 228 | } 229 | dst = dst[:cap(dst)-idx] 230 | return dst, nil 231 | } 232 | 233 | type decoder []byte 234 | 235 | func (dec decoder) decode(dst []byte, decTable []byte) (int, error) { 236 | idx := len(dst) 237 | pos := byte(0) 238 | b := 0 239 | for i, c := range dec { 240 | x := decTable[c] 241 | if x == 0xFF { 242 | return 0, CorruptInputError(i) 243 | } 244 | if i == len(dec)-1 { 245 | b |= int(x) << pos 246 | pos += byte(bits.Len8(x)) 247 | } else if x&compactMask == compactMask { 248 | b |= int(x) << pos 249 | pos += 5 250 | } else { 251 | b |= int(x) << pos 252 | pos += 6 253 | } 254 | if pos >= 8 { 255 | idx-- 256 | dst[idx] = byte(b) 257 | pos %= 8 258 | b >>= 8 259 | } 260 | } 261 | if pos > 0 { 262 | idx-- 263 | dst[idx] = byte(b) 264 | } 265 | return idx, nil 266 | } 267 | 268 | func b2s(b []byte) string { 269 | return *(*string)(unsafe.Pointer(&b)) 270 | } 271 | 272 | func s2b(s string) (b []byte) { 273 | sh := (*reflect.StringHeader)(unsafe.Pointer(&s)) 274 | bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) 275 | bh.Data = sh.Data 276 | bh.Len = sh.Len 277 | bh.Cap = sh.Len 278 | return 279 | } 280 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 
# base62 2 | 3 | [![GoDoc](https://img.shields.io/badge/api-Godoc-blue.svg)][godoc] 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/jxskiss/base62)][goreport] 5 | [![Issues](https://img.shields.io/github/issues/jxskiss/base62.svg)][issues] 6 | [![GitHub release](http://img.shields.io/github/release/jxskiss/base62.svg)][release] 7 | [![MIT License](http://img.shields.io/badge/license-MIT-blue.svg)][license] 8 | 9 | [godoc]: https://pkg.go.dev/github.com/jxskiss/base62 10 | [goreport]: https://goreportcard.com/report/github.com/jxskiss/base62 11 | [issues]: https://github.com/jxskiss/base62/issues 12 | [release]: https://github.com/jxskiss/base62/releases 13 | [license]: https://github.com/jxskiss/base62/blob/master/LICENSE 14 | 15 | base62 is a correctly implemented, compact and fast implementation of the `Base62` encoding/decoding algorithm. 16 | It is inspired by the [java implementation by glowfall](https://github.com/glowfall/base62). 17 | 18 | This `Base62` implementation can encode/decode both integers and bytes of arbitrary length, 19 | the correctness is tested using a large number of randomly generated bytes. 20 | 21 | It is much faster than a `big.Int` based implementation and is not much slower than typical `Base64` 22 | implementations. See the benchmark results below. 23 | 24 | ## Why Base62 25 | 26 | In comparison with Base64/Base32, Base62 is more friendly to humans: 27 | 28 | - it contains only alpha-numerical symbols, no special characters 29 | - can be validated by eye and with a simpler regexp 30 | - can be fully selected by mouse double-click in any text editor and browser address bar 31 | - it's compact and generates shorter strings than Base32 32 | - it's the most natural and unambiguous way to encode your data in human-readable form :) 33 | 34 | Variations of the Base62 algorithm can be widely used to represent authentication data in printable and 35 | easy-copyable form, for example to encode OAuth 2.0 `access_token` data. 
36 | 37 | ## Usage 38 | 39 | ```go 40 | // Basic usage. 41 | Encode(src []byte) []byte 42 | EncodeToString(src []byte) string 43 | Decode(src []byte) ([]byte, error) 44 | DecodeString(src string) ([]byte, error) 45 | FormatInt(num int64) []byte 46 | FormatUint(num uint64) []byte 47 | ParseInt(src []byte) (int64, error) 48 | ParseUint(src []byte) (uint64, error) 49 | 50 | // Providing a dst buffer, you may reuse buffers to reduce memory allocation. 51 | EncodeToBuf(dst []byte, src []byte) []byte 52 | DecodeToBuf(dst []byte, src []byte) ([]byte, error) 53 | AppendInt(dst []byte, num int64) []byte 54 | AppendUint(dst []byte, num uint64) []byte 55 | 56 | // Or you may use a custom encoding alphabet. 57 | enc := NewEncoding("...my-62-byte-string-alphabet...") 58 | enc.XXX() 59 | ``` 60 | 61 | ## Benchmark 62 | 63 | ```text 64 | Benchmark_Encode-12 11654754 97.41 ns/op 65 | Benchmark_Decode-12 15481666 73.60 ns/op 66 | Benchmark_EncodeToString-12 11950086 99.54 ns/op 67 | Benchmark_DecodeString-12 16301325 74.36 ns/op 68 | 69 | Benchmark_EncodeToBuf-12 13855840 84.68 ns/op 70 | Benchmark_DecodeToBuf-12 97695962 12.21 ns/op 71 | 72 | Benchmark_EncodeInteger-12 29119437 41.30 ns/op 73 | Benchmark_DecodeInteger-12 120328183 9.917 ns/op 74 | 75 | Benchmark_Encode_BigInt-12 1000000 1048 ns/op 76 | 77 | Benchmark_Base64_EncodeToString-12 17803440 70.12 ns/op 78 | Benchmark_Base64_DecodeString-12 19884616 55.09 ns/op 79 | 80 | Benchmark_Base64_Encode-12 68163142 17.93 ns/op 81 | Benchmark_Base64_Decode-12 41990004 28.25 ns/op 82 | ``` 83 | 84 | ## How it works 85 | 86 | Encoding Base64 and Base32 both are bit-aligned, 6 bits for Base64 (2^6 = 64) 87 | and 5 bits for Base32 (2^5 = 32), but Base62 is not bit-aligned, it's inefficient 88 | to do divmod operation for non-bit-aligned integers, typical Base62 implementations 89 | are BigInt based, which encodes input data block by block to get better performance, 90 | (e.g. 
https://github.com/keybase/saltpack/blob/master/encoding/basex). 91 | 92 | 64 characters can fully represent 6 bits, but 62 characters cannot; if each character 93 | represents 5 bits, that is the Base32 encoding. 94 | 95 | A naive BigInt based algorithm gives the shortest result, which is roughly like this 96 | (e.g. https://github.com/eknkc/basex/blob/6baac8ea8b19cc66d125286d213770fec0691867/basex.go#L46): 97 | 98 | ```go 99 | digits := []int{0} 100 | for i := 0; i < len(src); i++ { 101 | carry := int(src[i]) 102 | 103 | for j := 0; j < len(digits); j++ { 104 | carry += digits[j] << 8 105 | digits[j] = carry % e.base 106 | carry = carry / e.base 107 | } 108 | 109 | for carry > 0 { 110 | digits = append(digits, carry%e.base) 111 | carry = carry / e.base 112 | } 113 | } 114 | // map to encoding alphabet 115 | _ = digits 116 | ``` 117 | 118 | This works but the time complexity is O(n^2) where n is the length of src. If the 119 | input can be very large, the cost is unacceptable. 120 | 121 | An improved algorithm splits the input into blocks, which reduces the time complexity to 122 | O(kn) where k is the block size, and n is the length of src, it generates a slightly 123 | longer result than the naive BigInt algorithm, but it is worth it for the reduced time 124 | complexity. When k is n, it degrades to the naive algorithm, when k is 1, the output 125 | length is twice that of the input, we don't want that. 32 is chosen as the block size 126 | in library [saltpack/encoding/basex](https://github.com/keybase/saltpack/blob/master/encoding/basex). 127 | 128 | Inspired by the [java implementation by glowfall](https://github.com/glowfall/base62), 129 | this library is not BigInt based, it encodes and decodes any arbitrary bytes in O(n) 130 | time complexity. Here is a brief description of the algorithm. 
131 | 132 | This library uses a variable-length encoding, for each 6 bits, if the value is in range 133 | [0, 62), it can be directly mapped to the 62 characters, if the value is 62 or 63 which 134 | exceeds 61, we turn to encoding just the lower 5 bits. If we find a way to recognize 135 | the 5 bits pattern, then we can correctly decode it back to the source data. 136 | 137 | The binary representations of 62 and 63 are: 138 | 139 | 62 - 0b_0011_1110 140 | 63 - 0b_0011_1111 141 | 142 | They have a common mask `0b_0011_1110`, in range [0, 62), there are another two integers 143 | that have a similar mask `0b_0001_1110`, 30 and 31, which are: 144 | 145 | 30 - 0b_0001_1110 146 | 31 - 0b_0001_1111 147 | 148 | The four integers 30, 31, 62, 63 share a common mask `0b_0001_1110`, while all other 149 | integers in range [0, 64) don't share the mask, i.e. for all other integers, the 150 | expression `value & 0b_0001_1110 == 0b_0001_1110` evaluates to false. 151 | 152 | **This is the key point!** 153 | 154 | We define a `compactMask` as `0b_0001_1110`. 155 | 156 | When encoding, for each 6 bits integer x, if `x & compactMask == compactMask` is true, 157 | it must be one of 30, 31, 62 or 63, we just encode the lower 5 bits, which are 158 | `0b_0001_1110` (30) and `0b_0001_1111` (31), we leave the 6-th bit to the next byte. 159 | 160 | When decoding, for each encoded byte x, we check `x & compactMask == compactMask`, when 161 | it is false, we know that the byte represents 6 bits in the raw data, it is in range 162 | [0, 30) or [32, 62), else it represents 5 bits in the raw data, it is 30 or 31. 163 | 164 | That is it, by using variable-length encoding, we successfully limit the value range 165 | to [0, 62), we get a very compact result and a simple O(n) time complexity for encoding 166 | and decoding data of arbitrary length. 167 | 168 | ## Compatibility 169 | 170 | This library guarantees that it can correctly decode data encoded by itself. 
171 | 172 | The encoded result is not compatible with BigInt based algorithm, 173 | (e.g. [saltpack/encoding/basex, GMP and GnuPG](https://github.com/jxskiss/base62/issues/2)). 174 | 175 | The algorithm may be ported to other languages in future (if someone does a porting, 176 | I'm glad to link it here :-) ). 177 | 178 | ## Changelog 179 | 180 | ### v1.1.0 - 2022/1/3 181 | 182 | 1. Refactor the encoding code to be simpler and cleaner, as a bonus, it gives better 183 | performance which is 1.5X~ faster than the old. 184 | 2. Add a brief description of the algorithm and state the compatibility with other 185 | Base62 implementations. 186 | 187 | ### v1.0.0 - 2021/10/23 188 | 189 | First stable release, the package has been used in several small and medium projects. 190 | 191 | This release adds new APIs which help to reuse buffers to reduce memory allocation. 192 | --------------------------------------------------------------------------------