├── .github ├── fuzzing-linux-x64.yml └── push-check-linux-x64.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── base64x.go ├── base64x_test.go ├── cpuid.go ├── faststr.go ├── fuzz_test.go ├── go.mod ├── go.sum ├── native └── native.c ├── native_amd64.go ├── native_subr_amd64.go └── native_text_amd64.go /.github/fuzzing-linux-x64.yml: -------------------------------------------------------------------------------- 1 | name: Fuzz Linux-X64 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Clear repository 10 | run: sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE 11 | 12 | - uses: actions/checkout@v2 13 | 14 | - name: Check Branch 15 | run: ./check_branch_name.sh ${{ github.head_ref }} 16 | 17 | - name: Set up Go 18 | uses: actions/setup-go@v2 19 | with: 20 | go-version: 1.18 21 | 22 | - uses: actions/cache@v2 23 | with: 24 | path: ~/go/pkg/mod 25 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 26 | restore-keys: | 27 | ${{ runner.os }}-go- 28 | 29 | - name: Fuzz sonic 30 | run: go test -fuzz . 
-run ^$ -fuzztime 5m -------------------------------------------------------------------------------- /.github/push-check-linux-x64.yml: -------------------------------------------------------------------------------- 1 | name: Push Check Linux-X64 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | go-version: [1.16.x, 1.17.x, 1.19.x, 1.20.x] 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Clear repository 13 | run: sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE 14 | 15 | - uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v2 21 | with: 22 | go-version: ${{ matrix.go-version }} 23 | 24 | - uses: actions/cache@v2 25 | with: 26 | path: ~/go/pkg/mod 27 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 28 | restore-keys: | 29 | ${{ runner.os }}-go- 30 | 31 | - name: Unit Test 32 | run: | 33 | go test -race -covermode=atomic -coverprofile=coverage.txt ./... 34 | 35 | - name: Codecov 36 | run: bash <(curl -s https://codecov.io/bash) 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.swp 3 | *.swm 4 | *.swn 5 | *.a 6 | *.so 7 | _obj 8 | _test 9 | *.[568vq] 10 | [568vq].out 11 | *.cgo1.go 12 | *.cgo2.c 13 | _cgo_defun.c 14 | _cgo_gotypes.go 15 | _cgo_export.* 16 | _testmain.go 17 | *.exe 18 | *.exe~ 19 | *.test 20 | *.prof 21 | *.rar 22 | *.zip 23 | *.gz 24 | *.psd 25 | *.bmd 26 | *.cfg 27 | *.pptx 28 | *.log 29 | *nohup.out 30 | *settings.pyc 31 | *.sublime-project 32 | *.sublime-workspace 33 | .DS_Store 34 | /.idea/ 35 | /.vscode/ 36 | /output/ 37 | /vendor/ 38 | /Gopkg.lock 39 | /Gopkg.toml 40 | coverage.html 41 | coverage.out 42 | coverage.xml 43 | junit.xml 44 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "tools/asm2asm"] 2 | path = tools/asm2asm 3 | url = https://github.com/chenzhuoyu/asm2asm 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean 2 | 3 | CFLAGS := -mavx 4 | CFLAGS += -mavx2 5 | CFLAGS += -mno-bmi 6 | CFLAGS += -mno-red-zone 7 | CFLAGS += -fno-asynchronous-unwind-tables 8 | CFLAGS += -fno-stack-protector 9 | CFLAGS += -fno-exceptions 10 | CFLAGS += -fno-builtin 11 | CFLAGS += -fno-rtti 12 | CFLAGS += -nostdlib 13 | CFLAGS += -O3 14 | 15 | NATIVE_ASM := $(wildcard native/*.S) 16 | NATIVE_SRC := $(wildcard native/*.h) 17 | NATIVE_SRC += $(wildcard native/*.c) 18 | 19 | all: native_amd64.s 20 | 21 | clean: 22 | rm -vf native_text_amd64.go native_subr_amd64.go output/*.s 23 | 24 | native_amd64.s: ${NATIVE_SRC} ${NATIVE_ASM} native_amd64.go 25 | mkdir -p output 26 | clang ${CFLAGS} -S -o output/native.s native/native.c 27 | python3 tools/asm2asm/asm2asm.py -r native_amd64.go output/native.s ${NATIVE_ASM} 28 | awk '{gsub(/Text__native_entry__/, "text__native_entry__")}1' native_text_amd64.go > native_text_amd64.go.tmp && mv native_text_amd64.go.tmp 
// An Encoding is a radix 64 encoding/decoding scheme, defined by a
// 64-character alphabet. The most common encoding is the "base64"
// encoding defined in RFC 4648 and used in MIME (RFC 2045) and PEM
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and /.
//
// In this package an Encoding is a small bit set: each _MODE_* flag
// below selects one variation of the scheme.
type Encoding int

// Mode flags that can be combined into an Encoding value.
const (
	_MODE_URL  = 1 << iota // URL-safe alphabet (see URLEncoding)
	_MODE_RAW              // unpadded output (see RawStdEncoding)
	_MODE_AVX2             // presumably selects the AVX2 code path -- TODO confirm against the native glue
	_MODE_JSON             // input is additionally JSON-string escaped
)

const (
	// StdEncoding is the standard base64 encoding, as defined in
	// RFC 4648.
	StdEncoding Encoding = 0

	// URLEncoding is the alternate base64 encoding defined in RFC 4648.
	// It is typically used in URLs and file names.
	URLEncoding Encoding = _MODE_URL

	// RawStdEncoding is the standard raw, unpadded base64 encoding,
	// as defined in RFC 4648 section 3.2.
	//
	// This is the same as StdEncoding but omits padding characters.
	RawStdEncoding Encoding = _MODE_RAW
)
39 | const RawURLEncoding Encoding = _MODE_RAW | _MODE_URL 40 | 41 | // JSONStdEncoding is the StdEncoding and encoded as JSON string as RFC 8259. 42 | const JSONStdEncoding Encoding = _MODE_JSON; 43 | 44 | var ( 45 | archFlags = 0 46 | ) 47 | 48 | /** Encoder Functions **/ 49 | 50 | // Encode encodes src using the specified encoding, writing 51 | // EncodedLen(len(src)) bytes to out. 52 | // 53 | // The encoding pads the output to a multiple of 4 bytes, 54 | // so Encode is not appropriate for use on individual blocks 55 | // of a large data stream. 56 | // 57 | // If out is not large enough to contain the encoded result, 58 | // it will panic. 59 | func (self Encoding) Encode(out []byte, src []byte) { 60 | if len(src) != 0 { 61 | if buf := out[:0:len(out)]; self.EncodedLen(len(src)) <= len(out) { 62 | self.EncodeUnsafe(&buf, src) 63 | } else { 64 | panic("encoder output buffer is too small") 65 | } 66 | } 67 | } 68 | 69 | // EncodeUnsafe behaves like Encode, except it does NOT check if 70 | // out is large enough to contain the encoded result. 71 | // 72 | // It will also update the length of out. 73 | func (self Encoding) EncodeUnsafe(out *[]byte, src []byte) { 74 | b64encode(out, &src, int(self) | archFlags) 75 | } 76 | 77 | // EncodeToString returns the base64 encoding of src. 78 | func (self Encoding) EncodeToString(src []byte) string { 79 | nbs := len(src) 80 | ret := make([]byte, 0, self.EncodedLen(nbs)) 81 | 82 | /* encode in native code */ 83 | self.EncodeUnsafe(&ret, src) 84 | return mem2str(ret) 85 | } 86 | 87 | // EncodedLen returns the length in bytes of the base64 encoding 88 | // of an input buffer of length n. 89 | func (self Encoding) EncodedLen(n int) int { 90 | if (self & _MODE_RAW) == 0 { 91 | return (n + 2) / 3 * 4 92 | } else { 93 | return (n * 8 + 5) / 6 94 | } 95 | } 96 | 97 | /** Decoder Functions **/ 98 | 99 | // Decode decodes src using the encoding enc. 
It writes at most 100 | // DecodedLen(len(src)) bytes to out and returns the number of bytes 101 | // written. If src contains invalid base64 data, it will return the 102 | // number of bytes successfully written and base64.CorruptInputError. 103 | // 104 | // New line characters (\r and \n) are ignored. 105 | // 106 | // If out is not large enough to contain the encoded result, 107 | // it will panic. 108 | func (self Encoding) Decode(out []byte, src []byte) (int, error) { 109 | if len(src) == 0 { 110 | return 0, nil 111 | } else if buf := out[:0:len(out)]; self.DecodedLen(len(src)) <= len(out) { 112 | return self.DecodeUnsafe(&buf, src) 113 | } else { 114 | panic("decoder output buffer is too small") 115 | } 116 | } 117 | 118 | // DecodeUnsafe behaves like Decode, except it does NOT check if 119 | // out is large enough to contain the decoded result. 120 | // 121 | // It will also update the length of out. 122 | func (self Encoding) DecodeUnsafe(out *[]byte, src []byte) (int, error) { 123 | if n := b64decode(out, mem2addr(src), len(src), int(self) | archFlags); n >= 0 { 124 | return n, nil 125 | } else { 126 | return 0, base64.CorruptInputError(-n - 1) 127 | } 128 | } 129 | 130 | // DecodeString returns the bytes represented by the base64 string s. 131 | func (self Encoding) DecodeString(s string) ([]byte, error) { 132 | src := str2mem(s) 133 | ret := make([]byte, 0, self.DecodedLen(len(s))) 134 | 135 | /* decode into the allocated buffer */ 136 | if _, err := self.DecodeUnsafe(&ret, src); err != nil { 137 | return nil, err 138 | } else { 139 | return ret, nil 140 | } 141 | } 142 | 143 | // DecodedLen returns the maximum length in bytes of the decoded data 144 | // corresponding to n bytes of base64-encoded data. 
145 | func (self Encoding) DecodedLen(n int) int { 146 | if (self & _MODE_RAW) == 0 { 147 | return n / 4 * 3 148 | } else { 149 | return n * 6 / 8 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /base64x_test.go: -------------------------------------------------------------------------------- 1 | package base64x 2 | 3 | import ( 4 | `crypto/rand` 5 | `encoding/base64` 6 | `io` 7 | `reflect` 8 | `strings` 9 | `testing` 10 | `unsafe` 11 | ) 12 | 13 | type TestPair struct { 14 | decoded string 15 | encoded string 16 | } 17 | 18 | type EncodingTest struct { 19 | enc Encoding // Encoding to test 20 | conv func(string) string // Reference string converter 21 | } 22 | 23 | var pairs = []TestPair{ 24 | // RFC 3548 examples 25 | {"\x14\xfb\x9c\x03\xd9\x7e", "FPucA9l+"}, 26 | {"\x14\xfb\x9c\x03\xd9", "FPucA9k="}, 27 | {"\x14\xfb\x9c\x03", "FPucAw=="}, 28 | 29 | // RFC 4648 examples 30 | {"", ""}, 31 | {"f", "Zg=="}, 32 | {"fo", "Zm8="}, 33 | {"foo", "Zm9v"}, 34 | {"foob", "Zm9vYg=="}, 35 | {"fooba", "Zm9vYmE="}, 36 | {"foobar", "Zm9vYmFy"}, 37 | 38 | // Wikipedia examples 39 | {"sure.", "c3VyZS4="}, 40 | {"sure", "c3VyZQ=="}, 41 | {"sur", "c3Vy"}, 42 | {"su", "c3U="}, 43 | {"leasure.", "bGVhc3VyZS4="}, 44 | {"easure.", "ZWFzdXJlLg=="}, 45 | {"asure.", "YXN1cmUu"}, 46 | {"sure.", "c3VyZS4="}, 47 | 48 | // Relatively long strings 49 | { 50 | "Twas brillig, and the slithy toves", 51 | "VHdhcyBicmlsbGlnLCBhbmQgdGhlIHNsaXRoeSB0b3Zlcw==", 52 | }, { 53 | "\x9dyH\xd2Y\x9e^e\x9e\xb1\x9a\x9a\x12\xfe\x8a\x07\xc7\x07\xcc\xe8l\x81" + 54 | "\xf2\xd9\xe3\x89\xb5\x98\xee\xbd\x8etQ`2>\\t:_\xd7w\xe6\xb5\x96\xc7\xff\x9c", 55 | "nXlI0lmeXmWesZqaEv6KB8cHzOhsgfLZ44m1mO69jnRRYDI+XHQ6X9d35rWWx/+c", 56 | }, 57 | } 58 | 59 | var crlf_pairs = []TestPair{ 60 | // RFC 3548 examples 61 | {"\x14\xfb\x9c\x03\xd9\x7e", "FPuc\r\nA9l+"}, 62 | {"\x14\xfb\x9c\x03\xd9", "FP\r\r\r\rucA9k="}, 63 | {"\x14\xfb\x9c\x03", "\r\nFPucAw=\r=\n"}, 64 | 65 | // RFC 4648 
// json_pairs lists (decoded, encoded) fixtures for the JSON decoding mode.
// The encoded side is standard padded base64 as it could appear inside a
// JSON string: several entries use raw string literals so the backslash
// escapes reach the decoder verbatim instead of being interpreted by Go.
var json_pairs = []TestPair{
	// RFC 3548 examples
	// Escaped line breaks (\r, \n) embedded in the input.
	{"\x14\xfb\x9c\x03\xd9\x7e", `FPu\rcA9l+\n`},
	// \u0041 is 'A', \u0039 is '9' and \u003d is the padding character '='.
	{"\x14\xfb\x9c\x03\xd9\x7e", `FPuc\u00419l+`},
	{"\x14\xfb\x9c\x03\xd9", `FPucA9k\u003d`},
	{"\x14\xfb\x9c\x03\xd9", `FPucA\u0039k\u003d`},
	{"\x14\xfb\x9c\x03", `FPucAw\u003d\u003d`},

	// RFC 4648 examples
	// Inputs without any escape must decode as ordinary base64.
	{"", ""},
	{"f", "Zg=="},
	{"fo", "Zm8="},
	{"foo", "Zm9v"},
	{"foob", "Zm9vYg=="},
	{"fooba", "Zm9vYmE="},
	{"foobar", "Zm9vYmFy"},

	// Wikipedia examples
	{"sure.", "c3VyZS4="},
	{"sure", "c3VyZQ=="},
	{"sur", "c3Vy"},
	{"su", "c3U="},
	{"leasure.", "bGVhc3VyZS4="},
	{"easure.", "ZWFzdXJlLg=="},
	{"asure.", "YXN1cmUu"},
	{"sure.", "c3VyZS4="},

	// Relatively long strings
	{
		"Twas brillig, and the slithy toves",
		"VHdhcyBicmlsbGlnLCBhbmQgdGhlIHNsaXRoeSB0b3Zlcw==",
	}, {
		"\x9dyH\xd2Y\x9e^e\x9e\xb1\x9a\x9a\x12\xfe\x8a\x07\xc7\x07\xcc\xe8l\x81" +
			"\xf2\xd9\xe3\x89\xb5\x98\xee\xbd\x8etQ`2>\\t:_\xd7w\xe6\xb5\x96\xc7\xff\x9c",
		// \u0036\u0039 spells "69" and \/ is an escaped '/', both in the
		// middle of a long input.
		`nXlI0lmeXmWesZqaEv6KB8cHzOhsgfLZ44m1mO\u0036\u0039jnRRYDI+XHQ6X9d35rWWx\/+c`,
	},
}
// stdRef is the identity conversion: the reference strings are already
// written in the standard, padded alphabet.
func stdRef(ref string) string {
	return ref
}

// urlRef rewrites a standard reference string into the URL-safe alphabet
// by substituting '-' for '+' and '_' for '/'.
func urlRef(ref string) string {
	substitutions := [...][2]string{
		{"+", "-"},
		{"/", "_"},
	}
	for _, sub := range substitutions {
		ref = strings.ReplaceAll(ref, sub[0], sub[1])
	}
	return ref
}

// rawRef strips every '=' padding character from a reference string,
// yielding the raw (unpadded) form.
func rawRef(ref string) string {
	unpadded := strings.ReplaceAll(ref, "=", "")
	return unpadded
}

// rawURLRef applies the URL-safe substitution first and then removes the
// padding.
func rawURLRef(ref string) string {
	urlSafe := urlRef(ref)
	return rawRef(urlSafe)
}
// benchmarkStdlibWithSize measures encoding/base64's StdEncoding.Encode on
// nb random bytes, running the encode loop on parallel goroutines.
//
// Each goroutine encodes into its own destination buffer. Sharing a single
// buffer would make every worker write the same memory concurrently, which
// is a data race and adds cache-line contention to the measurement.
func benchmarkStdlibWithSize(b *testing.B, nb int) {
	buf := make([]byte, nb)
	if _, err := io.ReadFull(rand.Reader, buf); err != nil {
		b.Fatalf("reading random input: %v", err)
	}
	b.SetBytes(int64(nb))
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		/* goroutine-local output; buf is only ever read */
		dst := make([]byte, base64.StdEncoding.EncodedLen(nb))
		for pb.Next() {
			base64.StdEncoding.Encode(dst, buf)
		}
	})
}
{ 243 | for _, tt := range encodingTests { 244 | encoded := tt.conv(p.encoded) 245 | dbuf := make([]byte, tt.enc.DecodedLen(len(encoded))) 246 | count, err := tt.enc.Decode(dbuf, []byte(encoded)) 247 | testEqual(t, "Decode(%q) = error %v, want %v", encoded, err, error(nil)) 248 | testEqual(t, "Decode(%q) = length %v, want %v", encoded, count, len(p.decoded)) 249 | testEqual(t, "Decode(%q) = %q, want %q", encoded, string(dbuf[0:count]), p.decoded) 250 | 251 | dbuf, err = tt.enc.DecodeString(encoded) 252 | testEqual(t, "DecodeString(%q) = error %v, want %v", encoded, err, error(nil)) 253 | testEqual(t, "DecodeString(%q) = %q, want %q", encoded, string(dbuf), p.decoded) 254 | } 255 | } 256 | } 257 | 258 | func TestDecoderRecover(t *testing.T) { 259 | t.Run("nil dst", func(t *testing.T) { 260 | in := []byte("abc") 261 | defer func(){ 262 | if v := recover(); v != nil { 263 | println("recover:", v) 264 | } else { 265 | t.Fatal("not recover") 266 | } 267 | }() 268 | b64decode(nil, unsafe.Pointer(&in[0]), len(in), int(StdEncoding)) 269 | }) 270 | t.Run("nil src", func(t *testing.T) { 271 | out := make([]byte, 0, 10) 272 | defer func(){ 273 | if v := recover(); v != nil { 274 | println("recover:", v) 275 | } else { 276 | t.Fatal("not recover") 277 | } 278 | }() 279 | b64decode(&out, nil, 5, int(StdEncoding)) 280 | }) 281 | } 282 | 283 | func TestDecoderCRLF(t *testing.T) { 284 | for _, p := range crlf_pairs { 285 | for _, tt := range encodingTests { 286 | encoded := tt.conv(p.encoded) 287 | dbuf := make([]byte, tt.enc.DecodedLen(len(encoded))) 288 | count, err := tt.enc.Decode(dbuf, []byte(encoded)) 289 | testEqual(t, "Decode(%q) = error %v, want %v", encoded, err, error(nil)) 290 | testEqual(t, "Decode(%q) = length %v, want %v", encoded, count, len(p.decoded)) 291 | testEqual(t, "Decode(%q) = %q, want %q", encoded, string(dbuf[0:count]), p.decoded) 292 | 293 | dbuf, err = tt.enc.DecodeString(encoded) 294 | testEqual(t, "DecodeString(%q) = error %v, want %v", encoded, err, 
error(nil)) 295 | testEqual(t, "DecodeString(%q) = %q, want %q", encoded, string(dbuf), p.decoded) 296 | } 297 | } 298 | } 299 | 300 | func TestDecoderJSON(t *testing.T) { 301 | for _, p := range json_pairs { 302 | encoded := p.encoded 303 | dbuf := make([]byte, JSONStdEncoding.DecodedLen(len(encoded))) 304 | count, err := JSONStdEncoding.Decode(dbuf, []byte(encoded)) 305 | testEqual(t, "Decode(%q) = error %v, want %v", encoded, err, error(nil)) 306 | testEqual(t, "Decode(%q) = length %v, want %v", encoded, count, len(p.decoded)) 307 | testEqual(t, "Decode(%q) = %q, want %q", encoded, string(dbuf[0:count]), p.decoded) 308 | 309 | dbuf, err = JSONStdEncoding.DecodeString(encoded) 310 | testEqual(t, "DecodeString(%q) = error %v, want %v", encoded, err, error(nil)) 311 | testEqual(t, "DecodeString(%q) = %q, want %q", encoded, string(dbuf), p.decoded) 312 | } 313 | } 314 | 315 | func TestDecoderError(t *testing.T) { 316 | _, err := StdEncoding.DecodeString("!aGVsbG8sIHdvcmxk") 317 | if err != base64.CorruptInputError(0) { 318 | panic(err) 319 | } 320 | _, err = StdEncoding.DecodeString("aGVsbG8!sIHdvcmxk") 321 | if err != base64.CorruptInputError(7) { 322 | panic(err) 323 | } 324 | _, err = StdEncoding.DecodeString("123456") 325 | if err != base64.CorruptInputError(6) { 326 | panic(err) 327 | } 328 | _, err = StdEncoding.DecodeString("1234;6") 329 | if err != base64.CorruptInputError(4) { 330 | panic(err) 331 | } 332 | _, err = StdEncoding.DecodeString("F\xaa\xaa\xaa\xaaDDDDDDDDDDDDD//z") 333 | if err != base64.CorruptInputError(1) { 334 | panic(err) 335 | } 336 | } 337 | 338 | func benchmarkStdlibDecoder(b *testing.B, v string) { 339 | src := []byte(v) 340 | dst := make([]byte, base64.StdEncoding.DecodedLen(len(v))) 341 | b.SetBytes(int64(len(v))) 342 | b.ResetTimer() 343 | b.RunParallel(func(pb *testing.PB) { 344 | for pb.Next() { 345 | _, _ = base64.StdEncoding.Decode(dst, src) 346 | } 347 | }) 348 | } 349 | 350 | func benchmarkBase64xDecoder(b *testing.B, v string) 
{ 351 | src := []byte(v) 352 | dst := make([]byte, StdEncoding.DecodedLen(len(v))) 353 | b.SetBytes(int64(len(v))) 354 | b.ResetTimer() 355 | b.RunParallel(func(pb *testing.PB) { 356 | for pb.Next() { 357 | _, _ = StdEncoding.Decode(dst, src) 358 | } 359 | }) 360 | } 361 | 362 | var data = `////////////////////////////////////////////////////////////////` 363 | func BenchmarkDecoderStdLib (b *testing.B) { benchmarkStdlibDecoder(b, data) } 364 | func BenchmarkDecoderBase64x (b *testing.B) { benchmarkBase64xDecoder(b, data) } 365 | -------------------------------------------------------------------------------- /cpuid.go: -------------------------------------------------------------------------------- 1 | package base64x 2 | 3 | import ( 4 | `fmt` 5 | `os` 6 | 7 | `github.com/klauspost/cpuid/v2` 8 | ) 9 | 10 | func hasAVX2() bool { 11 | switch v := os.Getenv("B64X_MODE"); v { 12 | case "" : fallthrough 13 | case "auto" : return cpuid.CPU.Has(cpuid.AVX2) 14 | case "noavx2" : return false 15 | default : panic(fmt.Sprintf("invalid mode: '%s', should be one of 'auto', 'noavx2'", v)) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /faststr.go: -------------------------------------------------------------------------------- 1 | package base64x 2 | 3 | import ( 4 | `reflect` 5 | `unsafe` 6 | ) 7 | 8 | func mem2str(v []byte) (s string) { 9 | (*reflect.StringHeader)(unsafe.Pointer(&s)).Len = (*reflect.SliceHeader)(unsafe.Pointer(&v)).Len 10 | (*reflect.StringHeader)(unsafe.Pointer(&s)).Data = (*reflect.SliceHeader)(unsafe.Pointer(&v)).Data 11 | return 12 | } 13 | 14 | func str2mem(s string) (v []byte) { 15 | (*reflect.SliceHeader)(unsafe.Pointer(&v)).Cap = (*reflect.StringHeader)(unsafe.Pointer(&s)).Len 16 | (*reflect.SliceHeader)(unsafe.Pointer(&v)).Len = (*reflect.StringHeader)(unsafe.Pointer(&s)).Len 17 | (*reflect.SliceHeader)(unsafe.Pointer(&v)).Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data 18 | return 19 | } 
// mem2addr returns the address of the first byte of v's backing array, or
// nil when v has no backing array.
func mem2addr(v []byte) unsafe.Pointer {
	// A slice header begins with its data pointer, so the header's first word
	// can be read back directly as an unsafe.Pointer.
	data := (*unsafe.Pointer)(unsafe.Pointer(&v))
	return *data
}
:= fp.stdlib.EncodeToString(data) 50 | require.Equalf(t, encoded0, encoded1, "encode from %s", spew.Sdump(data)) 51 | // fuzz decode 52 | encoded := encoded1 53 | dbuf0 := make([]byte, fp.ours.DecodedLen(len(encoded))) 54 | dbuf1 := make([]byte, fp.stdlib.DecodedLen(len(encoded))) 55 | count0, err0 := fp.ours.Decode(dbuf0, []byte(encoded)) 56 | count1, err1 := fp.stdlib.Decode(dbuf1, []byte(encoded)) 57 | require.Equalf(t, dbuf0, dbuf1, "decode from %s", spew.Sdump(encoded)) 58 | require.Equalf(t, err0 != nil, err1 != nil, "decode from %s", spew.Sdump(encoded)) 59 | require.Equalf(t, count0, count1, "decode from %s", spew.Sdump(encoded)) 60 | } 61 | } 62 | 63 | func fuzzBase64JsonImpl(t *testing.T, data []byte) { 64 | // fuzz valid JSON-encoded base64 65 | jencoded, _ := json.Marshal(data) 66 | var dbuf0, dbuf1 []byte 67 | dbuf0 = make([]byte, JSONStdEncoding.DecodedLen(len(jencoded))) 68 | count0, err0 := JSONStdEncoding.Decode(dbuf0, jencoded[1:len(jencoded) - 1]) 69 | err1 := json.Unmarshal(jencoded, &dbuf1) 70 | require.Equalf(t, dbuf0[:count0], dbuf1, "decode json from %s", spew.Sdump(jencoded)) 71 | require.Equalf(t, err0 != nil, err1 != nil, "decode json from %s", spew.Sdump(jencoded)) 72 | } -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/chenzhuoyu/base64x 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/bytedance/sonic v1.10.0-rc 7 | github.com/davecgh/go-spew v1.1.1 8 | github.com/klauspost/cpuid/v2 v2.0.9 9 | github.com/stretchr/testify v1.8.1 10 | ) 11 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= 2 | github.com/bytedance/sonic v1.10.0-rc h1:3S5HeWxjX08CUqNrXtEittExpJsEKBNzrV5UnrzHxVQ= 3 | 
github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM= 4 | github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= 5 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= 6 | github.com/chenzhuoyu/iasm v0.9.0 h1:9fhXjVzq5hUy2gkhhgHl95zG2cEAhw9OSGs8toWWAwo= 7 | github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog= 8 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 10 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= 12 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 13 | github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= 14 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 15 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 16 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 17 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 18 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 19 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 20 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 21 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 22 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 23 | github.com/stretchr/testify v1.8.1/go.mod 
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 24 | github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= 25 | github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= 26 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU= 27 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 28 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 29 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 30 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 31 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 32 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 33 | nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= 34 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 35 | -------------------------------------------------------------------------------- /native/native.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define MODE_URL 1 6 | #define MODE_RAW 2 7 | #define MODE_AVX2 4 8 | #define MODE_JSON 8 9 | 10 | #define as_m32v(v) (*(uint32_t *)(v)) 11 | #define as_m64v(v) (*(uint64_t *)(v)) 12 | 13 | #define as_m128p(v) ((__m128i *)(v)) 14 | #define as_m256p(v) ((__m256i *)(v)) 15 | 16 | #define as_m8c(v) ((const uint8_t *)(v)) 17 | #define as_m128c(v) ((const __m128i *)(v)) 18 | #define as_m256c(v) ((const __m256i *)(v)) 19 | 20 | #define always_inline inline __attribute__((always_inline)) 21 | 22 | struct slice_t { 23 | char * buf; 24 | size_t len; 25 | size_t cap; 26 | }; 27 | 28 | /** Exported Functions **/ 29 | 30 | void 
b64encode(struct slice_t *out, const struct slice_t *src, int mode); 31 | ssize_t b64decode(struct slice_t *out, const char *src, size_t nb, int mode); 32 | 33 | /** Encoder Helper Functions **/ 34 | 35 | static const char TabEncodeCharsetStd[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 36 | static const char TabEncodeCharsetURL[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; 37 | 38 | static const uint8_t VecEncodeShuffles[32] = { 39 | 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, 40 | 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, 41 | }; 42 | 43 | static const uint8_t VecEncodeCharsetStd[32] = { 44 | 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, 45 | '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A' , 0, 0, 46 | 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, 47 | '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A' , 0, 0, 48 | }; 49 | 50 | static const uint8_t VecEncodeCharsetURL[32] = { 51 | 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, 52 | '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A' , 0, 0, 53 | 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, 54 | '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A' , 0, 0, 55 | }; 56 | 57 | static always_inline __m256i encode_avx2(__m128i v0, __m128i v1, const uint8_t *tab) { 58 | __m256i vv = _mm256_set_m128i (v1, v0); 59 | __m256i sh = _mm256_loadu_si256 (as_m256c(VecEncodeShuffles)); 60 | __m256i in = _mm256_shuffle_epi8 (vv, sh); 61 | __m256i t0 = _mm256_and_si256 (in, _mm256_set1_epi32(0x0fc0fc00)); 62 | __m256i t1 = _mm256_mulhi_epu16 (t0, _mm256_set1_epi32(0x04000040)); 63 | __m256i t2 = _mm256_and_si256 (in, _mm256_set1_epi32(0x003f03f0)); 64 | __m256i t3 = _mm256_mullo_epi16 (t2, _mm256_set1_epi32(0x01000010)); 65 | __m256i vi = _mm256_or_si256 (t1, t3); 66 | __m256i s0 = _mm256_cmpgt_epi8 
/*
 * b64encode appends the base64 encoding of `src` to `out`.
 *
 * Output is written starting at out->buf + out->len, and out->len is advanced
 * by the number of characters produced. out->cap is never consulted here, so
 * the caller is presumably responsible for providing enough room -- confirm
 * against the Go caller. Note that each SIMD round stores a full 32 bytes.
 *
 * `mode` is a bit set:
 *   MODE_URL   use the URL alphabet tables instead of the standard ones
 *   MODE_RAW   do not emit '=' padding
 *   MODE_AVX2  enable the AVX2 fast path
 *
 * An empty source returns immediately and leaves `out` untouched.
 */
void b64encode(struct slice_t *out, const struct slice_t *src, int mode) {
    /* ob remembers where the output started, op is the write cursor */
    char *          ob = out->buf + out->len;
    char *          op = out->buf + out->len;
    /* ip is the read cursor, ie is one past the last source byte */
    const char *    ip = src->buf;
    const char *    ie = src->buf + src->len;
    /* st is the scalar alphabet, vt the offset table used by encode_avx2 */
    const char *    st = TabEncodeCharsetStd;
    const uint8_t * vt = VecEncodeCharsetStd;

    /* check for empty string */
    if (src->len == 0) {
        return;
    }

    /* check for URL encoding */
    if (mode & MODE_URL) {
        st = TabEncodeCharsetURL;
        vt = VecEncodeCharsetURL;
    }

    /* SIMD loop, 24 source bytes -> 32 output characters per round. The two
     * 16-byte loads at ip and ip + 12 together read bytes [ip, ip + 28), i.e.
     * 4 bytes more than are consumed, so a round is only safe while at least
     * 28 bytes are left */
    while ((ip <= ie - 28) && (mode & MODE_AVX2) != 0) {
        __m128i v0 = _mm_loadu_si128 (as_m128c(ip));
        __m128i v1 = _mm_loadu_si128 (as_m128c(ip + 12));
        __m256i vv = encode_avx2     (v0, v1, vt);

        /* store the result, and advance buffer pointers */
        _mm256_storeu_si256(as_m256p(op), vv);
        op += 32;
        ip += 24;
    }

    /* one more 24-byte round is possible when 24..27 bytes are left: load the
     * second half from ip + 8 so the read ends exactly at ip + 24, then shift
     * it right by 4 bytes to line source bytes 12..23 up the same way the
     * ip + 12 load does in the loop above */
    if ((ip <= ie - 24) && (mode & MODE_AVX2) != 0) {
        __m128i v0 = _mm_loadu_si128 (as_m128c(ip));
        __m128i v1 = _mm_loadu_si128 (as_m128c(ip + 8));
        __m128i v2 = _mm_srli_si128  (v1, 4);
        __m256i vv = encode_avx2     (v0, v2, vt);

        /* store the result, and advance buffer pointers */
        _mm256_storeu_si256(as_m256p(op), vv);
        op += 32;
        ip += 24;
    }

    /* no more bytes: commit the length and stop */
    if (ip == ie) {
        out->len += op - ob;
        return;
    }

    /* handle the remaining bytes with scalar code: each round loads 4 bytes
     * but consumes only 3, hence the "at least 4 bytes left" condition */
    while (ip <= ie - 4) {
        /* after the byte swap the three source bytes occupy bits 31..8 */
        uint32_t v0 = __builtin_bswap32(*(const uint32_t *)ip);
        uint8_t  v1 = (v0 >> 26) & 0x3f;
        uint8_t  v2 = (v0 >> 20) & 0x3f;
        uint8_t  v3 = (v0 >> 14) & 0x3f;
        uint8_t  v4 = (v0 >>  8) & 0x3f;

        /* encode the characters, and move to next block */
        ip += 3;
        *op++ = st[v1];
        *op++ = st[v2];
        *op++ = st[v3];
        *op++ = st[v4];
    }

    /* load the last bytes: dp is the number of bytes left, and v0 collects
     * them as a 24-bit group with the first byte in bits 23..16 */
    size_t   dp = ie - ip;
    uint32_t v0 = (uint32_t)(uint8_t)ip[0] << 16;

/* B1 / B2 merge the second / third source byte into the 24-bit group */
#define B2 v0 |= (uint32_t)(uint8_t)ip[2]
#define B1 v0 |= (uint32_t)(uint8_t)ip[1] << 8

/* R1 .. R4 emit the four 6-bit digits, most significant first */
#define R4 *op++ = st[(v0 >>  0) & 0x3f]
#define R3 *op++ = st[(v0 >>  6) & 0x3f]
#define R2 *op++ = st[(v0 >> 12) & 0x3f]
#define R1 *op++ = st[(v0 >> 18) & 0x3f]

/* NB commits the produced length, PD emits one '=' unless MODE_RAW is set */
#define NB { out->len += op - ob; }
#define PD { if ((mode & MODE_RAW) == 0) { *op++ = '='; } }

    /* encode the last few bytes: 3 bytes -> 4 digits, 2 bytes -> 3 digits
     * plus one padding, 1 byte -> 2 digits plus two paddings */
    switch (dp) {
        case 3  : B2; B1; R1; R2; R3; R4; NB; break;
        case 2  :     B1; R1; R2; R3; PD; NB; break;
        case 1  :         R1; R2; PD; PD; NB; break;
        default :                         NB; break;
    }

#undef PD
#undef NB
#undef R1
#undef R2
#undef R3
#undef R4
#undef B1
#undef B2
}
VecDecodeTableStd[128] = { 191 | 0x00, 0x00, 0x13, 0x04, 0xbf, 0xbf, 0xb9, 0xb9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 192 | 0x00, 0x00, 0x13, 0x04, 0xbf, 0xbf, 0xb9, 0xb9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 193 | 0xa8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf0, 0x54, 0x50, 0x50, 0x50, 0x54, 194 | 0xa8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf0, 0x54, 0x50, 0x50, 0x50, 0x54, 195 | 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 196 | 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 197 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 198 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 199 | }; 200 | 201 | static const uint8_t VecDecodeTableURL[128] = { 202 | 0x00, 0x00, 0x11, 0x04, 0xbf, 0xbf, 0xb9, 0xb9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 203 | 0x00, 0x00, 0x11, 0x04, 0xbf, 0xbf, 0xb9, 0xb9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 204 | 0xa8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf0, 0x50, 0x50, 0x54, 0x50, 0x70, 205 | 0xa8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf0, 0x50, 0x50, 0x54, 0x50, 0x70, 206 | 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 207 | 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 208 | 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 209 | 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 210 | }; 211 | 212 | static const uint8_t VecDecodeCharsetStd[256] = { 213 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 214 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 215 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 0xff, 63, 216 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 217 | 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 218 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 0xff, 219 | 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 220 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, 221 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 222 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 223 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 224 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 225 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 226 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 227 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 228 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 229 | }; 230 | 231 | static const uint8_t VecDecodeCharsetURL[256] = { 232 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 233 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 234 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 235 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 236 | 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 237 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 63, 238 | 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 239 | 41, 42, 43, 44, 45, 46, 47, 
48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, 240 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 241 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 242 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 243 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 244 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 245 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 246 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 247 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 248 | }; 249 | 250 | static always_inline void memcopy_24(char *dp, const uint8_t *sp) { 251 | *(uint64_t *)(dp + 0) = *(const uint64_t *)(sp + 0); 252 | *(uint64_t *)(dp + 8) = *(const uint64_t *)(sp + 8); 253 | *(uint64_t *)(dp + 16) = *(const uint64_t *)(sp + 16); 254 | } 255 | 256 | static always_inline __m256i decode_avx2(__m256i v0, int *pos, const uint8_t *tab) { 257 | __m256i v1 = _mm256_srli_epi32 (v0, 4); 258 | __m256i vl = _mm256_and_si256 (v0, _mm256_set1_epi8(0x0f)); 259 | __m256i vh = _mm256_and_si256 (v1, _mm256_set1_epi8(0x0f)); 260 | __m256i st = _mm256_loadu_si256 (as_m256c(tab)); 261 | __m256i mt = _mm256_loadu_si256 (as_m256c(tab + 32)); 262 | __m256i et = _mm256_loadu_si256 (as_m256c(tab + 64)); 263 | __m256i rt = _mm256_loadu_si256 (as_m256c(tab + 96)); 264 | __m256i pt = _mm256_loadu_si256 (as_m256c(VecPacking)); 265 | __m256i bt = _mm256_loadu_si256 (as_m256c(VecDecodeBits)); 266 | __m256i sh = _mm256_shuffle_epi8 (st, vh); 267 | __m256i eq = _mm256_cmpeq_epi8 (v0, et); 268 | __m256i sv = _mm256_blendv_epi8 (sh, rt, eq); 269 | __m256i bm = _mm256_shuffle_epi8 (mt, vl); 270 | __m256i bv = 
/*
 * Convert the four ASCII hexadecimal digits at `s` into their 16-bit value,
 * with s[0] as the most significant digit (given a little-endian load).
 *
 * No validation is performed: the result is only meaningful when all four
 * bytes are hex digits, which unescape_asc establishes by calling unhex16_is
 * first.
 */
static always_inline uint32_t unhex16_fast(const uint8_t *s) {
    /* byte-swap so that the first digit ends up in the most significant byte */
    uint32_t a = __builtin_bswap32(*(uint32_t *)s);
    /* per byte: take the low nibble, and add 9 when bit 0x10 is clear. That
     * bit is set for '0'..'9' and clear for 'A'..'F' / 'a'..'f', whose low
     * nibbles are 1..6, so letters map to 10..15 */
    uint32_t b = 9 * ((~a & 0x10101010) >> 4) + (a & 0x0f0f0f0f);
    /* merge each pair of neighbouring digit values into a single byte */
    uint32_t c = (b >> 4) | b;
    /* gather the two merged bytes (bits 23..16 and 7..0) into 16 bits */
    uint32_t d = ((c >> 8) & 0xff00) | (c & 0x00ff);
    return d;
}
/*
 * Decode one group of up to 4 base64 characters, the slow path used whenever
 * the fast loops meet something other than a plain alphabet character.
 *
 *   ie    one past the last input byte
 *   ipp   in/out input cursor, advanced past everything consumed on success
 *   opp   in/out output cursor, advanced by the number of bytes written
 *   tab   256-entry character -> 6-bit value table, 0xff marks "not in the
 *         alphabet"
 *   mode  MODE_* bit set; MODE_RAW rejects '=' padding, MODE_JSON enables
 *         backslash escapes (resolved by unescape_asc)
 *
 * '\r' and '\n' are skipped wherever they appear. On error neither *ipp nor
 * *opp is updated.
 *
 * Return 0 if success, otherwise return the error position + 1, where the
 * position is counted from the value *ipp had on entry.
 */
static always_inline int64_t decode_block(
    const uint8_t *  ie,
    const uint8_t ** ipp,
    char **          opp,
    const uint8_t *  tab,
    int              mode
) {
    /* nb counts the digits collected so far, v0 accumulates their bits */
    int      nb = 0;
    uint32_t v0 = 0;

    /* buffer pointers */
    char *          op = *opp;
    const uint8_t * ip = *ipp;
    uint8_t         id = 0;
    uint8_t         ch = 0;
    /* number of '=' characters seen */
    int             pad = 0;

/* both helpers act on `ch`; skip_newlines() must be used inside a loop */
#define may_unescape()  { if (ch == '\\' && (mode & MODE_JSON)) ch = unescape_asc(ie, &ip); }
#define skip_newlines() { if (ch == '\r' || ch == '\n') continue; }

    /* load up to 4 characters */
    while (ip < ie && nb < 4) {
        ch = *ip++;
        may_unescape();
        skip_newlines();

        /* lookup the index, and check for invalid characters */
        if ((id = tab[ch]) == 0xff) {
            /* the only acceptable non-alphabet character is '=', and only in
             * padded mode after at least two digits */
            if ((mode & MODE_RAW) || ch != '=' || nb < 2) goto error;
            pad++; goto tail;
        }

        /* decode the character */
        v0 = (v0 << 6) | id;
        nb++;
    }

    /* nothing but newlines (or nothing at all) was consumed */
    if (nb == 0) {
        /* update the pointers */
        *ipp = ip;
        return 0;
    }

    /* check eof, MODE_STD need paddings: a short final group is accepted
     * only in raw mode, and never when it holds a single digit */
    if (ip >= ie && nb != 4) {
        if (!(mode & MODE_RAW) || nb == 1) goto error;
    }

decode:
    /* left-align a short group so the digits always occupy bits 23..0 */
    v0 <<= 6 * (4 - nb);
    /* ends with eof or 4 characters, decode into output; the cases fall
     * through on purpose, so nb digits yield nb - 1 output bytes */
    switch (nb) {
        case 4: op[2] = (v0 >>  0) & 0xff;
        case 3: op[1] = (v0 >>  8) & 0xff;
        case 2: op[0] = (v0 >> 16) & 0xff;
    }

    /* update the pointers */
    *ipp = ip;
    *opp = op + nb - 1;
    return 0;

tail:
    /* loop for more paddings: after the first '=' only further '=' (and
     * newlines) may follow up to the end of the input, and digits plus
     * paddings must not exceed 4 */
    while (ip < ie) {
        ch = *ip++;
        may_unescape();
        skip_newlines();
        if (ch != '=') goto error;
        if (++pad + nb > 4) goto error;
    }
    goto decode;
#undef may_unescape
#undef skip_newlines

error:
    /* update eof error position: running out of input is reported as one
     * past the end.
     * NOTE(review): ip == ie also holds when the offending character is the
     * very last input byte, so that case appears to be reported one position
     * too far -- confirm whether this is intended. */
    if (ip == ie) ip++;
    return ip - *ipp;


}
470 | continue; 471 | } 472 | } 473 | 474 | _mm256_storeu_si256(as_m256p(op), vv); 475 | 476 | /* move to next block */ 477 | ip += 32; 478 | op += 24; 479 | } 480 | 481 | scalar: 482 | /* handle the remaining bytes with scalar code (8 byte loop) */ 483 | while (ip <= ie - 8 && op <= oe - 8) { 484 | uint8_t v0 = st[ip[0]]; 485 | uint8_t v1 = st[ip[1]]; 486 | uint8_t v2 = st[ip[2]]; 487 | uint8_t v3 = st[ip[3]]; 488 | uint8_t v4 = st[ip[4]]; 489 | uint8_t v5 = st[ip[5]]; 490 | uint8_t v6 = st[ip[6]]; 491 | uint8_t v7 = st[ip[7]]; 492 | 493 | /* check for invalid bytes */ 494 | if ((v0 | v1 | v2 | v3 | v4 | v5 | v6 | v7) == 0xff) { 495 | if ((dv = decode_block(ie, &ip, &op, st, mode)) != 0) { 496 | return ib - ip - dv; 497 | } else { 498 | continue; 499 | } 500 | } 501 | 502 | /* construct the characters */ 503 | uint64_t vv = __builtin_bswap64( 504 | ((uint64_t)v0 << 58) | 505 | ((uint64_t)v1 << 52) | 506 | ((uint64_t)v2 << 46) | 507 | ((uint64_t)v3 << 40) | 508 | ((uint64_t)v4 << 34) | 509 | ((uint64_t)v5 << 28) | 510 | ((uint64_t)v6 << 22) | 511 | ((uint64_t)v7 << 16) 512 | ); 513 | 514 | /* store the result, and move to next block */ 515 | as_m64v(op) = vv; 516 | ip += 8; 517 | op += 6; 518 | } 519 | 520 | /* handle the remaining bytes with scalar code (4 byte loop) */ 521 | while (ip <= ie - 4 && op <= oe - 4) { 522 | uint8_t v0 = st[ip[0]]; 523 | uint8_t v1 = st[ip[1]]; 524 | uint8_t v2 = st[ip[2]]; 525 | uint8_t v3 = st[ip[3]]; 526 | 527 | /* check for invalid bytes */ 528 | if ((v0 | v1 | v2 | v3) == 0xff) { 529 | if ((dv = decode_block(ie, &ip, &op, st, mode)) != 0) { 530 | return ib - ip - dv; 531 | } else { 532 | continue; 533 | } 534 | } 535 | 536 | /* construct the characters */ 537 | uint32_t vv = __builtin_bswap32( 538 | ((uint32_t)v0 << 26) | 539 | ((uint32_t)v1 << 20) | 540 | ((uint32_t)v2 << 14) | 541 | ((uint32_t)v3 << 8) 542 | ); 543 | 544 | /* store the result, and move to next block */ 545 | as_m32v(op) = vv; 546 | ip += 4; 547 | op += 3; 548 | 
} 549 | 550 | /* decode the last few bytes */ 551 | while (ip < ie) { 552 | if ((dv = decode_block(ie, &ip, &op, st, mode)) != 0) { 553 | return ib - ip - dv; 554 | } 555 | } 556 | 557 | /* update the result length */ 558 | out->len += op - ob; 559 | return op - ob; 560 | } 561 | -------------------------------------------------------------------------------- /native_amd64.go: -------------------------------------------------------------------------------- 1 | //go:generate make 2 | package base64x 3 | 4 | import ( 5 | `unsafe` 6 | 7 | `github.com/bytedance/sonic/loader` 8 | ) 9 | 10 | //go:nosplit 11 | func b64encode(out *[]byte, src *[]byte, mode int) { 12 | __b64encode(noEscape(unsafe.Pointer(out)), noEscape(unsafe.Pointer(src)), mode) 13 | } 14 | 15 | //go:nosplit 16 | func b64decode(out *[]byte, src unsafe.Pointer, len int, mode int) (ret int) { 17 | return __b64decode(noEscape(unsafe.Pointer(out)), noEscape(unsafe.Pointer(src)), len, mode) 18 | } 19 | 20 | // asm2asm templates 21 | var ( 22 | __b64encode func(out unsafe.Pointer, src unsafe.Pointer, mod int) 23 | __b64decode func(out unsafe.Pointer, src unsafe.Pointer, len int, mod int) (ret int) 24 | ) 25 | 26 | // directly jump PCs 27 | var ( 28 | _subr__b64encode uintptr 29 | _subr__b64decode uintptr 30 | ) 31 | 32 | var stubs = []loader.GoC{ 33 | {"_b64encode", &_subr__b64encode, &__b64encode}, 34 | {"_b64decode", &_subr__b64decode, &__b64decode}, 35 | } 36 | 37 | func init() { 38 | if hasAVX2() { 39 | archFlags = _MODE_AVX2 40 | } 41 | loader.WrapGoC(text__native_entry__, funcs, stubs, "base64x", "base64x/native.c") 42 | } 43 | -------------------------------------------------------------------------------- /native_subr_amd64.go: -------------------------------------------------------------------------------- 1 | // +build !noasm !appengine 2 | // Code generated by asm2asm, DO NOT EDIT. 
3 | 4 | package base64x 5 | 6 | import ( 7 | `github.com/bytedance/sonic/loader` 8 | ) 9 | 10 | const ( 11 | _entry__b64decode = 1328 12 | _entry__b64encode = 256 13 | ) 14 | 15 | const ( 16 | _stack__b64decode = 152 17 | _stack__b64encode = 40 18 | ) 19 | 20 | const ( 21 | _size__b64decode = 17616 22 | _size__b64encode = 864 23 | ) 24 | 25 | var ( 26 | _pcsp__b64decode = [][2]uint32{ 27 | {1, 0}, 28 | {4, 8}, 29 | {6, 16}, 30 | {8, 24}, 31 | {10, 32}, 32 | {12, 40}, 33 | {13, 48}, 34 | {17560, 152}, 35 | {17564, 48}, 36 | {17565, 40}, 37 | {17567, 32}, 38 | {17569, 24}, 39 | {17571, 16}, 40 | {17573, 8}, 41 | {17577, 0}, 42 | {17608, 152}, 43 | } 44 | _pcsp__b64encode = [][2]uint32{ 45 | {1, 0}, 46 | {4, 8}, 47 | {6, 16}, 48 | {8, 24}, 49 | {10, 32}, 50 | {852, 40}, 51 | {853, 32}, 52 | {855, 24}, 53 | {857, 16}, 54 | {859, 8}, 55 | {864, 0}, 56 | } 57 | ) 58 | 59 | var funcs = []loader.CFunc{ 60 | {"__native_entry__", 0, 67, 0, nil}, 61 | {"_b64decode", _entry__b64decode, _size__b64decode, _stack__b64decode, _pcsp__b64decode}, 62 | {"_b64encode", _entry__b64encode, _size__b64encode, _stack__b64encode, _pcsp__b64encode}, 63 | } 64 | --------------------------------------------------------------------------------