├── .github ├── ISSUE_TEMPLATE │ ├── BUG-REPORT.yml │ ├── NEW-REGISTRATION.yml │ └── config.yml └── workflows │ ├── generated-pr.yml │ └── stale.yml ├── README.md ├── multibase.csv ├── rfcs ├── Base10.md ├── Base2.md ├── Base256Emoji.md ├── Base36.md ├── Base8.md └── Proquint.md └── tests ├── README.md ├── basic.csv ├── case_insensitivity.csv ├── leading_zero.csv └── two_leading_zeros.csv /.github/ISSUE_TEMPLATE/BUG-REPORT.yml: -------------------------------------------------------------------------------- 1 | name: "Bug Report - documentation or registry" 2 | description: Report possible bugs in multibase spec, process docs, and/or the multibase registry. 3 | title: "🐛 [DOC/PROCESS BUG] - " 4 | labels: [ 5 | "bug" 6 | ] 7 | body: 8 | - type: textarea 9 | id: description 10 | attributes: 11 | label: "Description" 12 | description: Please enter an explicit description of your issue, 13 | placeholder: Short and explicit description of your incident, ideally with commit-specific link to lines 14 | validations: 15 | required: true 16 | - type: input 17 | id: reprod-url 18 | attributes: 19 | label: "Reproduction URL" 20 | description: Please enter your GitHub URL to provide a reproduction of the issue 21 | placeholder: ex. 
https://github.com/multiformats/multibase/ 22 | validations: 23 | required: false 24 | - type: textarea 25 | id: context 26 | attributes: 27 | label: "Context" 28 | description: Please provide additional context 29 | placeholder: "Context or external links needed to explain the possible mistake" 30 | validations: 31 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/NEW-REGISTRATION.yml: -------------------------------------------------------------------------------- 1 | name: "New Registration" 2 | description: Express interest in registering a new encoding 3 | title: "📚 [NEW REGISTRATION] - <title>" 4 | labels: [ 5 | "Registration" 6 | ] 7 | body: 8 | - type: input 9 | id: encoding-name 10 | attributes: 11 | label: "Name of encoding" 12 | description: Name this library or system 13 | placeholder: acronyms and abbreviations are fine 14 | validations: 15 | required: false 16 | - type: checkboxes 17 | attributes: 18 | label: "Have read contributing" 19 | description: I have read the [contributing](https://github.com/multiformats/multiformats/blob/master/contributing.md) document, including the policies about deprecating stale, incomplete, or unimplemented/underutilized registrations 20 | options: 21 | - label: I read it! 22 | validations: 23 | required: true 24 | - type: checkboxes 25 | attributes: 26 | label: "Have checked table" 27 | description: I have reviewed the [multiformats mega-table](https://github.com/multiformats/multicodec/blob/master/table.csv) to assess viable sub-namespace for a registry if applicable 28 | options: 29 | - label: I read it! 
30 | - type: checkboxes 31 | attributes: 32 | label: "Willing to open a PR" 33 | description: Once my questions are answered and my plan is confirmed, I will open a PR myself that adds the registration and be its change controller, or close this issue myself if I cannot 34 | options: 35 | - label: I will own this registration 36 | - type: input 37 | id: codepoint 38 | attributes: 39 | label: "Proposed codepoint" 40 | description: Please put here the prefix in the target encoding. By tradition, the highest binary value in the encoding alphabet works well and has a built-in mnemonic if it doesn't conflict with any other entries 41 | placeholder: x 42 | validations: 43 | required: true 44 | - type: input 45 | id: varint-value 46 | attributes: 47 | label: "Proposed varint value for registration in multiformats" 48 | description: Please put here the UTF-8 value that corresponds to that target encoding, for inclusion in the multiformats table, formatted as an [unsigned varint](https://github.com/multiformats/unsigned-varint) 49 | placeholder: See mf/unsigned-varint 50 | validations: 51 | required: true 52 | - type: textarea 53 | id: use-case 54 | attributes: 55 | label: "use-case" 56 | description: Please describe the possible use-cases where this additional codec would be helpful, where this encoding is used currently in the wild, etc. 
57 | placeholder: Feel free to provide links for context and use-case descriptions 58 | validations: 59 | required: true 60 | - type: textarea 61 | id: specification 62 | attributes: 63 | label: "Description of relevant prior art and status quo" 64 | description: Please describe relevant prior art and, if already specified in a static public document, the algorithms and configurations needed to deterministically encode/decode 65 | placeholder: Links welcome 66 | validations: 67 | required: true 68 | - type: textarea 69 | id: solution_and_rationale 70 | attributes: 71 | label: "Proposed solution and rationale" 72 | description: Please describe at a high level what you are exploring building and current open research questions. 73 | placeholder: Detail welcome 74 | validations: 75 | required: true 76 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Protocol Labs Vulnerability Disclosure Team 4 | url: mailto:security@ipfs.io 5 | about: Please do NOT open issues related to security of implementations or spec here without contacting the IPFS security team first. 6 | - name: Multiformats Repo Discussions 7 | url: https://github.com/multiformats/multiformats/discussions 8 | about: If you'd like to discuss an implementation of multibase you're working on, please use the discussions section on the core multiformats repo instead of this one. 
-------------------------------------------------------------------------------- /.github/workflows/generated-pr.yml: -------------------------------------------------------------------------------- 1 | name: Close Generated PRs 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | workflow_dispatch: 7 | 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | uses: ipdxco/unified-github-workflows/.github/workflows/reusable-generated-pr.yml@v1 15 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close Stale Issues 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | workflow_dispatch: 7 | 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | uses: ipdxco/unified-github-workflows/.github/workflows/reusable-stale-issue.yml@v1 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multibase 2 | 3 | [![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io) 4 | [![](https://img.shields.io/badge/project-multiformats-blue.svg?style=flat-square)](https://github.com/multiformats/multiformats) 5 | [![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](https://webchat.freenode.net/?channels=%23ipfs) 6 | [![](https://img.shields.io/badge/readme%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme) 7 | 8 | > Self-identifying base encodings 9 | 10 | Multibase is a protocol for disambiguating the "base encoding" used to express binary data in text formats (e.g., base32, base36, base64, base58, etc.) from the expression alone. 
11 | 12 | When text is encoded as bytes, we can usually use a one-size-fits-all encoding (UTF-8) because we're always encoding to the same set of 256 bytes (+/- the NUL byte). 13 | When that doesn't work, usually for historical or performance reasons, we can usually infer the encoding from the context. 14 | 15 | However, when bytes are encoded as text (using a base encoding), the choice of base encoding (and alphabet, and other factors) is often restricted by the context. 16 | Worse, these restrictions can change based on where the data appears in the text. 17 | In some cases, we can only use `[a-z0-9]`; in others, we can use a larger set of characters but need a compact encoding. 18 | This has led to a large set of "base encodings", almost one for every use-case. 19 | Unlike the case of encoding text to bytes, it is impractical to standardize widely around a single base encoding because there is no optimal encoding for all cases. 20 | 21 | As data travels beyond its context, it becomes quite hard to ascertain *which* base encoding of the many possible ones was used; that's where multibase comes in. 22 | Where the data has been prefixed before leaving its context behind, it answers the question: 23 | 24 | > Given binary data `d` encoded into text `s`, what base `b` was used to encode it? 25 | 26 | To answer this question, a single code point is prepended to `s` at time of encoding, which signals in that new context which `b` can be used to reconstruct `d`. 
27 | 28 | ## Table of Contents 29 | 30 | - [Format](#format) 31 | - [Multibase Table](#multibase-table) 32 | - [Specifications](#specifications) 33 | - [Status](#status) 34 | - [Reserved Terms](#reserved-terms) 35 | - [Multibase By Example](#multibase-by-example) 36 | - [FAQ](#faq) 37 | - [Implementations:](#implementations) 38 | - [Disclaimers](#disclaimers) 39 | - [Contribute](#contribute) 40 | - [License](#license) 41 | 42 | ## Format 43 | 44 | The Format is: 45 | 46 | ``` 47 | <base-encoding-code-point><base-encoded-data> 48 | ``` 49 | 50 | Where `<base-encoding-code-point>` is a code representing an entry in the multibase table. 51 | 52 | ### Multibase Table 53 | 54 | The current multibase table is [here](multibase.csv): 55 | 56 | ``` 57 | Unicode, character, encoding, description, status 58 | U+0000, NUL, none, (No base encoding), reserved 59 | U+0030, 0, base2, Binary (01010101), experimental 60 | U+0031, 1, none, (No base encoding), reserved 61 | U+0037, 7, base8, Octal, draft 62 | U+0039, 9, base10, Decimal, draft 63 | U+0066, f, base16, Hexadecimal (lowercase), final 64 | U+0046, F, base16upper, Hexadecimal (uppercase), final 65 | U+0076, v, base32hex, RFC4648 case-insensitive - no padding - highest char, experimental 66 | U+0056, V, base32hexupper, RFC4648 case-insensitive - no padding - highest char, experimental 67 | U+0074, t, base32hexpad, RFC4648 case-insensitive - with padding, experimental 68 | U+0054, T, base32hexpadupper, RFC4648 case-insensitive - with padding, experimental 69 | U+0062, b, base32, RFC4648 case-insensitive - no padding, final 70 | U+0042, B, base32upper, RFC4648 case-insensitive - no padding, final 71 | U+0063, c, base32pad, RFC4648 case-insensitive - with padding, draft 72 | U+0043, C, base32padupper, RFC4648 case-insensitive - with padding, draft 73 | U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft 74 | U+006b, k, base36, Base36 [0-9a-z] case-insensitive - no padding, draft 75 | U+004b, K, base36upper, Base36 [0-9a-z] 
case-insensitive - no padding, draft 76 | U+0052, R, base45, Base45 RFC9285, draft 77 | U+007a, z, base58btc, Base58 Bitcoin, final 78 | U+005a, Z, base58flickr, Base58 Flicker, experimental 79 | U+006d, m, base64, RFC4648 no padding, final 80 | U+004d, M, base64pad, RFC4648 with padding - MIME encoding, experimental 81 | U+0075, u, base64url, RFC4648 no padding, final 82 | U+0055, U, base64urlpad, RFC4648 with padding, final 83 | U+0070, p, proquint, Proquint (https://arxiv.org/html/0901.4016), experimental 84 | U+0051, Q, none, (no base encoding), reserved 85 | U+002F, /, none, (no base encoding), reserved 86 | U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental 87 | ``` 88 | 89 | **NOTE:** Multibase-prefixes are encoding agnostic. "z" is "z", not 0x7a ("z" encoded as ASCII/UTF-8). In UTF-32, for example, that same "z" would be `[0x7a, 0x00, 0x00, 0x00]` not `[0x7a]`, so detecting and dropping an initial byte of `0x7a` would not suffice to confirm the rest was `base58btc`-encoded bytes; `[0x7a, 0x00, 0x00, 0x00]` would instead be the UTF-32 bytes that correspond to the `z` codepoint for that entry, and the entire byte array would need to be detected and dropped. Also note the difference between `0x00` (codepoint 0 or 0x00) and `0` (codepoint 48 or 0x30). 
90 | 91 | ## Specifications 92 | 93 | Below is a list of specs for the underlying base encodings: 94 | 95 | - `base2` [Base2 RFC](rfcs/Base2.md) 96 | - `base8` [Base8 RFC](rfcs/Base8.md), similar to [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html) 97 | - `base10` [Base10 RFC](rfcs/Base10.md) 98 | - `base36` [Base36 RFC](rfcs/Base36.md) 99 | - `base16*` [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648.html) 100 | - `base32*` (Except for `base32z`) [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html) 101 | - `base32z` [Human-oriented base32 spec](https://philzimmermann.com/docs/human-oriented-base-32-encoding.txt) 102 | - `base45` [RFC9285](https://datatracker.ietf.org/doc/html/rfc9285.html) 103 | - `base64*` [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648.html) 104 | - `base58btc` https://datatracker.ietf.org/doc/html/draft-msporny-base58-02 105 | - `base58flickr` https://datatracker.ietf.org/doc/html/draft-msporny-base58-02, but using a different alphabet 106 | - `proquint` [Proquint RFC](rfcs/Proquint.md), which is the [original spec](https://arxiv.org/html/0901.4016) with an added prefix for legibility 107 | - `base256emoji` [Base256Emoji RFC](rfcs/Base256Emoji.md) 108 | 109 | 110 | ## Status 111 | 112 | Each multibase encoding has a status: 113 | 114 | * reserved - for functional reasons or to avoid collisions with other multi-* registries, this registry cannot accept registrations at this code-point and implementing one unregistered is discouraged for interoperability reasons 115 | * experimental - these encodings have been proposed but are not widely implemented and may be removed. 116 | * draft - these encodings are mature and widely implemented but may not be implemented by all implementations. 117 | * final - these encodings should be implemented by all implementations and are widely used. 
118 | * deprecated - this entry will likely be removed and reassigned in the future and it will not likely become a `final` registration 119 | 120 | ### Reserved Terms 121 | 122 | The following codes are _reserved_ and cannot be registered in the `multibase` table. Note that all three of the Unicode entries, expressed as the [unsigned varint] expression of that Unicode code-point in UTF-8, correspond to widely-used entries in the [multiformats registry group] that could create confusions for some legacy systems handling both binary and multibased structures from other multiformats. While technically the multibase registry is not part of the [multiformats registry group], these reservations minimize risk of confusion when composing multiple multiformats in one data system. 123 | 124 | * `NUL` (n/a) - Legacy data may be found with null-byte-prefixed binary structures mixed in among multibase-encoded ones in arrays of data, although support for this is no longer mandated by conformant implementations. 125 | * `/` (U+002F) - Separator used by [multiaddr]. 126 | * `1` (U+0031) - Base58-encoded identity multihashes used by libp2p peer IDs. 127 | * `Q` (U+0051) - Base58-encoded sha2-256 multihashes used by libp2p/ipfs for peer IDs and CIDv0. 
128 | 129 | ## Multibase By Example 130 | 131 | Consider the following encodings of the same binary string: 132 | 133 | ``` 134 | 4D756C74696261736520697320617765736F6D6521205C6F2F # base16 (hex) 135 | JV2WY5DJMJQXGZJANFZSAYLXMVZW63LFEEQFY3ZP # base32 136 | 3IY8QKL64VUGCX009XWUHKF6GBBTS3TVRXFRA5R # base36 137 | TZ9:VDNEDHECDZC+ED944A4FVQEF$DK84%UB21 # base45 138 | YAjKoNbau5KiqmHPmSxYCvn66dA1vLmwbt # base58 139 | TXVsdGliYXNlIGlzIGF3ZXNvbWUhIFxvLw== # base64 140 | ``` 141 | 142 | And consider the same encodings with their multibase prefix 143 | 144 | ``` 145 | F4D756C74696261736520697320617765736F6D6521205C6F2F # base16 F 146 | BJV2WY5DJMJQXGZJANFZSAYLXMVZW63LFEEQFY3ZP # base32 B 147 | K3IY8QKL64VUGCX009XWUHKF6GBBTS3TVRXFRA5R # base36 K 148 | RTZ9:VDNEDHECDZC+ED944A4FVQEF$DK84%UB21 # base45 R 149 | zYAjKoNbau5KiqmHPmSxYCvn66dA1vLmwbt # base58 z 150 | MTXVsdGliYXNlIGlzIGF3ZXNvbWUhIFxvLw== # base64 M 151 | ``` 152 | 153 | The base prefixes used are: `F, B, K, R, z, M`. 154 | 155 | 156 | ## FAQ 157 | 158 | > Is this a real problem? 159 | 160 | Yes. If I give you `"1214314321432165"` is that decimal? or hex? or something else? See also: 161 | - https://en.wikipedia.org/wiki/8-bit_clean 162 | - https://en.wikipedia.org/wiki/MIME#Content-Transfer-Encoding 163 | - http://stackoverflow.com/questions/8571501/how-to-check-whether-the-string-is-base64-encoded-or-not 164 | 165 | > Why the strange selection of codes / characters? 166 | 167 | The code values are selected such that they are included in the alphabets of the base they represent. 168 | For example, `f` is the base code for `base16 (hex)`, because `f` is in hex's 16 character alphabet. 169 | Note that most of the alphabets used can be encoded in UTF-8, and most but not all can be encoded in ASCII. 170 | We have not yet found a case needing something else. 171 | 172 | > Don't we have to agree on a table of base encodings? 173 | 174 | Yes, but we already have to agree on base encodings, so this is not hard. 
175 | The table even leaves some room for custom encodings and is intended to work both in contexts where the encodings are known or agreed on and open-world or brownfield contexts where these may vary. 176 | 177 | ## Implementations: 178 | 179 | - [go-multibase](https://github.com/multiformats/go-multibase) 180 | - [js-multibase](https://github.com/multiformats/js-multibase) 181 | - C# 182 | - [cs-multibase](https://github.com/tabrath/cs-multibase) 183 | - [SimpleBase](https://github.com/ssg/SimpleBase) 184 | - [rust-multibase](https://github.com/multiformats/rust-multibase) 185 | - Java 186 | - [java-multibase](https://github.com/multiformats/java-multibase) 187 | - [copper-multibase](https://github.com/filip26/copper-multibase) 188 | - [py-multibase](https://github.com/multiformats/py-multibase) 189 | - [haskell-multibase](https://github.com/multiformats/haskell-multibase) 190 | - [net-ipfs-core](https://github.com/richardschneider/net-ipfs-core) 191 | - [elixir-multibase](https://github.com/nocursor/ex-multibase) 192 | - [scala-multibase](https://github.com/fluency03/scala-multibase) 193 | - [cpp-multibase](https://github.com/cpp-ipfs/cpp-multibase) 194 | - [ruby-multibase](https://github.com/sleeplessbyte/ruby-multibase) 195 | - [dart-multibase](https://github.com/heacare/dart-multibase) 196 | - [yoclib-multibase-php](https://github.com/yocto/yoclib-multibase-php) 197 | - `multibase` sub-module of Python module [multiformats](https://github.com/hashberg-io/multiformats) 198 | - Kotlin 199 | - [kotlin-multibase](https://github.com/changjiashuai/kotlin-multibase) 200 | - `multibase` part of Kotlin project [multiformat](https://github.com/erwin-kok/multiformat) 201 | - [zig-multibase](https://github.com/zen-eth/multiformats-zig) 202 | - `Multibase` part of the [MultiformatsKit](https://github.com/ATProtoKit/MultiformatsKit) Swift package 203 | - [Add yours here!](https://github.com/multiformats/multibase/edit/master/README.md) 204 | 205 | 206 | ## Disclaimers 207 
| 208 | Warning: **obviously multibase changes the first character depending on the encoding**. 209 | Do not expect the value to be exactly the same. 210 | Remove the multibase prefix before using the value. 211 | 212 | ## Contribute 213 | 214 | Contributions welcome. 215 | Please check out [the issues](https://github.com/multiformats/multibase/issues) and read the [contributing document](https://github.com/multiformats/multiformats/blob/master/contributing.md) for the greater multiformats project before opening your first issue, as the workflow and the relation of multibase to the greater project both benefit from this context. 216 | The contributing document has more information on how we work, and about contributing in general. 217 | 218 | If you'd like to switch a project over to multibase, whether by creating a new multibase implementation or building on one of those listed above, please file an issue in this repository using the "Interested in implementing" issue template. 219 | If you would also like to reserve a prefix for compatibility, please file a separate issue in this repository using the "New Registration" issue template. 220 | 221 | ## License 222 | 223 | This repository is only for documents. 224 | All of these are licensed under the [CC-BY-SA 3.0](https://ipfs.io/ipfs/QmVreNvKsQmQZ83T86cWSjPu2vR3yZHGPm5jnxFuunEB9u) license © 2016 Protocol Labs Inc. 225 | Any code is under a [MIT](LICENSE) © 2016 Protocol Labs Inc. 
226 | 227 | [multiaddr]: https://github.com/multiformats/multiaddr 228 | [multiformats registry group]: https://github.com/multiformats/multicodec/blob/master/table.csv 229 | [unsigned varint]: https://github.com/multiformats/unsigned-varint 230 | [code point]: https://infra.spec.whatwg.org/#code-points 231 | -------------------------------------------------------------------------------- /multibase.csv: -------------------------------------------------------------------------------- 1 | Unicode, character, encoding, description, status 2 | U+0000, NUL, none, (No base encoding), reserved 3 | U+0030, 0, base2, Binary (01010101), experimental 4 | U+0031, 1, none, (No base encoding), reserved 5 | U+0037, 7, base8, Octal, draft 6 | U+0039, 9, base10, Decimal, draft 7 | U+0066, f, base16, Hexadecimal (lowercase), final 8 | U+0046, F, base16upper, Hexadecimal (uppercase), final 9 | U+0076, v, base32hex, RFC4648 case-insensitive - no padding - highest char, experimental 10 | U+0056, V, base32hexupper, RFC4648 case-insensitive - no padding - highest char, experimental 11 | U+0074, t, base32hexpad, RFC4648 case-insensitive - with padding, experimental 12 | U+0054, T, base32hexpadupper, RFC4648 case-insensitive - with padding, experimental 13 | U+0062, b, base32, RFC4648 case-insensitive - no padding, final 14 | U+0042, B, base32upper, RFC4648 case-insensitive - no padding, final 15 | U+0063, c, base32pad, RFC4648 case-insensitive - with padding, draft 16 | U+0043, C, base32padupper, RFC4648 case-insensitive - with padding, draft 17 | U+0068, h, base32z, z-base-32 (used by Tahoe-LAFS), draft 18 | U+006b, k, base36, Base36 [0-9a-z] case-insensitive - no padding, draft 19 | U+004b, K, base36upper, Base36 [0-9a-z] case-insensitive - no padding, draft 20 | U+0052, R, base45, Base45 RFC9285, draft 21 | U+007a, z, base58btc, Base58 Bitcoin, final 22 | U+005a, Z, base58flickr, Base58 Flicker, experimental 23 | U+006d, m, base64, RFC4648 no padding, final 24 | U+004d, M, base64pad, 
RFC4648 with padding - MIME encoding, experimental 25 | U+0075, u, base64url, RFC4648 no padding, final 26 | U+0055, U, base64urlpad, RFC4648 with padding, final 27 | U+0070, p, proquint, Proquint (https://arxiv.org/html/0901.4016), experimental 28 | U+0051, Q, none, (no base encoding), reserved 29 | U+002F, /, none, (no base encoding), reserved 30 | U+1F680, 🚀, base256emoji, base256 with custom alphabet using variable-sized-codepoints, experimental 31 | -------------------------------------------------------------------------------- /rfcs/Base10.md: -------------------------------------------------------------------------------- 1 | # Base10 2 | 3 | The multibase base10 prefix is the character `9`. 4 | 5 | ## Encoding 6 | 7 | A byte array is encoded to multibase base10 by prefixing its base10 encoding 8 | with the character `9`. 9 | 10 | A byte array is encoded to base10 by: 11 | 12 | 1. Counting the number of leading 0 bytes (Z). 13 | 2. Interpreting the rest of the byte array as a big-endian unsigned integer (N). 14 | 3. Concatenating a length Z string of '0' characters with the decimal 15 | representation of N. 16 | 17 | ## Decoding 18 | 19 | A multibase base10 encoding is decoded by first dropping the multibase prefix 20 | (which must be `9`). 21 | 22 | 23 | The remaining characters are then converted to a byte array by: 24 | 25 | 1. Counting the number of leading '0' characters (Z). 26 | 2. Interpreting the rest of the character sequence as a base10 unsigned integer 27 | (N). 28 | 3. Concatenating a length Z array of NULL (0x00) bytes with N encoded as a 29 | big-endian unsigned integer. 
30 | 31 | ## Examples 32 | 33 | Byte Array <-> Base10 Multibase: 34 | 35 | * `[0x00, 0x01]` <-> `"901"` 36 | * `[0x00, 0x00, 0xff]` <-> `"900255"` 37 | * `[0x01, 0x00]` <-> `"9256"` 38 | * `[0x00, 0x01, 0x00]` <-> `"90256"` 39 | -------------------------------------------------------------------------------- /rfcs/Base2.md: -------------------------------------------------------------------------------- 1 | # Base2 2 | 3 | The multibase base2 prefix is the character `0`. Canonical multibase-base2 4 | encoded data will always be `8k+1` characters long. 5 | 6 | ## Encoding 7 | 8 | A byte array is encoded to multibase base2 by prefixing its base2 encoding with 9 | the character `0`. 10 | 11 | A byte array is encoded to base2 by concatenating the base2 representations of 12 | each byte in the array, in order. 13 | 14 | The base2 representation of a byte is a byte array of length 8 in big-endian 15 | order, where each byte of the array is set to the character `1`, if the 16 | corresponding bit in the byte is set, and the character `0` if the corresponding 17 | bit is unset. 18 | 19 | For example, `[0x58, 0x59, 0x5a]` can be converted to multibase base2 as 20 | follows: 21 | 22 | ``` 23 | map each byte to the base2 representation: 24 | ["01011000", "01011001", "01011010"] 25 | 26 | concatenate: 27 | "010110000101100101011010" 28 | 29 | prefix with '0': 30 | "0010110000101100101011010" 31 | ``` 32 | 33 | ## Decoding 34 | 35 | The canonical format of multibase base2 encoding is decoded by dropping the 36 | multibase prefix (which must be the character `0`), dividing the remaining 37 | character sequence into _k_ sections of length _8_, then converting each section 38 | into its corresponding byte value. 39 | 40 | The byte value of each section is obtained by mapping each '0' or '1' character 41 | to a 0 or 1 bit in the resulting byte (MSB first). 42 | 43 | For example, the byte value of `"00110111"` is `0x37`. 
44 | 45 | The process, with input `"00100000101000010"`: 46 | 47 | ``` 48 | drop the leading zero: 49 | "0100000101000010" 50 | divide to sections: 51 | ["01000001", "01000010"] 52 | allocate output buffer: 53 | [0x00, 0x00] 54 | set the correct bits: 55 | [0x41, 0x42] or "AB" 56 | ``` 57 | 58 | A non-canonical encoding of multibase base2 whose length is not equal to _8k+1_ 59 | characters for some _k_ may be canonicalized by left-padding the string with '0' 60 | characters (until the length is _8k+1_). The string can then be decoded normally. 61 | 62 | For example, the non-canonical encoding `"011010"` may be canonicalized to 63 | `"000011010"` (decoded as `[0x1a]`). A standard multibase implementation must 64 | output base2 in the *canonical* encoding only. 65 | -------------------------------------------------------------------------------- /rfcs/Base256Emoji.md: -------------------------------------------------------------------------------- 1 | # Base256Emoji 2 | 3 | This base is a benchmark / test / torture for implementations that want 4 | to support Unicode. 5 | 6 | ## Encoding 7 | 8 | Since both buffers and base256 items have 256 permutations per item, the 9 | encoding is trivial: there is a one-to-one correspondence between one UTF-32 10 | character and one byte value and you don't need to deal with any overflow or 11 | padding. 12 | 13 | First, allocate a UTF-32 output string with a codepoint length of your input 14 | buffer. 15 | 16 | Then, for each index, look up the correspondence table using the current byte 17 | value as an index and write the codepoint you found to your output buffer at the 18 | same index. 
19 | 20 | You can find out the correspondence using this table: 21 | 22 | | Emoji | Unicode codepoint | Byte Value | 23 | |:-:|:-:|:-:| 24 | | 🚀 | U+1F680 | 0 | 25 | | 🪐 | U+1FA90 | 1 | 26 | | ☄ | U+2604 | 2 | 27 | | 🛰 | U+1F6F0 | 3 | 28 | | 🌌 | U+1F30C | 4 | 29 | | 🌑 | U+1F311 | 5 | 30 | | 🌒 | U+1F312 | 6 | 31 | | 🌓 | U+1F313 | 7 | 32 | | 🌔 | U+1F314 | 8 | 33 | | 🌕 | U+1F315 | 9 | 34 | | 🌖 | U+1F316 | 10 | 35 | | 🌗 | U+1F317 | 11 | 36 | | 🌘 | U+1F318 | 12 | 37 | | 🌍 | U+1F30D | 13 | 38 | | 🌏 | U+1F30F | 14 | 39 | | 🌎 | U+1F30E | 15 | 40 | | 🐉 | U+1F409 | 16 | 41 | | ☀ | U+2600 | 17 | 42 | | 💻 | U+1F4BB | 18 | 43 | | 🖥 | U+1F5A5 | 19 | 44 | | 💾 | U+1F4BE | 20 | 45 | | 💿 | U+1F4BF | 21 | 46 | | 😂 | U+1F602 | 22 | 47 | | ❤ | U+2764 | 23 | 48 | | 😍 | U+1F60D | 24 | 49 | | 🤣 | U+1F923 | 25 | 50 | | 😊 | U+1F60A | 26 | 51 | | 🙏 | U+1F64F | 27 | 52 | | 💕 | U+1F495 | 28 | 53 | | 😭 | U+1F62D | 29 | 54 | | 😘 | U+1F618 | 30 | 55 | | 👍 | U+1F44D | 31 | 56 | | 😅 | U+1F605 | 32 | 57 | | 👏 | U+1F44F | 33 | 58 | | 😁 | U+1F601 | 34 | 59 | | 🔥 | U+1F525 | 35 | 60 | | 🥰 | U+1F970 | 36 | 61 | | 💔 | U+1F494 | 37 | 62 | | 💖 | U+1F496 | 38 | 63 | | 💙 | U+1F499 | 39 | 64 | | 😢 | U+1F622 | 40 | 65 | | 🤔 | U+1F914 | 41 | 66 | | 😆 | U+1F606 | 42 | 67 | | 🙄 | U+1F644 | 43 | 68 | | 💪 | U+1F4AA | 44 | 69 | | 😉 | U+1F609 | 45 | 70 | | ☺ | U+263A | 46 | 71 | | 👌 | U+1F44C | 47 | 72 | | 🤗 | U+1F917 | 48 | 73 | | 💜 | U+1F49C | 49 | 74 | | 😔 | U+1F614 | 50 | 75 | | 😎 | U+1F60E | 51 | 76 | | 😇 | U+1F607 | 52 | 77 | | 🌹 | U+1F339 | 53 | 78 | | 🤦 | U+1F926 | 54 | 79 | | 🎉 | U+1F389 | 55 | 80 | | 💞 | U+1F49E | 56 | 81 | | ✌ | U+270C | 57 | 82 | | ✨ | U+2728 | 58 | 83 | | 🤷 | U+1F937 | 59 | 84 | | 😱 | U+1F631 | 60 | 85 | | 😌 | U+1F60C | 61 | 86 | | 🌸 | U+1F338 | 62 | 87 | | 🙌 | U+1F64C | 63 | 88 | | 😋 | U+1F60B | 64 | 89 | | 💗 | U+1F497 | 65 | 90 | | 💚 | U+1F49A | 66 | 91 | | 😏 | U+1F60F | 67 | 92 | | 💛 | U+1F49B | 68 | 93 | | 🙂 | U+1F642 | 69 | 94 | | 💓 | U+1F493 | 70 | 95 | | 🤩 | U+1F929 | 71 | 96 | | 
😄 | U+1F604 | 72 | 97 | | 😀 | U+1F600 | 73 | 98 | | 🖤 | U+1F5A4 | 74 | 99 | | 😃 | U+1F603 | 75 | 100 | | 💯 | U+1F4AF | 76 | 101 | | 🙈 | U+1F648 | 77 | 102 | | 👇 | U+1F447 | 78 | 103 | | 🎶 | U+1F3B6 | 79 | 104 | | 😒 | U+1F612 | 80 | 105 | | 🤭 | U+1F92D | 81 | 106 | | ❣ | U+2763 | 82 | 107 | | 😜 | U+1F61C | 83 | 108 | | 💋 | U+1F48B | 84 | 109 | | 👀 | U+1F440 | 85 | 110 | | 😪 | U+1F62A | 86 | 111 | | 😑 | U+1F611 | 87 | 112 | | 💥 | U+1F4A5 | 88 | 113 | | 🙋 | U+1F64B | 89 | 114 | | 😞 | U+1F61E | 90 | 115 | | 😩 | U+1F629 | 91 | 116 | | 😡 | U+1F621 | 92 | 117 | | 🤪 | U+1F92A | 93 | 118 | | 👊 | U+1F44A | 94 | 119 | | 🥳 | U+1F973 | 95 | 120 | | 😥 | U+1F625 | 96 | 121 | | 🤤 | U+1F924 | 97 | 122 | | 👉 | U+1F449 | 98 | 123 | | 💃 | U+1F483 | 99 | 124 | | 😳 | U+1F633 | 100 | 125 | | ✋ | U+270B | 101 | 126 | | 😚 | U+1F61A | 102 | 127 | | 😝 | U+1F61D | 103 | 128 | | 😴 | U+1F634 | 104 | 129 | | 🌟 | U+1F31F | 105 | 130 | | 😬 | U+1F62C | 106 | 131 | | 🙃 | U+1F643 | 107 | 132 | | 🍀 | U+1F340 | 108 | 133 | | 🌷 | U+1F337 | 109 | 134 | | 😻 | U+1F63B | 110 | 135 | | 😓 | U+1F613 | 111 | 136 | | ⭐ | U+2B50 | 112 | 137 | | ✅ | U+2705 | 113 | 138 | | 🥺 | U+1F97A | 114 | 139 | | 🌈 | U+1F308 | 115 | 140 | | 😈 | U+1F608 | 116 | 141 | | 🤘 | U+1F918 | 117 | 142 | | 💦 | U+1F4A6 | 118 | 143 | | ✔ | U+2714 | 119 | 144 | | 😣 | U+1F623 | 120 | 145 | | 🏃 | U+1F3C3 | 121 | 146 | | 💐 | U+1F490 | 122 | 147 | | ☹ | U+2639 | 123 | 148 | | 🎊 | U+1F38A | 124 | 149 | | 💘 | U+1F498 | 125 | 150 | | 😠 | U+1F620 | 126 | 151 | | ☝ | U+261D | 127 | 152 | | 😕 | U+1F615 | 128 | 153 | | 🌺 | U+1F33A | 129 | 154 | | 🎂 | U+1F382 | 130 | 155 | | 🌻 | U+1F33B | 131 | 156 | | 😐 | U+1F610 | 132 | 157 | | 🖕 | U+1F595 | 133 | 158 | | 💝 | U+1F49D | 134 | 159 | | 🙊 | U+1F64A | 135 | 160 | | 😹 | U+1F639 | 136 | 161 | | 🗣 | U+1F5E3 | 137 | 162 | | 💫 | U+1F4AB | 138 | 163 | | 💀 | U+1F480 | 139 | 164 | | 👑 | U+1F451 | 140 | 165 | | 🎵 | U+1F3B5 | 141 | 166 | | 🤞 | U+1F91E | 142 | 167 | | 😛 | U+1F61B | 143 | 168 | | 🔴 | U+1F534 | 144 | 
169 | | 😤 | U+1F624 | 145 | 170 | | 🌼 | U+1F33C | 146 | 171 | | 😫 | U+1F62B | 147 | 172 | | ⚽ | U+26BD | 148 | 173 | | 🤙 | U+1F919 | 149 | 174 | | ☕ | U+2615 | 150 | 175 | | 🏆 | U+1F3C6 | 151 | 176 | | 🤫 | U+1F92B | 152 | 177 | | 👈 | U+1F448 | 153 | 178 | | 😮 | U+1F62E | 154 | 179 | | 🙆 | U+1F646 | 155 | 180 | | 🍻 | U+1F37B | 156 | 181 | | 🍃 | U+1F343 | 157 | 182 | | 🐶 | U+1F436 | 158 | 183 | | 💁 | U+1F481 | 159 | 184 | | 😲 | U+1F632 | 160 | 185 | | 🌿 | U+1F33F | 161 | 186 | | 🧡 | U+1F9E1 | 162 | 187 | | 🎁 | U+1F381 | 163 | 188 | | ⚡ | U+26A1 | 164 | 189 | | 🌞 | U+1F31E | 165 | 190 | | 🎈 | U+1F388 | 166 | 191 | | ❌ | U+274C | 167 | 192 | | ✊ | U+270A | 168 | 193 | | 👋 | U+1F44B | 169 | 194 | | 😰 | U+1F630 | 170 | 195 | | 🤨 | U+1F928 | 171 | 196 | | 😶 | U+1F636 | 172 | 197 | | 🤝 | U+1F91D | 173 | 198 | | 🚶 | U+1F6B6 | 174 | 199 | | 💰 | U+1F4B0 | 175 | 200 | | 🍓 | U+1F353 | 176 | 201 | | 💢 | U+1F4A2 | 177 | 202 | | 🤟 | U+1F91F | 178 | 203 | | 🙁 | U+1F641 | 179 | 204 | | 🚨 | U+1F6A8 | 180 | 205 | | 💨 | U+1F4A8 | 181 | 206 | | 🤬 | U+1F92C | 182 | 207 | | ✈ | U+2708 | 183 | 208 | | 🎀 | U+1F380 | 184 | 209 | | 🍺 | U+1F37A | 185 | 210 | | 🤓 | U+1F913 | 186 | 211 | | 😙 | U+1F619 | 187 | 212 | | 💟 | U+1F49F | 188 | 213 | | 🌱 | U+1F331 | 189 | 214 | | 😖 | U+1F616 | 190 | 215 | | 👶 | U+1F476 | 191 | 216 | | 🥴 | U+1F974 | 192 | 217 | | ▶ | U+25B6 | 193 | 218 | | ➡ | U+27A1 | 194 | 219 | | ❓ | U+2753 | 195 | 220 | | 💎 | U+1F48E | 196 | 221 | | 💸 | U+1F4B8 | 197 | 222 | | ⬇ | U+2B07 | 198 | 223 | | 😨 | U+1F628 | 199 | 224 | | 🌚 | U+1F31A | 200 | 225 | | 🦋 | U+1F98B | 201 | 226 | | 😷 | U+1F637 | 202 | 227 | | 🕺 | U+1F57A | 203 | 228 | | ⚠ | U+26A0 | 204 | 229 | | 🙅 | U+1F645 | 205 | 230 | | 😟 | U+1F61F | 206 | 231 | | 😵 | U+1F635 | 207 | 232 | | 👎 | U+1F44E | 208 | 233 | | 🤲 | U+1F932 | 209 | 234 | | 🤠 | U+1F920 | 210 | 235 | | 🤧 | U+1F927 | 211 | 236 | | 📌 | U+1F4CC | 212 | 237 | | 🔵 | U+1F535 | 213 | 238 | | 💅 | U+1F485 | 214 | 239 | | 🧐 | U+1F9D0 | 215 | 240 | | 🐾 | U+1F43E | 
216 | 241 | | 🍒 | U+1F352 | 217 | 242 | | 😗 | U+1F617 | 218 | 243 | | 🤑 | U+1F911 | 219 | 244 | | 🌊 | U+1F30A | 220 | 245 | | 🤯 | U+1F92F | 221 | 246 | | 🐷 | U+1F437 | 222 | 247 | | ☎ | U+260E | 223 | 248 | | 💧 | U+1F4A7 | 224 | 249 | | 😯 | U+1F62F | 225 | 250 | | 💆 | U+1F486 | 226 | 251 | | 👆 | U+1F446 | 227 | 252 | | 🎤 | U+1F3A4 | 228 | 253 | | 🙇 | U+1F647 | 229 | 254 | | 🍑 | U+1F351 | 230 | 255 | | ❄ | U+2744 | 231 | 256 | | 🌴 | U+1F334 | 232 | 257 | | 💣 | U+1F4A3 | 233 | 258 | | 🐸 | U+1F438 | 234 | 259 | | 💌 | U+1F48C | 235 | 260 | | 📍 | U+1F4CD | 236 | 261 | | 🥀 | U+1F940 | 237 | 262 | | 🤢 | U+1F922 | 238 | 263 | | 👅 | U+1F445 | 239 | 264 | | 💡 | U+1F4A1 | 240 | 265 | | 💩 | U+1F4A9 | 241 | 266 | | 👐 | U+1F450 | 242 | 267 | | 📸 | U+1F4F8 | 243 | 268 | | 👻 | U+1F47B | 244 | 269 | | 🤐 | U+1F910 | 245 | 270 | | 🤮 | U+1F92E | 246 | 271 | | 🎼 | U+1F3BC | 247 | 272 | | 🥵 | U+1F975 | 248 | 273 | | 🚩 | U+1F6A9 | 249 | 274 | | 🍎 | U+1F34E | 250 | 275 | | 🍊 | U+1F34A | 251 | 276 | | 👼 | U+1F47C | 252 | 277 | | 💍 | U+1F48D | 253 | 278 | | 📣 | U+1F4E3 | 254 | 279 | | 🥂 | U+1F942 | 255 | 280 | 281 | ## Decoding 282 | 283 | Decoding is the same as encoding but the other way around: map each emoji back to its byte value. 284 | 285 | Note: it is not recommended to use an 8-gigabyte `UTF-32 codepoint` -> 286 | `struct {bool, byte}` lookup table; it is wiser to use a hash map instead. 287 | -------------------------------------------------------------------------------- /rfcs/Base36.md: -------------------------------------------------------------------------------- 1 | # Base36 2 | 3 | The multibase base36 prefix is the character `k` or `K`. The digit-alphabet 4 | consists of 0..9 and then the case-insensitive range a..z for the values 10..35. 5 | 6 | ## Encoding 7 | 8 | A byte array is encoded to base36 by: 9 | 10 | 1. Counting the number of leading 0 bytes (Z). 11 | 2. Interpreting the rest of the byte array as a big-endian unsigned integer (N). 12 | 3. 
Concatenating a length Z string of '0' characters with the base36 13 | representation of N. 14 | 15 | A byte array is encoded to multibase base36 by prefixing its base36 encoding 16 | with the character `k` (or `K` for the upper-case variant). 17 | 18 | ## Decoding 19 | 20 | A multibase base36 encoded string is decoded by first dropping the multibase 21 | prefix (which must be `k` or `K`). 22 | 23 | The remaining characters are then converted to a byte array by: 24 | 25 | 1. Counting the number of leading '0' characters (Z). 26 | 2. Interpreting the rest of the character sequence as a base36 unsigned integer 27 | (N). 28 | 3. Concatenating a length Z array of NULL (0x00) bytes with N encoded as a 29 | big-endian unsigned integer. 30 | 31 | ## Examples 32 | 33 | Byte Array <-> Base36 Multibase: 34 | 35 | | Bytes | == | LC Base36 | OR | UC Base36 | 36 | |---|---|---|---|---| 37 | | `[0x00, 0x01]` | == | `"k01"` | | `"K01"` | 38 | | `[0x00, 0x00, 0xff]` | == | `"k0073"` | | `"K0073"` | 39 | | `[0x01, 0x00]` | == | `"k74"` | | `"K74"` | 40 | | `[0x00, 0x01, 0x00]` | == | `"k074"` | | `"K074"` | 41 | -------------------------------------------------------------------------------- /rfcs/Base8.md: -------------------------------------------------------------------------------- 1 | # Base8 2 | 3 | The multibase base8 prefix is the character `7`. This spec is derived from 4 | RFC4648. 5 | 6 | ## Encoding 7 | 8 | Map each group of 3 bytes (8-bit words), with the most significant bit on the left side 9 | (big-endian), to eight 3-bit words as follows: 10 | 11 | `[⁰b₇₆₅₄₃₂₁₀, ¹b₇₆₅₄₃₂₁₀, ²b₇₆₅₄₃₂₁₀]` 12 | 13 | `[⁰b₇₆₅, ⁰b₄₃₂, ⁰b₁₀¹b₇, ¹b₆₅₄, ¹b₃₂₁, ¹b₀²b₇₆, ²b₅₄₃, ²b₂₁₀]` 14 | 15 | Then map their values as big-endian unsigned ints to their chars: 16 | 17 | ``` 18 | 000 → '0' 19 | 001 → '1' 20 | ... 21 | 111 → '7' 22 | ``` 23 | 24 | Treat missing bits as zero and optionally pad. 25 | 26 | ## Decoding 27 | 28 | Map chars to the following 3-bit words: 29 | 30 | ``` 31 | '0' → 000 32 | '1' → 001 33 | ... 
34 | '7' → 111 35 | ``` 36 | 37 | Then map each group of eight 3-bit words, with the most significant bit on the left side 38 | (big-endian), to 3 bytes (8-bit words) as follows: 39 | 40 | `[⁰b₂₁₀, ¹b₂₁₀, ²b₂₁₀, ³b₂₁₀, ⁴b₂₁₀, ⁵b₂₁₀, ⁶b₂₁₀, ⁷b₂₁₀]` 41 | 42 | `[⁰b₂₁₀¹b₂₁₀²b₂₁, ²b₀³b₂₁₀⁴b₂₁₀⁵b₂, ⁵b₁₀⁶b₂₁₀⁷b₂₁₀]` 43 | 44 | If there are not enough bits to complete the last 8-bit word then drop that last 45 | incomplete 8-bit word. 46 | -------------------------------------------------------------------------------- /rfcs/Proquint.md: -------------------------------------------------------------------------------- 1 | # PRO-QUINT 2 | 3 | For the original proquint specification, see: https://arxiv.org/html/0901.4016 ([/ipfs/bafybeib5jsyi5igjwhi7hzkfebpvnq2ykbwpxeaaxlkyfyxqvcecoao4qa](https://dweb.link/ipfs/bafybeib5jsyi5igjwhi7hzkfebpvnq2ykbwpxeaaxlkyfyxqvcecoao4qa)). 4 | 5 | The multibase prefix for proquints is the character `p`. The base-encoded data is the data encoded according to the original specification, with an additional `ro-` prefix: 6 | 7 | ``` 8 | <multibase-prefix-character><additional-prefix-characters><proquint-encoded-data> 9 | ``` 10 | 11 | The resulting full prefix for the actual proquint encoded data is `pro-`, making multibase-encoded proquints easily pronounceable. 12 | For example, the proquint encoding of the bytestring `[127, 0, 0, 1]` (the data for the IPv4 address `127.0.0.1`) is `lusab-babad`, so the corresponding multibase-encoded proquint bytestring is: 13 | 14 | ``` 15 | pro-lusab-babad 16 | ``` -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | Multibase test vectors 2 | ====================== 3 | 4 | This directory contains test vectors that should be used by every implementation to check if it correctly implements the Base encoding as specified by Multibase. 
5 | 6 | | Filename | String to encode | Description 7 | | -------- | ---------------- | ----------- 8 | | `basic.csv` | `yes mani !` | Basic case 9 | | `leading_zero.csv` | `\x00yes mani !` | Leading zero byte 10 | | `two_leading_zeros.csv` | `\x00\x00yes mani !` | Two leading zero bytes (leading zeros are a source of bugs, hence there are two test vectors for them) 11 | | `case_insensitivity.csv` | `hello world` | Differently cased than expected, must decode without errors 12 | -------------------------------------------------------------------------------- /tests/basic.csv: -------------------------------------------------------------------------------- 1 | encoding, "yes mani !" 2 | base2, "001111001011001010111001100100000011011010110000101101110011010010010000000100001" 3 | base8, "7362625631006654133464440102" 4 | base10, "9573277761329450583662625" 5 | base16, "f796573206d616e692021" 6 | base16upper, "F796573206D616E692021" 7 | base32, "bpfsxgidnmfxgsibb" 8 | base32upper, "BPFSXGIDNMFXGSIBB" 9 | base32hex, "vf5in683dc5n6i811" 10 | base32hexupper, "VF5IN683DC5N6I811" 11 | base32pad, "cpfsxgidnmfxgsibb" 12 | base32padupper, "CPFSXGIDNMFXGSIBB" 13 | base32hexpad, "tf5in683dc5n6i811" 14 | base32hexpadupper, "TF5IN683DC5N6I811" 15 | base32z, "hxf1zgedpcfzg1ebb" 16 | base36, "k2lcpzo5yikidynfl" 17 | base36upper, "K2LCPZO5YIKIDYNFL" 18 | base58flickr, "Z7Pznk19XTTzBtx" 19 | base58btc, "z7paNL19xttacUY" 20 | base64, "meWVzIG1hbmkgIQ" 21 | base64pad, "MeWVzIG1hbmkgIQ==" 22 | base64url, "ueWVzIG1hbmkgIQ" 23 | base64urlpad, "UeWVzIG1hbmkgIQ==" 24 | base256emoji, "🚀🏃✋🌈😅🌷🤤😻🌟😅👏" 25 | -------------------------------------------------------------------------------- /tests/case_insensitivity.csv: -------------------------------------------------------------------------------- 1 | non-canonical encoding, "hello world" 2 | base16, "f68656c6c6f20776F726C64" 3 | base16upper, "F68656c6c6f20776F726C64" 4 | base32, "bnbswy3dpeB3W64TMMQ" 5 | base32upper, "Bnbswy3dpeB3W64TMMQ" 6 | 
base32hex, "vd1imor3f41RMUSJCCG" 7 | base32hexupper, "Vd1imor3f41RMUSJCCG" 8 | base32pad, "cnbswy3dpeB3W64TMMQ======" 9 | base32padupper, "Cnbswy3dpeB3W64TMMQ======" 10 | base32hexpad, "td1imor3f41RMUSJCCG======" 11 | base32hexpadupper, "Td1imor3f41RMUSJCCG======" 12 | base36, "kfUvrsIvVnfRbjWaJo" 13 | base36upper, "KfUVrSIVVnFRbJWAJo" 14 | -------------------------------------------------------------------------------- /tests/leading_zero.csv: -------------------------------------------------------------------------------- 1 | encoding, "\x00yes mani !" 2 | base2, "00000000001111001011001010111001100100000011011010110000101101110011010010010000000100001" 3 | base8, "7000745453462015530267151100204" 4 | base10, "90573277761329450583662625" 5 | base16, "f00796573206d616e692021" 6 | base16upper, "F00796573206D616E692021" 7 | base32, "bab4wk4zanvqw42jaee" 8 | base32upper, "BAB4WK4ZANVQW42JAEE" 9 | base32hex, "v01smasp0dlgmsq9044" 10 | base32hexupper, "V01SMASP0DLGMSQ9044" 11 | base32pad, "cab4wk4zanvqw42jaee======" 12 | base32padupper, "CAB4WK4ZANVQW42JAEE======" 13 | base32hexpad, "t01smasp0dlgmsq9044======" 14 | base32hexpadupper, "T01SMASP0DLGMSQ9044======" 15 | base32z, "hybhskh3ypiosh4jyrr" 16 | base36, "k02lcpzo5yikidynfl" 17 | base36upper, "K02LCPZO5YIKIDYNFL" 18 | base58flickr, "Z17Pznk19XTTzBtx" 19 | base58btc, "z17paNL19xttacUY" 20 | base64, "mAHllcyBtYW5pICE" 21 | base64pad, "MAHllcyBtYW5pICE=" 22 | base64url, "uAHllcyBtYW5pICE" 23 | base64urlpad, "UAHllcyBtYW5pICE=" 24 | base256emoji, "🚀🚀🏃✋🌈😅🌷🤤😻🌟😅👏" 25 | -------------------------------------------------------------------------------- /tests/two_leading_zeros.csv: -------------------------------------------------------------------------------- 1 | encoding, "\x00\x00yes mani !" 
2 | base2, "0000000000000000001111001011001010111001100100000011011010110000101101110011010010010000000100001" 3 | base8, "700000171312714403326055632220041" 4 | base10, "900573277761329450583662625" 5 | base16, "f0000796573206d616e692021" 6 | base16upper, "F0000796573206D616E692021" 7 | base32, "baaahszltebwwc3tjeaqq" 8 | base32upper, "BAAAHSZLTEBWWC3TJEAQQ" 9 | base32hex, "v0007ipbj41mm2rj940gg" 10 | base32hexupper, "V0007IPBJ41MM2RJ940GG" 11 | base32pad, "caaahszltebwwc3tjeaqq====" 12 | base32padupper, "CAAAHSZLTEBWWC3TJEAQQ====" 13 | base32hexpad, "t0007ipbj41mm2rj940gg====" 14 | base32hexpadupper, "T0007IPBJ41MM2RJ940GG====" 15 | base32z, "hyyy813murbssn5ujryoo" 16 | base36, "k002lcpzo5yikidynfl" 17 | base36upper, "K002LCPZO5YIKIDYNFL" 18 | base58flickr, "Z117Pznk19XTTzBtx" 19 | base58btc, "z117paNL19xttacUY" 20 | base64, "mAAB5ZXMgbWFuaSAh" 21 | base64pad, "MAAB5ZXMgbWFuaSAh" 22 | base64url, "uAAB5ZXMgbWFuaSAh" 23 | base64urlpad, "UAAB5ZXMgbWFuaSAh" 24 | base256emoji, "🚀🚀🚀🏃✋🌈😅🌷🤤😻🌟😅👏" 25 | --------------------------------------------------------------------------------