├── dune-project ├── .ocp-indent ├── .gitignore ├── bench ├── dune └── benchmarks.ml ├── config ├── dune └── config.ml ├── Makefile ├── src ├── dune ├── base64.mli ├── base64_rfc2045.mli ├── base64.ml └── base64_rfc2045.ml ├── .ocamlformat ├── test ├── dune └── test.ml ├── fuzz ├── dune ├── fuzz_rfc2045.ml └── fuzz_rfc4648.ml ├── README.md ├── LICENSE.md ├── .travis-ci.sh ├── base64.opam └── CHANGES.md /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.3) 2 | (name base64) 3 | -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | strict_with=always,match_clause=4,strict_else=never 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | _tests 3 | tmp 4 | *~ 5 | \.\#* 6 | \#*# 7 | *.install 8 | *.native 9 | *.byte 10 | .merlin -------------------------------------------------------------------------------- /bench/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name benchmarks) 3 | (enabled_if 4 | (= %{profile} benchmark)) 5 | (libraries base64 core_bench)) 6 | -------------------------------------------------------------------------------- /config/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name config)) 3 | 4 | (rule 5 | (with-stdout-to 6 | which-unsafe-file 7 | (run ./config.exe))) 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all test clean 2 | 3 | all: 4 | dune build 5 | 6 | test: 7 | dune runtest 8 | 9 | clean: 10 | dune clean 11 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name base64) 3 | (modules base64) 4 | (public_name base64)) 5 | 6 | (library 7 | (name base64_rfc2045) 8 | (modules base64_rfc2045) 9 | (public_name base64.rfc2045)) 10 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | version = 0.16.0 2 | break-infix = fit-or-vertical 3 | parse-docstrings = true 4 | indicate-multiline-delimiters=no 5 | nested-match=align 6 | sequence-style=separator 7 | break-before-in=auto 8 | if-then-else=keyword-first 9 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (modes byte exe) 3 | (name test) 4 | (libraries base64 base64.rfc2045 rresult alcotest bos)) 5 | 6 | (rule 7 | (alias runtest) 8 | (deps 9 | (:exe test.exe)) 10 | (action 11 | (run %{exe} --color=always))) 12 | -------------------------------------------------------------------------------- /config/config.ml: -------------------------------------------------------------------------------- 1 | let parse s = Scanf.sscanf s "%d.%d" (fun major minor -> (major, minor)) 2 | 3 | let () = 4 | let version = parse Sys.ocaml_version in 5 | if version >= (4, 7) 6 | then print_string "unsafe_stable.ml" 7 | else print_string "unsafe_pre407.ml" 8 | -------------------------------------------------------------------------------- /fuzz/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name fuzz_rfc2045) 3 | (enabled_if 4 | (= %{profile} fuzz)) 5 | (modules fuzz_rfc2045) 6 | (libraries astring crowbar fmt base64.rfc2045)) 7 | 8 | (executable 9 | (name fuzz_rfc4648) 10 | (enabled_if 11 | (= %{profile} fuzz)) 12 | (modules fuzz_rfc4648) 13 | (libraries astring crowbar fmt base64)) 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Base64 for OCaml 2 | ================ 3 | 4 | Base64 is a group of similar binary-to-text encoding schemes that represent 5 | binary data in an ASCII string format by translating it into a radix-64 6 | representation. It is specified in [RFC 4648][rfc4648]. 7 | 8 | See also [documentation][docs]. 9 | 10 | [rfc4648]: https://tools.ietf.org/html/rfc4648 11 | [docs]: http://mirage.github.io/ocaml-base64/base64/ 12 | 13 | ## Example 14 | 15 | Simple encoding and decoding. 16 | 17 | ```shell 18 | utop # #require "base64";; 19 | utop # let enc = Base64.encode_exn "OCaml rocks!";; 20 | val enc : string = "T0NhbWwgcm9ja3Mh" 21 | utop # let plain = Base64.decode_exn enc;; 22 | val plain : string = "OCaml rocks!" 23 | ``` 24 | 25 | ## License 26 | 27 | [ISC](https://www.isc.org/downloads/software-support-policy/isc-license/) 28 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006-2009 Citrix Systems Inc. 2 | Copyright (c) 2010 Thomas Gazagnaire 3 | 4 | Permission to use, copy, modify, and distribute this software for any 5 | purpose with or without fee is hereby granted, provided that the above 6 | copyright notice and this permission notice appear in all copies. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | -------------------------------------------------------------------------------- /.travis-ci.sh: -------------------------------------------------------------------------------- 1 | case "$OCAML_VERSION,$OPAM_VERSION" in 2 | 4.00.1,1.1.0) ppa=avsm/ocaml40+opam11 ;; 3 | 4.00.1,1.2.0) ppa=avsm/ocaml40+opam12 ;; 4 | 4.01.0,1.1.0) ppa=avsm/ocaml41+opam11 ;; 5 | 4.01.0,1.2.0) ppa=avsm/ocaml41+opam12 ;; 6 | 4.02.1,1.1.0) ppa=avsm/ocaml42+opam11 ;; 7 | 4.02.1,1.2.0) ppa=avsm/ocaml42+opam12 ;; 8 | *) echo Unknown $OCAML_VERSION,$OPAM_VERSION; exit 1 ;; 9 | esac 10 | 11 | echo "yes" | sudo add-apt-repository ppa:$ppa 12 | sudo apt-get update -qq 13 | sudo apt-get install -qq ocaml ocaml-native-compilers camlp4-extra opam time 14 | 15 | export OPAMYES=1 16 | export OPAMVERBOSE=1 17 | echo OCaml version 18 | ocaml -version 19 | echo OPAM versions 20 | opam --version 21 | opam --git-version 22 | 23 | opam init git://github.com/ocaml/opam-repository >/dev/null 2>&1 24 | opam install ocamlfind 25 | eval `opam config env` 26 | make 27 | make install 28 | -------------------------------------------------------------------------------- /base64.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "mirageos-devel@lists.xenproject.org" 3 | authors: [ "Thomas Gazagnaire" 4 | "Anil Madhavapeddy" "Calascibetta Romain" 5 | "Peter Zotov" ] 6 | license: "ISC" 7 | homepage: "https://github.com/mirage/ocaml-base64" 8 | doc: "https://mirage.github.io/ocaml-base64/" 9 | bug-reports: "https://github.com/mirage/ocaml-base64/issues" 10 | dev-repo: "git+https://github.com/mirage/ocaml-base64.git" 11 | synopsis: "Base64 encoding for OCaml" 12 | description: """ 13 | Base64 is a group of similar binary-to-text encoding schemes that represent 14 | binary data in an ASCII string format by translating it into a radix-64 15 | representation. It is specified in RFC 4648. 16 | """ 17 | depends: [ 18 | "ocaml" {>= "4.07.0"} 19 | "dune" {>= "2.3"} 20 | "fmt" {with-test & >= "0.8.7"} 21 | "bos" {with-test} 22 | "rresult" {with-test} 23 | "alcotest" {with-test} 24 | ] 25 | build: [ 26 | ["dune" "subst"] {dev} 27 | ["dune" "build" "-p" name "-j" jobs] 28 | ["dune" "runtest" "-p" name "-j" jobs] {with-test} 29 | ] 30 | x-maintenance-intent: [ "(latest)" ] 31 | -------------------------------------------------------------------------------- /bench/benchmarks.ml: -------------------------------------------------------------------------------- 1 | module Old_version = struct 2 | let default_alphabet = 3 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 4 | 5 | let uri_safe_alphabet = 6 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 7 | 8 | let padding = '=' 9 | 10 | let of_char ?(alphabet = default_alphabet) x = 11 | if x = padding then 0 else String.index alphabet x 12 | 13 | let to_char ?(alphabet = default_alphabet) x = alphabet.[x] 14 | 15 | let decode ?alphabet input = 16 | let length = String.length input in 17 | let input = 18 | if length mod 4 = 0 19 | then input 20 | else input ^ String.make (4 - (length mod 4)) padding in 21 | let length = String.length input in 22 | let words = length / 4 in 23 | let padding = 24 | match length with 25 | | 0 -> 0 26 | | _ when input.[length - 2] = padding -> 2 27 | | _ when input.[length - 1] = padding -> 1 28 | | _ -> 0 in 29 | let output = Bytes.make ((words * 3) - padding) '\000' in 30 | for i = 0 to words - 1 do 31 | let a = of_char ?alphabet input.[(4 * i) + 0] 32 | and b = of_char ?alphabet input.[(4 * i) + 1] 33 | and c = of_char ?alphabet input.[(4 * i) + 2] 34 | and d = of_char ?alphabet input.[(4 * i) + 3] in 35 | let n = (a lsl 18) lor (b lsl 12) lor (c lsl 6) lor d in 36 | let x = (n lsr 16) land 255 37 | and y = (n lsr 8) land 255 38 | and z = n land 255 in 39 | Bytes.set output ((3 * i) + 0) (char_of_int x) ; 40 | if i <> words - 1 || padding < 2 41 | then Bytes.set output ((3 * i) + 1) (char_of_int y) ; 42 | if i <> words - 1 || padding < 1 43 | then Bytes.set output ((3 * i) + 2) (char_of_int z) 44 | done ; 45 | Bytes.unsafe_to_string output 46 | 47 | let decode_opt ?alphabet input = 48 | try Some (decode ?alphabet input) with Not_found -> None 49 | 50 | let encode ?(pad = true) ?alphabet input = 51 | let length = String.length input in 52 | let words = (length + 2) / 3 (* rounded up *) in 53 | let padding_len = if length mod 3 = 0 then 0 else 3 - (length mod 3) in 54 | let output = Bytes.make (words * 4) '\000' in 55 | let get i = if i >= length then 0 else int_of_char input.[i] in 56 | for i = 0 to words - 1 do 57 | let x = get ((3 * i) + 0) 58 | and y = get ((3 * i) + 1) 59 | and z = get ((3 * i) + 2) in 60 | let n = (x lsl 16) lor (y lsl 8) lor z in 61 | let a = (n lsr 18) land 63 62 | and b = (n lsr 12) land 63 63 | and c = (n lsr 6) land 63 64 | and d = n land 63 in 65 | Bytes.set output ((4 * i) + 0) (to_char ?alphabet a) ; 66 | Bytes.set output ((4 * i) + 1) (to_char ?alphabet b) ; 67 | Bytes.set output ((4 * i) + 2) (to_char ?alphabet c) ; 68 | Bytes.set output ((4 * i) + 3) (to_char ?alphabet d) 69 | done ; 70 | for i = 1 to padding_len do 71 | Bytes.set output (Bytes.length output - i) padding 72 | done ; 73 | if pad 74 | then Bytes.unsafe_to_string output 75 | else Bytes.sub_string output 0 (Bytes.length output - padding_len) 76 | end 77 | 78 | let random len = 79 | let ic = open_in "/dev/urandom" in 80 | let rs = Bytes.create len in 81 | really_input ic rs 0 len ; 82 | close_in ic ; 83 | Bytes.unsafe_to_string rs 84 | 85 | open Core 86 | open Core_bench 87 | 88 | let b64_encode_and_decode len = 89 | let input = random len in 90 | Staged.stage @@ fun () -> 91 | let encoded = Base64.encode_exn input in 92 | let _decoded = Base64.decode_exn encoded in 93 | () 94 | 95 | let old_encode_and_decode len = 96 | let input = random len in 97 | Staged.stage @@ fun () -> 98 | let encoded = Old_version.encode input in 99 | let _decoded = Old_version.decode encoded in 100 | () 101 | 102 | let args = [ 0; 10; 50; 100; 500; 1000; 2500; 5000 ] 103 | 104 | let test_b64 = 105 | Bench.Test.create_indexed ~name:"Base64" ~args b64_encode_and_decode 106 | 107 | let test_old = Bench.Test.create_indexed ~name:"Old" ~args old_encode_and_decode 108 | 109 | let command = Bench.make_command [ test_b64; test_old ] 110 | 111 | let () = Command.run command 112 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ### v3.5.2 (2025-09-19) 2 | 3 | - Add `x-maintenance-intent` into the OPAM file (@hannesm, #54) 4 | - Remove the support of OCaml 4.07 (@copy, #55) 5 | 6 | ### v3.5.1 (2023-01-24) 7 | 8 | - Few fixes about benchmarks and tests (#51, @tbrk, @dinosaure) 9 | - Add missing dependency about `fmt` and fix the compilation for OCaml 5.0 (#52, @kit-ty-kate) 10 | 11 | ### v3.5.0 (2021-02-08) 12 | 13 | - Fix support for `x-compilation` (@samoht, #44) 14 | - Update to `dune.2.0` and apply `ocamlformat` (@samoht, #45) 15 | - Select `unsafe.ml` only with `dune` (@emillon, #46) 16 | - Remove indirect dependecy to `ocamlfind` (@kit-ty-kate, #49) 17 | - Hide internals of `base64` and return a `string` as the alphabet (@reynir, #48) 18 | **breaking chnages** `Base64.alphabet` is updated and return a simple `string` now 19 | 20 | ### v3.4.0 (2020-03-13) 21 | 22 | - Fix tests about `alcotest.1.0.0` (@dinosaure, #40) 23 | - Be more strict about padding when we decode a base64 input (@dinosaure, @hannesm, @cfcs, #43) 24 | - Remove `fmt` dependency (#43) 25 | 26 | ### v3.3.0 (2019-01-30) 27 | 28 | - Remove `build` directive on dune dependency (@CraigFe, #35) 29 | - Make error poly-variant open (@copy, #39) 30 | - Use `unsafe_bytes_set16u` instead `unsafe_string_set16u` (@dinosaure, @hhugo, @avsm, #37) 31 | 32 | ### v3.2.0 (2019-04-04) 33 | 34 | * `Base64_rfc2045.decode` can now progress on many input errors, allowing 35 | clients to make forward progress by discarding a single character and 36 | trying to continue. This allows, for example, newlines and other invalid 37 | characters to be discarded. (#34 @tiash, review by @dinosaure @avsm) 38 | * Add more test cases for RFC2045 (#34 @dinosaure) 39 | * Improve README toplevel output example (#28 @djs55) 40 | 41 | ### v3.1.0 (2019-02-03) 42 | 43 | * Add `Base64.encode_string` that doesn't raise or return an error. 44 | This makes it easier to port pre-3.0 code to the new interface (#26 @avsm) 45 | 46 | ### v3.0.0 (2018-01-21) 47 | 48 | * Implementation of Base64 according to RFC 2045 (available on base64.rfc2045) 49 | * New implementation of Base64 according to RFC 4648 from nocrypto's implementation 50 | * Fix bad access with `String.iter` on the old implementation of Base64 (@dinosaure, #23) 51 | * Check isomorphism between `encode` & `decode` function (@hannesm, @dinosaure, #20) 52 | * Add tests from RFC 3548 and from PHP impl. (@hannesm, @dinosaure, #24) 53 | * Add fuzzer on both implementations 54 | - check isomorphism 55 | - check bijection 56 | - check if `decode` does not raise any exception 57 | * __break-api__, `B64` was renamed to `Base64` (@copy, @avsm, @dinosaure, #17) 58 | * __break-api__, `Base64.decode` and `Base64.encode` returns a result type instead to raise an exception (@hannesm, @dinosaure, #21) 59 | * __break-api__, Add `sub` type to avoid allocation to the end-user (@avsm, @dinosaure, #24) 60 | * __break-api__, Add `pad` argument on `decode` function to check if input is well-padded or not (@hannesm, @dinosaure, #24) 61 | * __break-api__, Add `off` and `len` optional arguments on `encode` & `decode` functions to compute a part of input (@cfcs, @dinosaure, #24) 62 | * Better performance (see #24) (@dinosaure) 63 | * Review of code by @cfcs (see #24) 64 | 65 | ### v2.3.0 (2018-11-23) 66 | 67 | * Add a `decode_opt` function that is a non-raising variant of `decode`. 68 | * Reformat the code with ocamlformat (@dinosaure) 69 | * Port build to dune from jbuilder (@dinosaure 70 | 71 | ### v2.2.0 (2017-06-20) 72 | 73 | * Switch to jbuilder (#13, @rgrinberg) 74 | 75 | ### v2.1.2 (2016-10-18) 76 | 77 | * Fix version number (#11, @hannesm) 78 | 79 | ### v2.1.1 (2016-10-03) 80 | 81 | * Switch build to `topkg` and obey the `odig` conventions 82 | for installing metadata files. 83 | * Add a test suite based on RFC4648 test vectors. 84 | * Improve Travis CI tests to be multidistro. 85 | 86 | ### v2.0.0 (2014-12-24) 87 | 88 | * Switch the top-level `Base64` module to `B64` to avoid 89 | clashing with various other similarly named modules in 90 | `extlib` and some other libraries. This is obviously 91 | backwards compatibility breaking with all current users 92 | of this library. (#3). 93 | 94 | ### 1.1.0 (2014-12-16) 95 | 96 | * Allow specifying a different alphabet during encoding or 97 | decoding, and supply a URI-safe alphabet along with the 98 | default Base64 standard. 99 | * Add OCaml 4.02 `safe-string` compatibility. 100 | * Optionally support encoding without padding. 101 | 102 | ### 1.0.0 (2014-08-03) 103 | 104 | * Initial public release. 105 | -------------------------------------------------------------------------------- /src/base64.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2006-2009 Citrix Systems Inc. 3 | * Copyright (c) 2010 Thomas Gazagnaire 4 | * Copyright (c) 2014-2016 Anil Madhavapeddy 5 | * Copyright (c) 2018 Romain Calascibetta 6 | * 7 | * Permission to use, copy, modify, and distribute this software for any 8 | * purpose with or without fee is hereby granted, provided that the above 9 | * copyright notice and this permission notice appear in all copies. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 | * 19 | *) 20 | 21 | (** Base64 RFC4648 implementation. 22 | 23 | Base64 is a group of similar binary-to-text encoding schemes that represent 24 | binary data in an ASCII string format by translating it into a radix-64 25 | representation. It is specified in RFC 4648. 26 | 27 | {e Release %%VERSION%% - %%PKG_HOMEPAGE%%} *) 28 | 29 | type alphabet 30 | (** Type of alphabet. *) 31 | 32 | type sub = string * int * int 33 | (** Type of sub-string: [str, off, len]. *) 34 | 35 | val default_alphabet : alphabet 36 | (** A 64-character alphabet specifying the regular Base64 alphabet. *) 37 | 38 | val uri_safe_alphabet : alphabet 39 | (** A 64-character alphabet specifying the URI- and filename-safe Base64 40 | alphabet. *) 41 | 42 | val make_alphabet : string -> alphabet 43 | (** Make a new alphabet. *) 44 | 45 | val length_alphabet : alphabet -> int 46 | (** Returns length of the alphabet, should be 64. *) 47 | 48 | val alphabet : alphabet -> string 49 | (** Returns the alphabet. *) 50 | 51 | val decode_exn : 52 | ?pad:bool -> ?alphabet:alphabet -> ?off:int -> ?len:int -> string -> string 53 | (** [decode_exn ?off ?len s] decodes [len] bytes (defaults to 54 | [String.length s - off]) of the string [s] starting from [off] (defaults to 55 | [0]) that is encoded in Base64 format. Will leave trailing NULLs on the 56 | string, padding it out to a multiple of 3 characters. [alphabet] defaults to 57 | {!default_alphabet}. [pad = true] specifies to check if [s] is padded or 58 | not, otherwise, it raises an exception. 59 | 60 | Decoder can fail when character of [s] is not a part of [alphabet] or is not 61 | [padding] character. If input is not padded correctly, decoder does the 62 | best-effort but it does not ensure [decode_exn (encode ~pad:false x) = x]. 63 | 64 | @raise if Invalid_argument [s] is not a valid Base64 string. *) 65 | 66 | val decode_sub : 67 | ?pad:bool -> 68 | ?alphabet:alphabet -> 69 | ?off:int -> 70 | ?len:int -> 71 | string -> 72 | (sub, [> `Msg of string ]) result 73 | (** Same as {!decode_exn} but it returns a result type instead to raise an 74 | exception. Then, it returns a {!sub} string. Decoded input [(str, off, len)] 75 | will starting to [off] and will have [len] bytes - by this way, we ensure to 76 | allocate only one time result. *) 77 | 78 | val decode : 79 | ?pad:bool -> 80 | ?alphabet:alphabet -> 81 | ?off:int -> 82 | ?len:int -> 83 | string -> 84 | (string, [> `Msg of string ]) result 85 | (** Same as {!decode_exn}, but returns an explicit error message {!result} if it 86 | fails. *) 87 | 88 | val encode : 89 | ?pad:bool -> 90 | ?alphabet:alphabet -> 91 | ?off:int -> 92 | ?len:int -> 93 | string -> 94 | (string, [> `Msg of string ]) result 95 | (** [encode s] encodes the string [s] into base64. If [pad] is false, no 96 | trailing padding is added. [pad] defaults to [true], and [alphabet] to 97 | {!default_alphabet}. 98 | 99 | [encode] fails when [off] and [len] do not designate a valid range of [s]. *) 100 | 101 | val encode_string : ?pad:bool -> ?alphabet:alphabet -> string -> string 102 | (** [encode_string s] encodes the string [s] into base64. If [pad] is false, no 103 | trailing padding is added. [pad] defaults to [true], and [alphabet] to 104 | {!default_alphabet}. *) 105 | 106 | val encode_sub : 107 | ?pad:bool -> 108 | ?alphabet:alphabet -> 109 | ?off:int -> 110 | ?len:int -> 111 | string -> 112 | (sub, [> `Msg of string ]) result 113 | (** Same as {!encode} but return a {!sub}-string instead a plain result. By this 114 | way, we ensure to allocate only one time result. *) 115 | 116 | val encode_exn : 117 | ?pad:bool -> ?alphabet:alphabet -> ?off:int -> ?len:int -> string -> string 118 | (** Same as {!encode} but raises an invalid argument exception if we retrieve an 119 | error. *) 120 | -------------------------------------------------------------------------------- /src/base64_rfc2045.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2014-2016 Anil Madhavapeddy 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | * 16 | *) 17 | 18 | (** Decode *) 19 | 20 | val default_alphabet : string 21 | (** A 64-character string specifying the regular Base64 alphabet. *) 22 | 23 | type decoder 24 | (** The type for decoders. *) 25 | 26 | type src = [ `Manual | `Channel of in_channel | `String of string ] 27 | (** The type for input sources. With a [`Manual] source the client must provide 28 | input with {!src}. *) 29 | 30 | type decode = 31 | [ `Await | `End | `Flush of string | `Malformed of string | `Wrong_padding ] 32 | 33 | val src : decoder -> Bytes.t -> int -> int -> unit 34 | (** [src d s j l] provides [d] with [l] bytes to read, starting at [j] in [s]. 35 | This byte range is read by calls to {!decode} with [d] until [`Await] is 36 | returned. To signal the end of input, call the function with [l = 0]. *) 37 | 38 | val decoder : src -> decoder 39 | (** [decoder src] is a decoder that inputs from [src]. *) 40 | 41 | val decode : decoder -> decode 42 | (** [decode d] is: 43 | 44 | - [`Await] if [d] has a [`Manual] input source and awaits for more input. 45 | The client must use {!src} to provide it. 46 | - [`End] if the end of input was reached 47 | - [`Malformed bytes] if the [bytes] sequence is malformed according to the 48 | decoded base64 encoding scheme. If you are interested in a best-effort 49 | decoding, you can still continue to decode after an error until the decode 50 | synchronizes again on valid bytes. 51 | - [`Flush data] if a [data] sequence value was decoded. 52 | - [`Wrong_padding] if decoder retrieve a wrong padding at the end of the 53 | input. 54 | 55 | {b Note}. Repeated invocation always eventually returns [`End], even in case 56 | of errors. *) 57 | 58 | val decoder_byte_count : decoder -> int 59 | (** [decoder_byte_count d] is the number of characters already decoded on [d] 60 | (included malformed ones). This is the last {!decode}'s end output offset 61 | counting from beginning of the stream. *) 62 | 63 | val decoder_src : decoder -> src 64 | (** [decoder_src d] is [d]'s input source. *) 65 | 66 | val decoder_dangerous : decoder -> bool 67 | (** [decoder_dangerous d] returns [true] if encoded input does not respect the 68 | 80-columns rule. If your are interested in a best-effort decoding you can 69 | still continue to decode even if [decoder_dangerous d] returns [true]. 70 | Nothing grow automatically internally in this state. *) 71 | 72 | type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] 73 | (** The type for output destinations. With a [`Manual] destination the client 74 | must provide output storage with {!dst}. *) 75 | 76 | type encode = [ `Await | `End | `Char of char ] 77 | 78 | type encoder 79 | (** The type for Base64 (RFC2045) encoder. *) 80 | 81 | val encoder : dst -> encoder 82 | (** [encoder dst] is an encoder for Base64 (RFC2045) that outputs to [dst]. *) 83 | 84 | val encode : encoder -> encode -> [ `Ok | `Partial ] 85 | (** [encode e v]: is 86 | 87 | - [`Partial] iff [e] has a [`Manual] destination and needs more output 88 | storage. The client must use {!dst} to provide a new buffer and then call 89 | {!encode} with [`Await] until [`Ok] is returned. 90 | - [`Ok] when the encoder is ready to encode a new [`Char] or [`End] 91 | 92 | For [`Manual] destination, encoding [`End] always return [`Partial], the 93 | client should continue as usual with [`Await] until [`Ok] is returned at 94 | which point {!dst_rem} [encoder] is guaranteed to be the size of the last 95 | provided buffer (i.e. nothing was written). 96 | 97 | {b Raises.} [Invalid_argument] if a [`Char] or [`End] is encoded after a 98 | [`Partial] encode. *) 99 | 100 | val encoder_dst : encoder -> dst 101 | (** [encoder_dst encoder] is [encoder]'s output destination. *) 102 | 103 | val dst : encoder -> Bytes.t -> int -> int -> unit 104 | (** [dst e s j l] provides [e] with [l] bytes to write, starting at [j] in [s]. 105 | This byte range is written by calls to {!encode} with [e] until [`Partial] 106 | is returned. Use {!dst_rem} to know the remaining number of non-written free 107 | bytes in [s]. *) 108 | 109 | val dst_rem : encoder -> int 110 | (** [dst_rem e] is the remaining number of non-written, free bytes in the last 111 | buffer provided with {!dst}. *) 112 | -------------------------------------------------------------------------------- /fuzz/fuzz_rfc2045.ml: -------------------------------------------------------------------------------- 1 | open Crowbar 2 | 3 | exception Encode_error of string 4 | 5 | exception Decode_error of string 6 | 7 | (** Pretty printers *) 8 | 9 | let register_printer () = 10 | Printexc.register_printer (function 11 | | Encode_error err -> Some (Fmt.str "(Encoding error: %s)" err) 12 | | Decode_error err -> Some (Fmt.str "(Decoding error: %s)" err) 13 | | _ -> None) 14 | 15 | let pp_chr = 16 | let escaped = function ' ' .. '~' as c -> String.make 1 c | _ -> "." in 17 | Fmt.using escaped Fmt.string 18 | 19 | let pp_scalar : 20 | type buffer. 21 | get:(buffer -> int -> char) -> length:(buffer -> int) -> buffer Fmt.t = 22 | fun ~get ~length ppf b -> 23 | let l = length b in 24 | for i = 0 to l / 16 do 25 | Fmt.pf ppf "%08x: " (i * 16) ; 26 | let j = ref 0 in 27 | while !j < 16 do 28 | if (i * 16) + !j < l 29 | then Fmt.pf ppf "%02x" (Char.code @@ get b ((i * 16) + !j)) 30 | else Fmt.pf ppf " " ; 31 | if !j mod 2 <> 0 then Fmt.pf ppf " " ; 32 | incr j 33 | done ; 34 | Fmt.pf ppf " " ; 35 | j := 0 ; 36 | while !j < 16 do 37 | if (i * 16) + !j < l 38 | then Fmt.pf ppf "%a" pp_chr (get b ((i * 16) + !j)) 39 | else Fmt.pf ppf " " ; 40 | incr j 41 | done ; 42 | Fmt.pf ppf "@\n" 43 | done 44 | 45 | let pp = pp_scalar ~get:String.get ~length:String.length 46 | 47 | (** Encoding and decoding *) 48 | 49 | let check_encode str = 50 | let subs = Astring.String.cuts ~sep:"\r\n" str in 51 | let check str = 52 | if String.length str > 78 53 | then raise (Encode_error "too long string returned") in 54 | List.iter check subs ; 55 | str 56 | 57 | let encode input = 58 | let buf = Buffer.create 80 in 59 | let encoder = Base64_rfc2045.encoder (`Buffer buf) in 60 | String.iter 61 | (fun c -> 62 | let ret = Base64_rfc2045.encode encoder (`Char c) in 63 | match ret with `Ok -> () | _ -> assert false) 64 | (* XXX(dinosaure): [`Partial] can never occur. *) 65 | input ; 66 | let encode = Base64_rfc2045.encode encoder `End in 67 | match encode with 68 | | `Ok -> Buffer.contents buf |> check_encode 69 | | _ -> (* XXX(dinosaure): [`Partial] can never occur. *) assert false 70 | 71 | let decode input = 72 | let decoder = Base64_rfc2045.decoder (`String input) in 73 | let rec go acc = 74 | if Base64_rfc2045.decoder_dangerous decoder 75 | then raise (Decode_error "Dangerous input") ; 76 | match Base64_rfc2045.decode decoder with 77 | | `End -> List.rev acc 78 | | `Flush output -> go (output :: acc) 79 | | `Malformed _ -> raise (Decode_error "Malformed") 80 | | `Wrong_padding -> raise (Decode_error "Wrong padding") 81 | | _ -> (* XXX(dinosaure): [`Await] can never occur. *) assert false in 82 | String.concat "" (go []) 83 | 84 | (** String generators *) 85 | 86 | let bytes_fixed_range : string gen = dynamic_bind (range 78) bytes_fixed 87 | 88 | let char_from_alpha alpha : string gen = 89 | map [ range (String.length alpha) ] (fun i -> alpha.[i] |> String.make 1) 90 | 91 | let string_from_alpha n = 92 | let acc = const "" in 93 | let alpha = Base64_rfc2045.default_alphabet in 94 | let rec add_char_from_alpha alpha acc = function 95 | | 0 -> acc 96 | | n -> 97 | add_char_from_alpha alpha 98 | (concat_gen_list (const "") [ acc; char_from_alpha alpha ]) 99 | (n - 1) in 100 | add_char_from_alpha alpha acc n 101 | 102 | let random_string_from_alpha n = dynamic_bind (range n) string_from_alpha 103 | 104 | let bytes_fixed_range_from_alpha : string gen = 105 | dynamic_bind (range 78) bytes_fixed 106 | 107 | let set_canonic str = 108 | let l = String.length str in 109 | let to_drop = l * 6 mod 8 in 110 | if to_drop = 6 111 | (* XXX(clecat): Case when we need to drop 6 bits which means a whole letter *) 112 | then String.sub str 0 (l - 1) 113 | else if to_drop <> 0 114 | (* XXX(clecat): Case when we need to drop 2 or 4 bits: we apply a mask droping the bits *) 115 | then ( 116 | let buf = Bytes.of_string str in 117 | let value = 118 | String.index Base64_rfc2045.default_alphabet (Bytes.get buf (l - 1)) in 119 | let canonic = 120 | Base64_rfc2045.default_alphabet.[value land lnot ((1 lsl to_drop) - 1)] 121 | in 122 | Bytes.set buf (l - 1) canonic ; 123 | Bytes.unsafe_to_string buf) 124 | else str 125 | 126 | let add_padding str = 127 | let str = set_canonic str in 128 | let str = str ^ "===" in 129 | String.sub str 0 (String.length str / 4 * 4) 130 | 131 | (** Tests *) 132 | 133 | let e2d inputs = 134 | let input = String.concat "\r\n" inputs in 135 | let encode = encode input in 136 | let decode = decode encode in 137 | check_eq ~pp ~cmp:String.compare ~eq:String.equal input decode 138 | 139 | let d2e inputs end_input = 140 | let end_input = add_padding end_input in 141 | let inputs = inputs @ [ end_input ] in 142 | let input = 143 | List.fold_left 144 | (fun acc s -> if String.length s <> 0 then acc ^ "\r\n" ^ s else acc) 145 | (List.hd inputs) (List.tl inputs) in 146 | let decode = decode input in 147 | let encode = encode decode in 148 | check_eq ~pp ~cmp:String.compare ~eq:String.equal input encode 149 | 150 | let () = 151 | register_printer () ; 152 | add_test ~name:"rfc2045: encode -> decode" [ list bytes_fixed_range ] e2d ; 153 | add_test ~name:"rfc2045: decode -> encode" 154 | [ list (string_from_alpha 76); random_string_from_alpha 76 ] 155 | d2e 156 | -------------------------------------------------------------------------------- /fuzz/fuzz_rfc4648.ml: -------------------------------------------------------------------------------- 1 | open Crowbar 2 | 3 | let pp_chr = 4 | let escaped = function ' ' .. '~' as c -> String.make 1 c | _ -> "." in 5 | Fmt.using escaped Fmt.string 6 | 7 | let pp_scalar : 8 | type buffer. 9 | get:(buffer -> int -> char) -> length:(buffer -> int) -> buffer Fmt.t = 10 | fun ~get ~length ppf b -> 11 | let l = length b in 12 | for i = 0 to l / 16 do 13 | Fmt.pf ppf "%08x: " (i * 16) ; 14 | let j = ref 0 in 15 | while !j < 16 do 16 | if (i * 16) + !j < l 17 | then Fmt.pf ppf "%02x" (Char.code @@ get b ((i * 16) + !j)) 18 | else Fmt.pf ppf " " ; 19 | if !j mod 2 <> 0 then Fmt.pf ppf " " ; 20 | incr j 21 | done ; 22 | Fmt.pf ppf " " ; 23 | j := 0 ; 24 | while !j < 16 do 25 | if (i * 16) + !j < l 26 | then Fmt.pf ppf "%a" pp_chr (get b ((i * 16) + !j)) 27 | else Fmt.pf ppf " " ; 28 | incr j 29 | done ; 30 | Fmt.pf ppf "@\n" 31 | done 32 | 33 | let pp = pp_scalar ~get:String.get ~length:String.length 34 | 35 | let ( <.> ) f g x = f (g x) 36 | 37 | let char_from_alphabet alphabet : string gen = 38 | map [ range 64 ] (String.make 1 <.> String.get (Base64.alphabet alphabet)) 39 | 40 | let random_string_from_alphabet alphabet len : string gen = 41 | let rec add_char_from_alphabet acc = function 42 | | 0 -> acc 43 | | n -> 44 | add_char_from_alphabet 45 | (concat_gen_list (const "") [ acc; char_from_alphabet alphabet ]) 46 | (n - 1) in 47 | add_char_from_alphabet (const "") len 48 | 49 | let random_string_from_alphabet ~max alphabet = 50 | dynamic_bind (range max) @@ fun real_len -> 51 | dynamic_bind (random_string_from_alphabet alphabet real_len) @@ fun input -> 52 | if real_len <= 1 53 | then const (input, 0, real_len) 54 | else 55 | dynamic_bind (range (real_len / 2)) @@ fun off -> 56 | map [ range (real_len - off) ] (fun len -> (input, off, len)) 57 | 58 | let encode_and_decode (input, off, len) = 59 | match Base64.encode ~pad:true ~off ~len input with 60 | | Error (`Msg err) -> fail err 61 | | Ok result -> 62 | match Base64.decode ~pad:true result with 63 | | Error (`Msg err) -> fail err 64 | | Ok result -> 65 | check_eq ~pp ~cmp:String.compare ~eq:String.equal result 66 | (String.sub input off len) 67 | 68 | let decode_and_encode (input, off, len) = 69 | match Base64.decode ~pad:true ~off ~len input with 70 | | Error (`Msg err) -> fail err 71 | | Ok result -> 72 | match Base64.encode ~pad:true result with 73 | | Error (`Msg err) -> fail err 74 | | Ok result -> 75 | check_eq ~pp:Fmt.string ~cmp:String.compare ~eq:String.equal result 76 | (String.sub input off len) 77 | 78 | let ( // ) x y = 79 | if y < 1 then raise Division_by_zero ; 80 | if x > 0 then 1 + ((x - 1) / y) else 0 81 | [@@inline] 82 | 83 | let canonic alphabet = 84 | let dmap = Array.make 256 (-1) in 85 | String.iteri (fun i x -> dmap.(Char.code x) <- i) (Base64.alphabet alphabet) ; 86 | fun (input, off, len) -> 87 | let real_len = String.length input in 88 | let input_len = len in 89 | let normalized_len = input_len // 4 * 4 in 90 | if normalized_len = input_len 91 | then (input, off, input_len) 92 | else if normalized_len - input_len = 3 93 | then (input, off, input_len - 1) 94 | else 95 | let remainder_len = normalized_len - input_len in 96 | let last = input.[off + input_len - 1] in 97 | let output = Bytes.make (max real_len (off + normalized_len)) '=' in 98 | 99 | Bytes.blit_string input 0 output 0 (off + input_len) ; 100 | if off + normalized_len < real_len 101 | then 102 | Bytes.blit_string input (off + normalized_len) output 103 | (off + normalized_len) 104 | (real_len - (off + normalized_len)) ; 105 | 106 | let mask = 107 | match remainder_len with 1 -> 0x3c | 2 -> 0x30 | _ -> assert false in 108 | let decoded = dmap.(Char.code last) in 109 | let canonic = decoded land mask in 110 | let encoded = (Base64.alphabet alphabet).[canonic] in 111 | Bytes.set output (off + input_len - 1) encoded ; 112 | (Bytes.unsafe_to_string output, off, normalized_len) 113 | 114 | let isomorphism0 (input, off, len) = 115 | (* x0 = decode(input) && x1 = decode(encode(x0)) && x0 = x1 *) 116 | match Base64.decode ~pad:false ~off ~len input with 117 | | Error (`Msg err) -> fail err 118 | | Ok result0 -> ( 119 | let result1 = Base64.encode_exn result0 in 120 | match Base64.decode ~pad:true result1 with 121 | | Error (`Msg err) -> fail err 122 | | Ok result2 -> 123 | check_eq ~pp ~cmp:String.compare ~eq:String.equal result0 result2) 124 | 125 | let isomorphism1 (input, off, len) = 126 | let result0 = Base64.encode_exn ~off ~len input in 127 | match Base64.decode ~pad:true result0 with 128 | | Error (`Msg err) -> fail err 129 | | Ok result1 -> 130 | let result2 = Base64.encode_exn result1 in 131 | check_eq ~pp:Fmt.string ~cmp:String.compare ~eq:String.equal result0 132 | result2 133 | 134 | let bytes_and_range : (string * int * int) gen = 135 | dynamic_bind bytes @@ fun t -> 136 | let real_length = String.length t in 137 | if real_length <= 1 138 | then const (t, 0, real_length) 139 | else 140 | dynamic_bind (range (real_length / 2)) @@ fun off -> 141 | map [ range (real_length - off) ] (fun len -> (t, off, len)) 142 | 143 | let range_of_max max : (int * int) gen = 144 | dynamic_bind (range (max / 2)) @@ fun off -> 145 | map [ range (max - off) ] (fun len -> (off, len)) 146 | 147 | let failf fmt = Fmt.kstr fail fmt 148 | 149 | let no_exception pad off len input = 150 | try 151 | let _ = 152 | Base64.decode ?pad ?off ?len ~alphabet:Base64.default_alphabet input in 153 | () 154 | with exn -> failf "decode fails with: %s." (Printexc.to_string exn) 155 | 156 | let () = 157 | add_test ~name:"rfc4648: encode -> decode" [ bytes_and_range ] 158 | encode_and_decode ; 159 | add_test ~name:"rfc4648: decode -> encode" 160 | [ random_string_from_alphabet ~max:1000 Base64.default_alphabet ] 161 | (decode_and_encode <.> canonic Base64.default_alphabet) ; 162 | add_test ~name:"rfc4648: x = decode(encode(x))" 163 | [ random_string_from_alphabet ~max:1000 Base64.default_alphabet ] 164 | isomorphism0 ; 165 | add_test ~name:"rfc4648: x = encode(decode(x))" [ bytes_and_range ] 166 | isomorphism1 ; 167 | add_test ~name:"rfc4648: no exception leak" 168 | [ option bool; option int; option int; bytes ] 169 | no_exception 170 | -------------------------------------------------------------------------------- /test/test.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2016 Anil Madhavapeddy 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | * 16 | *) 17 | 18 | open Printf 19 | open Rresult 20 | 21 | (* Test vectors from RFC4648 22 | BASE64("") = "" 23 | BASE64("f") = "Zg==" 24 | BASE64("fo") = "Zm8=" 25 | BASE64("foo") = "Zm9v" 26 | BASE64("foob") = "Zm9vYg==" 27 | BASE64("fooba") = "Zm9vYmE=" 28 | BASE64("foobar") = "Zm9vYmFy" 29 | *) 30 | 31 | let rfc4648_tests = 32 | [ 33 | ("", ""); 34 | ("f", "Zg=="); 35 | ("fo", "Zm8="); 36 | ("foo", "Zm9v"); 37 | ("foob", "Zm9vYg=="); 38 | ("fooba", "Zm9vYmE="); 39 | ("foobar", "Zm9vYmFy"); 40 | ] 41 | 42 | let hannes_tests = 43 | [ 44 | ("dummy", "ZHVtbXk="); 45 | ("dummy", "ZHVtbXk"); 46 | ("dummy", "ZHVtbXk=="); 47 | ("dummy", "ZHVtbXk==="); 48 | ("dummy", "ZHVtbXk===="); 49 | ("dummy", "ZHVtbXk====="); 50 | ("dummy", "ZHVtbXk======"); 51 | ] 52 | 53 | let php_tests = 54 | [ 55 | ( "πάντα χωρεῖ καὶ οὐδὲν μένει …", 56 | "z4DOrM69z4TOsSDPh8-Jz4HOteG_liDOus6x4b22IM6_4b2QzrThvbLOvSDOvM6tzr3Otc65IOKApg" 57 | ); 58 | ] 59 | 60 | let rfc3548_tests = 61 | [ 62 | ("\x14\xfb\x9c\x03\xd9\x7e", "FPucA9l+"); 63 | ("\x14\xfb\x9c\x03\xd9", "FPucA9k="); 64 | ("\x14\xfb\x9c\x03", "FPucAw=="); 65 | ] 66 | 67 | let cfcs_tests = 68 | [ 69 | (0, 2, "\004", "BB"); 70 | (1, 2, "\004", "ABB"); 71 | (1, 2, "\004", "ABBA"); 72 | (2, 2, "\004", "AABBA"); 73 | (2, 2, "\004", "AABBAA"); 74 | (0, 0, "", "BB"); 75 | (1, 0, "", "BB"); 76 | (2, 0, "", "BB"); 77 | ] 78 | 79 | let nocrypto_tests = 80 | [ 81 | ("\x00\x5a\x6d\x39\x76", None); 82 | ("\x5a\x6d\x39\x76", Some "\x66\x6f\x6f"); 83 | ("\x5a\x6d\x39\x76\x76", None); 84 | ("\x5a\x6d\x39\x76\x76\x76", None); 85 | ("\x5a\x6d\x39\x76\x76\x76\x76", None); 86 | ("\x5a\x6d\x39\x76\x00", None); 87 | ("\x5a\x6d\x39\x76\x62\x77\x3d\x3d", Some "\x66\x6f\x6f\x6f"); 88 | ("\x5a\x6d\x39\x76\x62\x77\x3d\x3d\x00", None); 89 | ("\x5a\x6d\x39\x76\x62\x77\x3d\x3d\x00\x01", None); 90 | ("\x5a\x6d\x39\x76\x62\x77\x3d\x3d\x00\x01\x02", None); 91 | ("\x5a\x6d\x39\x76\x62\x77\x3d\x3d\x00\x01\x02\x03", None); 92 | ("\x5a\x6d\x39\x76\x62\x32\x38\x3d", Some "\x66\x6f\x6f\x6f\x6f"); 93 | ("\x5a\x6d\x39\x76\x62\x32\x39\x76", Some "\x66\x6f\x6f\x6f\x6f\x6f"); 94 | ("YWE=", Some "aa"); 95 | ("YWE==", None); 96 | ("YWE===", None); 97 | ("YWE=====", None); 98 | ("YWE======", None); 99 | ] 100 | 101 | let alphabet_size () = 102 | List.iter 103 | (fun (name, alphabet) -> 104 | Alcotest.(check int) 105 | (sprintf "Alphabet size %s = 64" name) 106 | 64 107 | (Base64.length_alphabet alphabet)) 108 | [ 109 | ("default", Base64.default_alphabet); 110 | ("uri_safe", Base64.uri_safe_alphabet); 111 | ] 112 | 113 | (* Encode using OpenSSL `base64` utility *) 114 | let openssl_encode buf = 115 | Bos.( 116 | OS.Cmd.in_string buf 117 | |> OS.Cmd.run_io (Cmd.v "base64") 118 | |> OS.Cmd.to_string ~trim:true) 119 | |> function 120 | | Ok r -> 121 | prerr_endline r ; 122 | r 123 | | Error (`Msg e) -> raise (Failure (sprintf "OpenSSL decode: %s" e)) 124 | 125 | (* Encode using this library *) 126 | let lib_encode buf = Base64.encode_exn ~pad:true buf 127 | 128 | let test_rfc4648 () = 129 | List.iter 130 | (fun (c, r) -> 131 | (* Base64 vs openssl *) 132 | Alcotest.(check string) 133 | (sprintf "encode %s" c) (openssl_encode c) (lib_encode c) ; 134 | (* Base64 vs test cases above *) 135 | Alcotest.(check string) (sprintf "encode rfc4648 %s" c) r (lib_encode c) ; 136 | (* Base64 decode vs library *) 137 | Alcotest.(check string) (sprintf "decode %s" r) c (Base64.decode_exn r)) 138 | rfc4648_tests 139 | 140 | let test_rfc3548 () = 141 | List.iter 142 | (fun (c, r) -> 143 | (* Base64 vs openssl *) 144 | Alcotest.(check string) 145 | (sprintf "encode %s" c) (openssl_encode c) (lib_encode c) ; 146 | (* Base64 vs test cases above *) 147 | Alcotest.(check string) (sprintf "encode rfc3548 %s" c) r (lib_encode c) ; 148 | (* Base64 decode vs library *) 149 | Alcotest.(check string) (sprintf "decode %s" r) c (Base64.decode_exn r)) 150 | rfc3548_tests 151 | 152 | let test_hannes () = 153 | List.iter 154 | (fun (c, r) -> 155 | (* Base64 vs test cases above *) 156 | Alcotest.(check string) 157 | (sprintf "decode %s" r) c 158 | (Base64.decode_exn ~pad:false r)) 159 | hannes_tests 160 | 161 | let test_php () = 162 | List.iter 163 | (fun (c, r) -> 164 | Alcotest.(check string) 165 | (sprintf "decode %s" r) c 166 | (Base64.decode_exn ~pad:false ~alphabet:Base64.uri_safe_alphabet r)) 167 | php_tests 168 | 169 | let test_cfcs () = 170 | List.iter 171 | (fun (off, len, c, r) -> 172 | Alcotest.(check string) 173 | (sprintf "decode %s" r) c 174 | (Base64.decode_exn ~pad:false ~off ~len r)) 175 | cfcs_tests 176 | 177 | let test_nocrypto () = 178 | List.iter 179 | (fun (input, res) -> 180 | let res' = 181 | match Base64.decode ~pad:true input with 182 | | Ok v -> Some v 183 | | Error _ -> None in 184 | Alcotest.(check (option string)) (sprintf "decode %S" input) res' res) 185 | nocrypto_tests 186 | 187 | exception Malformed 188 | 189 | exception Wrong_padding 190 | 191 | let strict_base64_rfc2045_of_string x = 192 | let decoder = Base64_rfc2045.decoder (`String x) in 193 | let res = Buffer.create 16 in 194 | 195 | let rec go () = 196 | match Base64_rfc2045.decode decoder with 197 | | `End -> () 198 | | `Wrong_padding -> raise Wrong_padding 199 | | `Malformed _ -> raise Malformed 200 | | `Flush x -> 201 | Buffer.add_string res x ; 202 | go () 203 | | `Await -> Alcotest.failf "Retrieve impossible case: `Await" in 204 | 205 | Base64_rfc2045.src decoder (Bytes.unsafe_of_string x) 0 (String.length x) ; 206 | go () ; 207 | Buffer.contents res 208 | 209 | let relaxed_base64_rfc2045_of_string x = 210 | let decoder = Base64_rfc2045.decoder (`String x) in 211 | let res = Buffer.create 16 in 212 | 213 | let rec go () = 214 | match Base64_rfc2045.decode decoder with 215 | | `End -> () 216 | | `Wrong_padding -> go () 217 | | `Malformed _ -> go () 218 | | `Flush x -> 219 | Buffer.add_string res x ; 220 | go () 221 | | `Await -> Alcotest.failf "Retrieve impossible case: `Await" in 222 | 223 | Base64_rfc2045.src decoder (Bytes.unsafe_of_string x) 0 (String.length x) ; 224 | go () ; 225 | Buffer.contents res 226 | 227 | let test_strict_rfc2045 = 228 | [ 229 | ( "c2FsdXQgbGVzIGNvcGFpbnMgZmF1dCBhYnNvbHVtZW50IHF1ZSBqZSBkw6lwYXNzZSBsZXMgODAg\r\n\ 230 | Y2hhcmFjdGVycyBwb3VyIHZvaXIgc2kgbW9uIGVuY29kZXIgZml0cyBiaWVuIGRhbnMgbGVzIGxp\r\n\ 231 | bWl0ZXMgZGUgbGEgUkZDIDIwNDUgLi4u", 232 | "salut les copains faut absolument que je dépasse les 80 characters \ 233 | pour voir si mon encoder fits bien dans les limites de la RFC 2045 ..." 234 | ); 235 | ("", ""); 236 | ("Zg==", "f"); 237 | ("Zm8=", "fo"); 238 | ("Zm9v", "foo"); 239 | ("Zm9vYg==", "foob"); 240 | ("Zm9vYmE=", "fooba"); 241 | ("Zm9vYmFy", "foobar"); 242 | ] 243 | 244 | let test_relaxed_rfc2045 = 245 | [ 246 | ("Zg", "f"); 247 | ("Zm\n8", "fo"); 248 | ("Zm\r9v", "foo"); 249 | ("Zm9 vYg", "foob"); 250 | ("Zm9\r\n vYmE", "fooba"); 251 | ("Zm9évYmFy", "foobar"); 252 | ] 253 | 254 | let strict_base64_rfc2045_to_string x = 255 | let res = Buffer.create 16 in 256 | let encoder = Base64_rfc2045.encoder (`Buffer res) in 257 | String.iter 258 | (fun chr -> 259 | match Base64_rfc2045.encode encoder (`Char chr) with 260 | | `Ok -> () 261 | | `Partial -> 262 | Alcotest.failf "Retrieve impossible case for (`Char %02x): `Partial" 263 | (Char.code chr)) 264 | x ; 265 | match Base64_rfc2045.encode encoder `End with 266 | | `Ok -> Buffer.contents res 267 | | `Partial -> Alcotest.fail "Retrieve impossible case for `End: `Partial" 268 | 269 | let test_strict_with_malformed_input_rfc2045 = 270 | List.mapi 271 | (fun i (has, _) -> 272 | Alcotest.test_case (Fmt.str "strict rfc2045 - %02d" i) `Quick @@ fun () -> 273 | try 274 | let _ = strict_base64_rfc2045_of_string has in 275 | Alcotest.failf "Strict parser valids malformed input: %S" has 276 | with Malformed | Wrong_padding -> ()) 277 | test_relaxed_rfc2045 278 | 279 | let test_strict_rfc2045 = 280 | List.mapi 281 | (fun i (has, expect) -> 282 | Alcotest.test_case (Fmt.str "strict rfc2045 - %02d" i) `Quick @@ fun () -> 283 | try 284 | let res0 = strict_base64_rfc2045_of_string has in 285 | let res1 = strict_base64_rfc2045_to_string res0 in 286 | Alcotest.(check string) "encode(decode(x)) = x" res1 has ; 287 | Alcotest.(check string) "decode(x)" res0 expect 288 | with Malformed | Wrong_padding -> Alcotest.failf "Invalid input %S" has) 289 | test_strict_rfc2045 290 | 291 | let test_relaxed_rfc2045 = 292 | List.mapi 293 | (fun i (has, expect) -> 294 | Alcotest.test_case (Fmt.str "relaxed rfc2045 - %02d" i) `Quick 295 | @@ fun () -> 296 | let res0 = relaxed_base64_rfc2045_of_string has in 297 | Alcotest.(check string) "decode(x)" res0 expect) 298 | test_relaxed_rfc2045 299 | 300 | let test_invariants = [ ("Alphabet size", `Quick, alphabet_size) ] 301 | 302 | let test_codec = 303 | [ 304 | ("RFC4648 test vectors", `Quick, test_rfc4648); 305 | ("RFC3548 test vectors", `Quick, test_rfc3548); 306 | ("Hannes test vectors", `Quick, test_hannes); 307 | ("Cfcs test vectors", `Quick, test_cfcs); 308 | ("PHP test vectors", `Quick, test_php); 309 | ("Nocrypto test vectors", `Quick, test_nocrypto); 310 | ] 311 | 312 | let () = 313 | Alcotest.run "Base64" 314 | [ 315 | ("invariants", test_invariants); 316 | ("codec", test_codec); 317 | ("rfc2045 (0)", test_strict_rfc2045); 318 | ("rfc2045 (1)", test_strict_with_malformed_input_rfc2045); 319 | ("rfc2045 (2)", test_relaxed_rfc2045); 320 | ] 321 | -------------------------------------------------------------------------------- /src/base64.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2006-2009 Citrix Systems Inc. 3 | * Copyright (c) 2010 Thomas Gazagnaire 4 | * Copyright (c) 2014-2016 Anil Madhavapeddy 5 | * Copyright (c) 2016 David Kaloper Meršinjak 6 | * Copyright (c) 2018 Romain Calascibetta 7 | * 8 | * Permission to use, copy, modify, and distribute this software for any 9 | * purpose with or without fee is hereby granted, provided that the above 10 | * copyright notice and this permission notice appear in all copies. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 | * 20 | *) 21 | 22 | type alphabet = { emap : int array; dmap : int array } 23 | 24 | type sub = string * int * int 25 | 26 | let ( // ) x y = 27 | if y < 1 then raise Division_by_zero ; 28 | if x > 0 then 1 + ((x - 1) / y) else 0 29 | [@@inline] 30 | 31 | let unsafe_get_uint8 t off = Char.code (String.unsafe_get t off) 32 | 33 | let unsafe_set_uint8 t off v = Bytes.unsafe_set t off (Char.chr v) 34 | 35 | external unsafe_set_uint16 : bytes -> int -> int -> unit = "%caml_bytes_set16u" 36 | [@@noalloc] 37 | 38 | external unsafe_get_uint16 : string -> int -> int = "%caml_string_get16u" 39 | [@@noalloc] 40 | 41 | external swap16 : int -> int = "%bswap16" [@@noalloc] 42 | 43 | let none = -1 44 | 45 | (* We mostly want to have an optional array for [dmap] (e.g. [int option 46 | array]). So we consider the [none] value as [-1]. *) 47 | 48 | let make_alphabet alphabet = 49 | if String.length alphabet <> 64 50 | then invalid_arg "Length of alphabet must be 64" ; 51 | if String.contains alphabet '=' 52 | then invalid_arg "Alphabet can not contain padding character" ; 53 | let emap = 54 | Array.init (String.length alphabet) (fun i -> Char.code alphabet.[i]) in 55 | let dmap = Array.make 256 none in 56 | String.iteri (fun idx chr -> dmap.(Char.code chr) <- idx) alphabet ; 57 | { emap; dmap } 58 | 59 | let length_alphabet { emap; _ } = Array.length emap 60 | 61 | let alphabet { emap; _ } = 62 | String.init (Array.length emap) (fun i -> Char.chr emap.(i)) 63 | 64 | let default_alphabet = 65 | make_alphabet 66 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 67 | 68 | let uri_safe_alphabet = 69 | make_alphabet 70 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 71 | 72 | let unsafe_set_be_uint16 = 73 | if Sys.big_endian 74 | then fun t off v -> unsafe_set_uint16 t off v 75 | else fun t off v -> unsafe_set_uint16 t off (swap16 v) 76 | 77 | (* We make this exception to ensure to keep a control about which exception we 78 | can raise and avoid appearance of unknown exceptions like an ex-nihilo 79 | magic rabbit (or magic money?). *) 80 | exception Out_of_bounds 81 | 82 | exception Too_much_input 83 | 84 | let get_uint8 t off = 85 | if off < 0 || off >= String.length t then raise Out_of_bounds ; 86 | unsafe_get_uint8 t off 87 | 88 | let padding = int_of_char '=' 89 | 90 | let error_msgf fmt = Format.ksprintf (fun err -> Error (`Msg err)) fmt 91 | 92 | let encode_sub pad { emap; _ } ?(off = 0) ?len input = 93 | let len = 94 | match len with Some len -> len | None -> String.length input - off in 95 | 96 | if len < 0 || off < 0 || off > String.length input - len 97 | then error_msgf "Invalid bounds" 98 | else 99 | let n = len in 100 | let n' = n // 3 * 4 in 101 | let res = Bytes.create n' in 102 | 103 | let emap i = Array.unsafe_get emap i in 104 | 105 | let emit b1 b2 b3 i = 106 | unsafe_set_be_uint16 res i 107 | ((emap ((b1 lsr 2) land 0x3f) lsl 8) 108 | lor emap ((b1 lsl 4) lor (b2 lsr 4) land 0x3f)) ; 109 | unsafe_set_be_uint16 res (i + 2) 110 | ((emap ((b2 lsl 2) lor (b3 lsr 6) land 0x3f) lsl 8) 111 | lor emap (b3 land 0x3f)) in 112 | 113 | let rec enc j i = 114 | if i = n 115 | then () 116 | else if i = n - 1 117 | then emit (unsafe_get_uint8 input (off + i)) 0 0 j 118 | else if i = n - 2 119 | then 120 | emit 121 | (unsafe_get_uint8 input (off + i)) 122 | (unsafe_get_uint8 input (off + i + 1)) 123 | 0 j 124 | else ( 125 | emit 126 | (unsafe_get_uint8 input (off + i)) 127 | (unsafe_get_uint8 input (off + i + 1)) 128 | (unsafe_get_uint8 input (off + i + 2)) 129 | j ; 130 | enc (j + 4) (i + 3)) in 131 | 132 | let rec unsafe_fix = function 133 | | 0 -> () 134 | | i -> 135 | unsafe_set_uint8 res (n' - i) padding ; 136 | unsafe_fix (i - 1) in 137 | 138 | enc 0 0 ; 139 | 140 | let pad_to_write = (3 - (n mod 3)) mod 3 in 141 | 142 | if pad 143 | then ( 144 | unsafe_fix pad_to_write ; 145 | Ok (Bytes.unsafe_to_string res, 0, n')) 146 | else Ok (Bytes.unsafe_to_string res, 0, n' - pad_to_write) 147 | 148 | (* [pad = false], we don't want to write them. *) 149 | 150 | let encode ?(pad = true) ?(alphabet = default_alphabet) ?off ?len input = 151 | match encode_sub pad alphabet ?off ?len input with 152 | | Ok (res, off, len) -> Ok (String.sub res off len) 153 | | Error _ as err -> err 154 | 155 | let encode_string ?pad ?alphabet input = 156 | match encode ?pad ?alphabet input with 157 | | Ok res -> res 158 | | Error _ -> assert false 159 | 160 | let encode_sub ?(pad = true) ?(alphabet = default_alphabet) ?off ?len input = 161 | encode_sub pad alphabet ?off ?len input 162 | 163 | let encode_exn ?pad ?alphabet ?off ?len input = 164 | match encode ?pad ?alphabet ?off ?len input with 165 | | Ok v -> v 166 | | Error (`Msg err) -> invalid_arg err 167 | 168 | let decode_sub ?(pad = true) { dmap; _ } ?(off = 0) ?len input = 169 | let len = 170 | match len with Some len -> len | None -> String.length input - off in 171 | 172 | if len < 0 || off < 0 || off > String.length input - len 173 | then error_msgf "Invalid bounds" 174 | else 175 | let n = len // 4 * 4 in 176 | let n' = n // 4 * 3 in 177 | let res = Bytes.create n' in 178 | let invalid_pad_overflow = pad in 179 | 180 | let get_uint8_or_padding = 181 | if pad 182 | then (fun t i -> 183 | if i >= len then raise Out_of_bounds ; 184 | get_uint8 t (off + i)) 185 | else 186 | fun t i -> 187 | try if i < len then get_uint8 t (off + i) else padding 188 | with Out_of_bounds -> padding in 189 | 190 | let set_be_uint16 t off v = 191 | (* can not write 2 bytes. *) 192 | if off < 0 || off + 1 > Bytes.length t 193 | then () (* can not write 1 byte but can write 1 byte *) 194 | else if off < 0 || off + 2 > Bytes.length t 195 | then unsafe_set_uint8 t off (v lsr 8) (* can write 2 bytes. *) 196 | else unsafe_set_be_uint16 t off v in 197 | 198 | let set_uint8 t off v = 199 | if off < 0 || off >= Bytes.length t then () else unsafe_set_uint8 t off v 200 | in 201 | 202 | let emit a b c d j = 203 | let x = (a lsl 18) lor (b lsl 12) lor (c lsl 6) lor d in 204 | set_be_uint16 res j (x lsr 8) ; 205 | set_uint8 res (j + 2) (x land 0xff) in 206 | 207 | let dmap i = 208 | let x = Array.unsafe_get dmap i in 209 | if x = none then raise Not_found ; 210 | x in 211 | 212 | let only_padding pad idx = 213 | (* because we round length of [res] to the upper bound of how many 214 | characters we should have from [input], we got at this stage only padding 215 | characters and we need to delete them, so for each [====], we delete 3 216 | bytes. *) 217 | let pad = ref (pad + 3) in 218 | let idx = ref idx in 219 | 220 | while !idx + 4 < len do 221 | (* use [unsafe_get_uint16] instead [unsafe_get_uint32] to avoid allocation 222 | of [int32]. Of course, [3d3d3d3d] is [====]. *) 223 | if unsafe_get_uint16 input (off + !idx) <> 0x3d3d 224 | || unsafe_get_uint16 input (off + !idx + 2) <> 0x3d3d 225 | then raise Not_found ; 226 | 227 | (* We got something bad, should be a valid character according to 228 | [alphabet] but outside the scope. *) 229 | idx := !idx + 4 ; 230 | pad := !pad + 3 231 | done ; 232 | while !idx < len do 233 | if unsafe_get_uint8 input (off + !idx) <> padding then raise Not_found ; 234 | 235 | incr idx 236 | done ; 237 | !pad in 238 | 239 | let rec dec j i = 240 | if i = n 241 | then 0 242 | else 243 | let d, pad = 244 | let x = get_uint8_or_padding input (i + 3) in 245 | try (dmap x, 0) with Not_found when x = padding -> (0, 1) in 246 | (* [Not_found] iff [x ∉ alphabet and x <> '='] can leak. *) 247 | let c, pad = 248 | let x = get_uint8_or_padding input (i + 2) in 249 | try (dmap x, pad) 250 | with Not_found when x = padding && pad = 1 -> (0, 2) in 251 | (* [Not_found] iff [x ∉ alphabet and x <> '='] can leak. *) 252 | let b, pad = 253 | let x = get_uint8_or_padding input (i + 1) in 254 | try (dmap x, pad) 255 | with Not_found when x = padding && pad = 2 -> (0, 3) in 256 | (* [Not_found] iff [x ∉ alphabet and x <> '='] can leak. *) 257 | let a, pad = 258 | let x = get_uint8_or_padding input i in 259 | try (dmap x, pad) 260 | with Not_found when x = padding && pad = 3 -> (0, 4) in 261 | 262 | (* [Not_found] iff [x ∉ alphabet and x <> '='] can leak. *) 263 | emit a b c d j ; 264 | 265 | if i + 4 = n (* end of input in anyway *) 266 | then 267 | match pad with 268 | | 0 -> 0 269 | | 4 -> 270 | (* assert (invalid_pad_overflow = false) ; *) 271 | 3 272 | (* [get_uint8] lies and if we get [4], that mean we got one or more (at 273 | most 4) padding character. In this situation, because we round length 274 | of [res] (see [n // 4]), we need to delete 3 bytes. *) 275 | | pad -> pad 276 | else 277 | match pad with 278 | | 0 -> dec (j + 3) (i + 4) 279 | | 4 -> 280 | (* assert (invalid_pad_overflow = false) ; *) 281 | only_padding 3 (i + 4) 282 | (* Same situation than above but we should get only more padding 283 | characters then. *) 284 | | pad -> 285 | if invalid_pad_overflow = true then raise Too_much_input ; 286 | only_padding pad (i + 4) in 287 | 288 | match dec 0 0 with 289 | | 0 -> Ok (Bytes.unsafe_to_string res, 0, n') 290 | | pad -> Ok (Bytes.unsafe_to_string res, 0, n' - pad) 291 | | exception Out_of_bounds -> 292 | error_msgf "Wrong padding" 293 | (* appear only when [pad = true] and when length of input is not a multiple of 4. *) 294 | | exception Not_found -> 295 | (* appear when one character of [input] ∉ [alphabet] and this character <> '=' *) 296 | error_msgf "Malformed input" 297 | | exception Too_much_input -> error_msgf "Too much input" 298 | 299 | let decode ?pad ?(alphabet = default_alphabet) ?off ?len input = 300 | match decode_sub ?pad alphabet ?off ?len input with 301 | | Ok (res, off, len) -> Ok (String.sub res off len) 302 | | Error _ as err -> err 303 | 304 | let decode_sub ?pad ?(alphabet = default_alphabet) ?off ?len input = 305 | decode_sub ?pad alphabet ?off ?len input 306 | 307 | let decode_exn ?pad ?alphabet ?off ?len input = 308 | match decode ?pad ?alphabet ?off ?len input with 309 | | Ok res -> res 310 | | Error (`Msg err) -> invalid_arg err 311 | -------------------------------------------------------------------------------- /src/base64_rfc2045.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2018 Romain Calascibetta 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | * 16 | *) 17 | 18 | let default_alphabet = 19 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 20 | 21 | let io_buffer_size = 65536 22 | 23 | let invalid_arg fmt = Format.ksprintf (fun s -> invalid_arg s) fmt 24 | 25 | let invalid_bounds off len = 26 | invalid_arg "Invalid bounds (off: %d, len: %d)" off len 27 | 28 | let malformed chr = `Malformed (String.make 1 chr) 29 | 30 | let unsafe_byte source off pos = Bytes.unsafe_get source (off + pos) 31 | 32 | let unsafe_blit = Bytes.unsafe_blit 33 | 34 | let unsafe_chr = Char.unsafe_chr 35 | 36 | let unsafe_set_chr source off chr = Bytes.unsafe_set source off chr 37 | 38 | type state = { quantum : int; size : int; buffer : Bytes.t } 39 | 40 | let continue state (quantum, size) = `Continue { state with quantum; size } 41 | 42 | let flush state = `Flush { state with quantum = 0; size = 0 } 43 | 44 | let table = 45 | "\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\255\062\255\255\255\063\052\053\054\055\056\057\058\059\060\061\255\255\255\255\255\255\255\000\001\002\003\004\005\006\007\008\009\010\011\012\013\014\015\016\017\018\019\020\021\022\023\024\025\255\255\255\255\255\255\026\027\028\029\030\031\032\033\034\035\036\037\038\039\040\041\042\043\044\045\046\047\048\049\050\051\255\255\255\255\255" 46 | 47 | let r_repr ({ quantum; size; _ } as state) chr = 48 | (* assert (0 <= off && 0 <= len && off + len <= String.length source); *) 49 | (* assert (len >= 1); *) 50 | let code = Char.code table.[Char.code chr] in 51 | match size with 52 | | 0 -> continue state (code, 1) 53 | | 1 -> continue state ((quantum lsl 6) lor code, 2) 54 | | 2 -> continue state ((quantum lsl 6) lor code, 3) 55 | | 3 -> 56 | unsafe_set_chr state.buffer 0 (unsafe_chr ((quantum lsr 10) land 255)) ; 57 | unsafe_set_chr state.buffer 1 (unsafe_chr ((quantum lsr 2) land 255)) ; 58 | unsafe_set_chr state.buffer 2 59 | (unsafe_chr ((quantum lsl 6) lor code land 255)) ; 60 | flush state 61 | | _ -> malformed chr 62 | 63 | type src = [ `Channel of in_channel | `String of string | `Manual ] 64 | 65 | type decode = 66 | [ `Await | `End | `Wrong_padding | `Malformed of string | `Flush of string ] 67 | 68 | type input = 69 | [ `Line_break | `Wsp | `Padding | `Malformed of string | `Flush of state ] 70 | 71 | type decoder = { 72 | src : src; 73 | mutable i : Bytes.t; 74 | mutable i_off : int; 75 | mutable i_pos : int; 76 | mutable i_len : int; 77 | mutable s : state; 78 | mutable padding : int; 79 | mutable unsafe : bool; 80 | mutable byte_count : int; 81 | mutable limit_count : int; 82 | mutable pp : decoder -> input -> decode; 83 | mutable k : decoder -> decode; 84 | } 85 | 86 | let i_rem decoder = decoder.i_len - decoder.i_pos + 1 87 | 88 | let end_of_input decoder = 89 | decoder.i <- Bytes.empty ; 90 | decoder.i_off <- 0 ; 91 | decoder.i_pos <- 0 ; 92 | decoder.i_len <- min_int 93 | 94 | let src decoder source off len = 95 | if off < 0 || len < 0 || off + len > Bytes.length source 96 | then invalid_bounds off len 97 | else if len = 0 98 | then end_of_input decoder 99 | else ( 100 | decoder.i <- source ; 101 | decoder.i_off <- off ; 102 | decoder.i_pos <- 0 ; 103 | decoder.i_len <- len - 1) 104 | 105 | let refill k decoder = 106 | match decoder.src with 107 | | `Manual -> 108 | decoder.k <- k ; 109 | `Await 110 | | `String _ -> 111 | end_of_input decoder ; 112 | k decoder 113 | | `Channel ic -> 114 | let len = input ic decoder.i 0 (Bytes.length decoder.i) in 115 | src decoder decoder.i 0 len ; 116 | k decoder 117 | 118 | let dangerous decoder v = decoder.unsafe <- v 119 | 120 | let reset decoder = decoder.limit_count <- 0 121 | 122 | let ret k v byte_count decoder = 123 | decoder.k <- k ; 124 | decoder.byte_count <- decoder.byte_count + byte_count ; 125 | decoder.limit_count <- decoder.limit_count + byte_count ; 126 | if decoder.limit_count > 78 then dangerous decoder true ; 127 | decoder.pp decoder v 128 | 129 | type flush_and_malformed = [ `Flush of state | `Malformed of string ] 130 | 131 | let padding { size; _ } padding = 132 | match (size, padding) with 133 | | 0, 0 -> true 134 | | 1, _ -> false 135 | | 2, 2 -> true 136 | | 3, 1 -> true 137 | | _ -> false 138 | 139 | let t_flush { quantum; size; buffer } = 140 | match size with 141 | | 0 | 1 -> `Flush { quantum; size; buffer = Bytes.empty } 142 | | 2 -> 143 | let quantum = quantum lsr 4 in 144 | `Flush 145 | { quantum; size; buffer = Bytes.make 1 (unsafe_chr (quantum land 255)) } 146 | | 3 -> 147 | let quantum = quantum lsr 2 in 148 | unsafe_set_chr buffer 0 (unsafe_chr ((quantum lsr 8) land 255)) ; 149 | unsafe_set_chr buffer 1 (unsafe_chr (quantum land 255)) ; 150 | `Flush { quantum; size; buffer = Bytes.sub buffer 0 2 } 151 | | _ -> assert false 152 | 153 | (* this branch is impossible, size can only ever be in the range [0..3]. *) 154 | 155 | let wrong_padding decoder = 156 | let k _ = `End in 157 | decoder.k <- k ; 158 | `Wrong_padding 159 | 160 | let rec t_decode_base64 chr decoder = 161 | if decoder.padding = 0 162 | then 163 | let rec go pos = function 164 | | `Continue state -> 165 | if decoder.i_len - (decoder.i_pos + pos) + 1 > 0 166 | then ( 167 | match unsafe_byte decoder.i decoder.i_off (decoder.i_pos + pos) with 168 | | ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '+' | '/') as chr -> 169 | go (succ pos) (r_repr state chr) 170 | | '=' -> 171 | decoder.padding <- decoder.padding + 1 ; 172 | decoder.i_pos <- decoder.i_pos + pos + 1 ; 173 | decoder.s <- state ; 174 | ret decode_base64 `Padding (pos + 1) decoder 175 | | ' ' | '\t' -> 176 | decoder.i_pos <- decoder.i_pos + pos + 1 ; 177 | decoder.s <- state ; 178 | ret decode_base64 `Wsp (pos + 1) decoder 179 | | '\r' -> 180 | decoder.i_pos <- decoder.i_pos + pos + 1 ; 181 | decoder.s <- state ; 182 | decode_base64_lf_after_cr decoder 183 | | chr -> 184 | decoder.i_pos <- decoder.i_pos + pos + 1 ; 185 | decoder.s <- state ; 186 | ret decode_base64 (malformed chr) (pos + 1) decoder) 187 | else ( 188 | decoder.i_pos <- decoder.i_pos + pos ; 189 | decoder.byte_count <- decoder.byte_count + pos ; 190 | decoder.limit_count <- decoder.limit_count + pos ; 191 | decoder.s <- state ; 192 | refill decode_base64 decoder) 193 | | #flush_and_malformed as v -> 194 | decoder.i_pos <- decoder.i_pos + pos ; 195 | ret decode_base64 v pos decoder in 196 | go 1 (r_repr decoder.s chr) 197 | else ( 198 | decoder.i_pos <- decoder.i_pos + 1 ; 199 | ret decode_base64 (malformed chr) 1 decoder) 200 | 201 | and decode_base64_lf_after_cr decoder = 202 | let rem = i_rem decoder in 203 | if rem < 0 204 | then ret decode_base64 (malformed '\r') 1 decoder 205 | else if rem = 0 206 | then refill decode_base64_lf_after_cr decoder 207 | else 208 | match unsafe_byte decoder.i decoder.i_off decoder.i_pos with 209 | | '\n' -> 210 | decoder.i_pos <- decoder.i_pos + 1 ; 211 | ret decode_base64 `Line_break 2 decoder 212 | | _ -> ret decode_base64 (malformed '\r') 1 decoder 213 | 214 | and decode_base64 decoder = 215 | let rem = i_rem decoder in 216 | if rem <= 0 217 | then 218 | if rem < 0 219 | then 220 | ret 221 | (fun decoder -> 222 | if padding decoder.s decoder.padding 223 | then `End 224 | else wrong_padding decoder) 225 | (t_flush decoder.s) 0 decoder 226 | else refill decode_base64 decoder 227 | else 228 | match unsafe_byte decoder.i decoder.i_off decoder.i_pos with 229 | | ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '+' | '/') as chr -> 230 | t_decode_base64 chr decoder 231 | | '=' -> 232 | decoder.padding <- decoder.padding + 1 ; 233 | decoder.i_pos <- decoder.i_pos + 1 ; 234 | ret decode_base64 `Padding 1 decoder 235 | | ' ' | '\t' -> 236 | decoder.i_pos <- decoder.i_pos + 1 ; 237 | ret decode_base64 `Wsp 1 decoder 238 | | '\r' -> 239 | decoder.i_pos <- decoder.i_pos + 1 ; 240 | decode_base64_lf_after_cr decoder 241 | | chr -> 242 | decoder.i_pos <- decoder.i_pos + 1 ; 243 | ret decode_base64 (malformed chr) 1 decoder 244 | 245 | let pp_base64 decoder = function 246 | | `Line_break -> 247 | reset decoder ; 248 | decoder.k decoder 249 | | `Wsp | `Padding -> decoder.k decoder 250 | | `Flush state -> 251 | decoder.s <- state ; 252 | `Flush (Bytes.to_string state.buffer) 253 | | `Malformed _ as v -> v 254 | 255 | let decoder src = 256 | let pp = pp_base64 in 257 | let k = decode_base64 in 258 | let i, i_off, i_pos, i_len = 259 | match src with 260 | | `Manual -> (Bytes.empty, 0, 1, 0) 261 | | `Channel _ -> (Bytes.create io_buffer_size, 0, 1, 0) 262 | | `String s -> (Bytes.unsafe_of_string s, 0, 0, String.length s - 1) in 263 | { 264 | src; 265 | i_off; 266 | i_pos; 267 | i_len; 268 | i; 269 | s = { quantum = 0; size = 0; buffer = Bytes.create 3 }; 270 | padding = 0; 271 | unsafe = false; 272 | byte_count = 0; 273 | limit_count = 0; 274 | pp; 275 | k; 276 | } 277 | 278 | let decode decoder = decoder.k decoder 279 | 280 | let decoder_byte_count decoder = decoder.byte_count 281 | 282 | let decoder_src decoder = decoder.src 283 | 284 | let decoder_dangerous decoder = decoder.unsafe 285 | 286 | (* / *) 287 | 288 | let invalid_encode () = invalid_arg "Expected `Await encode" 289 | 290 | type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] 291 | 292 | type encode = [ `Await | `End | `Char of char ] 293 | 294 | type encoder = { 295 | dst : dst; 296 | mutable o : Bytes.t; 297 | mutable o_off : int; 298 | mutable o_pos : int; 299 | mutable o_len : int; 300 | mutable c_col : int; 301 | i : Bytes.t; 302 | mutable s : int; 303 | t : Bytes.t; 304 | mutable t_pos : int; 305 | mutable t_len : int; 306 | mutable k : encoder -> encode -> [ `Ok | `Partial ]; 307 | } 308 | 309 | let o_rem encoder = encoder.o_len - encoder.o_pos + 1 310 | 311 | let dst encoder source off len = 312 | if off < 0 || len < 0 || off + len > Bytes.length source 313 | then invalid_bounds off len ; 314 | encoder.o <- source ; 315 | encoder.o_off <- off ; 316 | encoder.o_pos <- 0 ; 317 | encoder.o_len <- len - 1 318 | 319 | let dst_rem = o_rem 320 | 321 | let partial k encoder = function 322 | | `Await -> k encoder 323 | | `Char _ | `End -> invalid_encode () 324 | 325 | let flush k encoder = 326 | match encoder.dst with 327 | | `Manual -> 328 | encoder.k <- partial k ; 329 | `Partial 330 | | `Channel oc -> 331 | output oc encoder.o encoder.o_off encoder.o_pos ; 332 | encoder.o_pos <- 0 ; 333 | k encoder 334 | | `Buffer b -> 335 | let o = Bytes.unsafe_to_string encoder.o in 336 | Buffer.add_substring b o encoder.o_off encoder.o_pos ; 337 | encoder.o_pos <- 0 ; 338 | k encoder 339 | 340 | let t_range encoder len = 341 | encoder.t_pos <- 0 ; 342 | encoder.t_len <- len 343 | 344 | let rec t_flush k encoder = 345 | let blit encoder len = 346 | unsafe_blit encoder.t encoder.t_pos encoder.o encoder.o_pos len ; 347 | encoder.o_pos <- encoder.o_pos + len ; 348 | encoder.t_pos <- encoder.t_pos + len in 349 | let rem = o_rem encoder in 350 | let len = encoder.t_len - encoder.t_pos + 1 in 351 | if rem < len 352 | then ( 353 | blit encoder rem ; 354 | flush (t_flush k) encoder) 355 | else ( 356 | blit encoder len ; 357 | k encoder) 358 | 359 | let rec encode_line_break k encoder = 360 | let rem = o_rem encoder in 361 | let s, j, k = 362 | if rem < 2 363 | then ( 364 | t_range encoder 2 ; 365 | (encoder.t, 0, t_flush k)) 366 | else 367 | let j = encoder.o_pos in 368 | encoder.o_pos <- encoder.o_pos + 2 ; 369 | (encoder.o, encoder.o_off + j, k) in 370 | unsafe_set_chr s j '\r' ; 371 | unsafe_set_chr s (j + 1) '\n' ; 372 | encoder.c_col <- 0 ; 373 | k encoder 374 | 375 | and encode_char chr k (encoder : encoder) = 376 | if encoder.s >= 2 377 | then ( 378 | let a, b, c = (unsafe_byte encoder.i 0 0, unsafe_byte encoder.i 0 1, chr) in 379 | encoder.s <- 0 ; 380 | let quantum = (Char.code a lsl 16) + (Char.code b lsl 8) + Char.code c in 381 | let a = quantum lsr 18 in 382 | let b = (quantum lsr 12) land 63 in 383 | let c = (quantum lsr 6) land 63 in 384 | let d = quantum land 63 in 385 | let rem = o_rem encoder in 386 | let s, j, k = 387 | if rem < 4 388 | then ( 389 | t_range encoder 4 ; 390 | (encoder.t, 0, t_flush (k 4))) 391 | else 392 | let j = encoder.o_pos in 393 | encoder.o_pos <- encoder.o_pos + 4 ; 394 | (encoder.o, encoder.o_off + j, k 4) in 395 | unsafe_set_chr s j default_alphabet.[a] ; 396 | unsafe_set_chr s (j + 1) default_alphabet.[b] ; 397 | unsafe_set_chr s (j + 2) default_alphabet.[c] ; 398 | unsafe_set_chr s (j + 3) default_alphabet.[d] ; 399 | flush k encoder) 400 | else ( 401 | unsafe_set_chr encoder.i encoder.s chr ; 402 | encoder.s <- encoder.s + 1 ; 403 | k 0 encoder) 404 | 405 | and encode_trailing k encoder = 406 | match encoder.s with 407 | | 2 -> 408 | let b, c = (unsafe_byte encoder.i 0 0, unsafe_byte encoder.i 0 1) in 409 | encoder.s <- 0 ; 410 | let quantum = (Char.code b lsl 10) + (Char.code c lsl 2) in 411 | let b = (quantum lsr 12) land 63 in 412 | let c = (quantum lsr 6) land 63 in 413 | let d = quantum land 63 in 414 | let rem = o_rem encoder in 415 | let s, j, k = 416 | if rem < 4 417 | then ( 418 | t_range encoder 4 ; 419 | (encoder.t, 0, t_flush (k 4))) 420 | else 421 | let j = encoder.o_pos in 422 | encoder.o_pos <- encoder.o_pos + 4 ; 423 | (encoder.o, encoder.o_off + j, k 4) in 424 | unsafe_set_chr s j default_alphabet.[b] ; 425 | unsafe_set_chr s (j + 1) default_alphabet.[c] ; 426 | unsafe_set_chr s (j + 2) default_alphabet.[d] ; 427 | unsafe_set_chr s (j + 3) '=' ; 428 | flush k encoder 429 | | 1 -> 430 | let c = unsafe_byte encoder.i 0 0 in 431 | encoder.s <- 0 ; 432 | let quantum = Char.code c lsl 4 in 433 | let c = (quantum lsr 6) land 63 in 434 | let d = quantum land 63 in 435 | let rem = o_rem encoder in 436 | let s, j, k = 437 | if rem < 4 438 | then ( 439 | t_range encoder 4 ; 440 | (encoder.t, 0, t_flush (k 4))) 441 | else 442 | let j = encoder.o_pos in 443 | encoder.o_pos <- encoder.o_pos + 4 ; 444 | (encoder.o, encoder.o_off + j, k 4) in 445 | unsafe_set_chr s j default_alphabet.[c] ; 446 | unsafe_set_chr s (j + 1) default_alphabet.[d] ; 447 | unsafe_set_chr s (j + 2) '=' ; 448 | unsafe_set_chr s (j + 3) '=' ; 449 | flush k encoder 450 | | 0 -> k 0 encoder 451 | | _ -> assert false 452 | 453 | and encode_base64 encoder v = 454 | let k col_count encoder = 455 | encoder.c_col <- encoder.c_col + col_count ; 456 | encoder.k <- encode_base64 ; 457 | `Ok in 458 | match v with 459 | | `Await -> k 0 encoder 460 | | `End -> 461 | if encoder.c_col = 76 462 | then encode_line_break (fun encoder -> encode_base64 encoder v) encoder 463 | else encode_trailing k encoder 464 | | `Char chr -> 465 | let rem = o_rem encoder in 466 | if rem < 1 467 | then flush (fun encoder -> encode_base64 encoder v) encoder 468 | else if encoder.c_col = 76 469 | then encode_line_break (fun encoder -> encode_base64 encoder v) encoder 470 | else encode_char chr k encoder 471 | 472 | let encoder dst = 473 | let o, o_off, o_pos, o_len = 474 | match dst with 475 | | `Manual -> (Bytes.empty, 1, 0, 0) 476 | | `Buffer _ | `Channel _ -> 477 | (Bytes.create io_buffer_size, 0, 0, io_buffer_size - 1) in 478 | { 479 | dst; 480 | o_off; 481 | o_pos; 482 | o_len; 483 | o; 484 | t = Bytes.create 4; 485 | t_pos = 1; 486 | t_len = 0; 487 | c_col = 0; 488 | i = Bytes.create 3; 489 | s = 0; 490 | k = encode_base64; 491 | } 492 | 493 | let encode encoder = encoder.k encoder 494 | 495 | let encoder_dst encoder = encoder.dst 496 | --------------------------------------------------------------------------------