├── .gitignore ├── .ocamlformat ├── CHANGES.md ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.md ├── dune-project ├── email_address └── src │ ├── dune │ ├── email_address.ml │ ├── email_address.mli │ ├── email_address_parser_stable_v1.ml │ └── email_address_parser_stable_v1.mli ├── email_message.opam ├── kernel └── src │ ├── bigstring_shared.ml │ ├── bigstring_shared.mli │ ├── boundary.ml │ ├── boundary.mli │ ├── dune │ ├── email.ml │ ├── email.mli │ ├── email_content.ml │ ├── email_content.mli │ ├── email_grammar.mly │ ├── email_grammar_types.ml │ ├── email_grammar_types.mli │ ├── email_intf.ml │ ├── email_lexer.mli │ ├── email_lexer.mll │ ├── email_lexer_state.ml │ ├── email_lexer_state.mli │ ├── email_message_kernel.ml │ ├── email_raw_content.ml │ ├── email_raw_content.mli │ ├── email_simple.ml │ ├── email_simple.mli │ ├── email_simple_intf.ml │ ├── encoded_word.ml │ ├── encoded_word.mli │ ├── headers.ml │ ├── headers.mli │ ├── lf_or_crlf.ml │ ├── lf_or_crlf.mli │ ├── magic_mime_external.ml │ ├── media_type.ml │ ├── media_type.mli │ ├── media_type_grammar.mly │ ├── media_type_grammar_types.ml │ ├── media_type_grammar_types.mli │ ├── media_type_lexer.mli │ ├── media_type_lexer.mll │ ├── mimestring.ml │ ├── mimestring.mli │ ├── octet_stream.ml │ ├── octet_stream.mli │ ├── quoted_printable_lexer.mli │ ├── quoted_printable_lexer.mll │ ├── rfc.ml │ ├── rfc.mli │ ├── string_monoid.ml │ ├── string_monoid.mli │ ├── string_monoid_intf.ml │ ├── string_monoidable.ml │ └── string_monoidable.mli ├── src ├── crypto.ml ├── dune ├── email.ml ├── email.mli ├── email_date.ml ├── email_date.mli ├── email_message.ml ├── email_simple.ml ├── email_simple.mli ├── import.ml ├── selector.ml ├── selector.mli ├── string_monoid.ml ├── string_monoid.mli ├── wrapper.ml └── wrapper.mli └── test ├── dune ├── email_message_test.ml ├── test_boundary.ml ├── test_boundary.mli ├── test_email.ml ├── test_email.mli ├── test_email_content.ml ├── test_email_content.mli ├── test_email_date.ml ├── test_email_date.mli ├── test_email_save.ml ├── test_email_save.mli ├── test_email_simple.ml ├── test_email_simple.mli ├── test_encoded_word.ml ├── test_encoded_word.mli ├── test_headers.ml ├── test_headers.mli ├── test_media_type.ml ├── test_media_type.mli ├── test_octet_stream.ml ├── test_octet_stream.mli ├── test_related_parts.ml ├── test_related_parts.mli ├── test_rfc.ml └── test_rfc.mli /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.install 3 | *.merlin 4 | _opam 5 | 6 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | profile=janestreet 2 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## Release v0.17.0 2 | 3 | - `Email_address` module now has a `to_string_utf8` function to generate a UTF-8 encoded 4 | representation of an email address 5 | 6 | - Split [Email_message] into [Email_message] and [Email_message_kernel]. The 7 | [Email_message_kernel] library is compatible with JS of OCaml. 8 | 9 | - Tweak the normalization of encoded header values. We previously decoded before 10 | normalizing, now we normalize before decoding. 11 | 12 | - Improve parsing of email addresses to catch more invalid addresses as errors 13 | 14 | ## Release v0.16.0 15 | 16 | - `Email_simple.all_attachments` and `Email_simple.map_attachments`: Added optional 17 | `include_inline_parts` parameter to control whether to attempt to interpret inline 18 | parts as attachments 19 | 20 | - `Email_date.rfc822_date` now accepts an optional `zone` parameter which determines what 21 | timezone to use in the output string 22 | 23 | ## Old pre-v0.15 changelogs (very likely stale and incomplete) 24 | 25 | ## 113.24.00 26 | 27 | - Bugfixes and minor API improvements. 28 | 29 | ## 113.00.00 30 | 31 | - Extended and improved Email_message API. 32 | 33 | ## 112.17.00 34 | 35 | Moved from janestreet-alpha 36 | 37 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This repository contains open source software that is developed and 2 | maintained by [Jane Street][js]. 3 | 4 | Contributions to this project are welcome and should be submitted via 5 | GitHub pull requests. 6 | 7 | Signing contributions 8 | --------------------- 9 | 10 | We require that you sign your contributions. Your signature certifies 11 | that you wrote the patch or otherwise have the right to pass it on as 12 | an open-source patch. The rules are pretty simple: if you can certify 13 | the below (from [developercertificate.org][dco]): 14 | 15 | ``` 16 | Developer Certificate of Origin 17 | Version 1.1 18 | 19 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 20 | 1 Letterman Drive 21 | Suite D4700 22 | San Francisco, CA, 94129 23 | 24 | Everyone is permitted to copy and distribute verbatim copies of this 25 | license document, but changing it is not allowed. 26 | 27 | 28 | Developer's Certificate of Origin 1.1 29 | 30 | By making a contribution to this project, I certify that: 31 | 32 | (a) The contribution was created in whole or in part by me and I 33 | have the right to submit it under the open source license 34 | indicated in the file; or 35 | 36 | (b) The contribution is based upon previous work that, to the best 37 | of my knowledge, is covered under an appropriate open source 38 | license and I have the right under that license to submit that 39 | work with modifications, whether created in whole or in part 40 | by me, under the same open source license (unless I am 41 | permitted to submit under a different license), as indicated 42 | in the file; or 43 | 44 | (c) The contribution was provided directly to me by some other 45 | person who certified (a), (b) or (c) and I have not modified 46 | it. 47 | 48 | (d) I understand and agree that this project and the contribution 49 | are public and that a record of the contribution (including all 50 | personal information I submit with it, including my sign-off) is 51 | maintained indefinitely and may be redistributed consistent with 52 | this project or the open source license(s) involved. 53 | ``` 54 | 55 | Then you just add a line to every git commit message: 56 | 57 | ``` 58 | Signed-off-by: Joe Smith 59 | ``` 60 | 61 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 62 | 63 | If you set your `user.name` and `user.email` git configs, you can sign 64 | your commit automatically with git commit -s. 65 | 66 | [dco]: http://developercertificate.org/ 67 | [js]: https://opensource.janestreet.com/ 68 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2005--2025 Jane Street Group, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | INSTALL_ARGS := $(if $(PREFIX),--prefix $(PREFIX),) 2 | 3 | default: 4 | dune build 5 | 6 | install: 7 | dune install $(INSTALL_ARGS) 8 | 9 | uninstall: 10 | dune uninstall $(INSTALL_ARGS) 11 | 12 | reinstall: uninstall install 13 | 14 | clean: 15 | dune clean 16 | 17 | .PHONY: default install uninstall reinstall clean 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | "Email Message" 2 | =============== 3 | 4 | `email_message` is a library that defines types that represent an RFC2822 email. 5 | 6 | # Parsing an email 7 | You can use `Email.of_string` to parse an email (e.g. from a ".eml" file). 8 | 9 | Once you have your hands on an `Email.t`, you can use various functions in the 10 | `Email.Simple` and `Email` modules to inspect parts of the email. For example: 11 | 12 | - `Email.Simple.subject` : the subject of the email 13 | - `Email.Simple.from` : the From header sender of the email 14 | - `Email.headers` : all the email headers 15 | - `Email.Simple.all_attachments` the email attachments 16 | 17 | # Constructing an email 18 | The `Email.Simple` module exposes various functions for constructing an email. For example: 19 | 20 | ```ocaml 21 | Email.Simple.create 22 | ~from:(Email_address.of_string "sender@example.com") 23 | ~to_:[ (Email_address.of_string "recipient@example.com") ] 24 | ~subject:"Example email" 25 | (Email.Simple.Content.text_utf8 "This is an example email.") 26 | ``` 27 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.17) 2 | -------------------------------------------------------------------------------- /email_address/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name email_address) 3 | (public_name email_message.email_address) 4 | (libraries angstrom core base64) 5 | (preprocess 6 | (pps ppx_jane))) 7 | -------------------------------------------------------------------------------- /email_address/src/email_address.ml: -------------------------------------------------------------------------------- 1 | open! Core.Core_stable 2 | open! Core.Int.Replace_polymorphic_compare 3 | 4 | module Stable_caseless_string = struct 5 | module U = Core.String.Caseless 6 | 7 | module V1 = struct 8 | module T = struct 9 | type t = String.V1.t [@@deriving sexp, bin_io, stable_witness, equal] 10 | 11 | let compare = U.compare 12 | let hash_fold_t = U.hash_fold_t 13 | 14 | type comparator_witness = U.comparator_witness 15 | 16 | let comparator = U.comparator 17 | end 18 | 19 | include T 20 | include Comparable.V1.With_stable_witness.Make (T) 21 | end 22 | end 23 | 24 | module Stable = struct 25 | module Domain = struct 26 | module V1 = Stable_caseless_string.V1 27 | end 28 | 29 | module V1 = struct 30 | module T = struct 31 | type t = Email_address_parser_stable_v1.t = 32 | { (* [prefix = None] means no brackets. *) 33 | prefix : String.V1.t option [@compare.ignore] [@hash.ignore] 34 | ; local_part : String.V1.t 35 | ; domain : Domain.V1.t option 36 | } 37 | [@@deriving fields ~getters, compare, hash, stable_witness] 38 | 39 | let equal = [%compare.equal: t] 40 | let create ?prefix ?domain local_part = { prefix; local_part; domain } 41 | 42 | let with_default_domain email ~default_domain = 43 | { email with domain = Core.Option.first_some email.domain default_domain } 44 | ;; 45 | 46 | let of_string ?default_domain input_str = 47 | let open Core in 48 | let open! Int.Replace_polymorphic_compare in 49 | match 50 | Angstrom.parse_string 51 | ~consume:Prefix 52 | Email_address_parser_stable_v1.email_only 53 | input_str 54 | with 55 | | Error error -> 56 | Or_error.error_s 57 | [%message 58 | "Failed to parse email address" (error : string) (input_str : string)] 59 | | Ok email -> Or_error.return (with_default_domain email ~default_domain) 60 | ;; 61 | 62 | let of_string_exn ?default_domain input_str = 63 | of_string ?default_domain input_str |> Core.Or_error.ok_exn 64 | ;; 65 | 66 | let compose ~prefix ~address_part = 67 | match prefix with 68 | | None -> address_part 69 | | Some prefix -> Core.sprintf "%s<%s>" prefix address_part 70 | ;; 71 | 72 | let compose_utf8 ~prefix ~address_part = 73 | match prefix with 74 | | None -> address_part 75 | | Some prefix -> 76 | let encoded_prefix = Base64.encode_string prefix in 77 | let prefix_utf8 = Core.sprintf "=?%s?B?%s?=" "UTF-8" encoded_prefix in 78 | Core.sprintf "%s<%s>" prefix_utf8 address_part 79 | ;; 80 | 81 | let to_string t = 82 | let address_part = 83 | match t.domain with 84 | | None -> t.local_part 85 | | Some domain -> Core.sprintf "%s@%s" t.local_part domain 86 | in 87 | compose ~prefix:t.prefix ~address_part 88 | ;; 89 | 90 | let to_string_utf8 t = 91 | let address_part = 92 | match t.domain with 93 | | None -> t.local_part 94 | | Some domain -> Core.sprintf "%s@%s" t.local_part domain 95 | in 96 | compose_utf8 ~prefix:t.prefix ~address_part 97 | ;; 98 | 99 | include Sexpable.Of_stringable.V1 (struct 100 | type nonrec t = t 101 | 102 | let to_string = to_string 103 | let of_string s = of_string_exn s 104 | end) 105 | 106 | let t_sexp_grammar = Sexplib0.Sexp_grammar.coerce [%sexp_grammar: string] 107 | 108 | include Binable.Of_stringable.V1 [@alert "-legacy"] (struct 109 | type nonrec t = t 110 | 111 | let to_string = to_string 112 | let of_string s = of_string_exn s 113 | end) 114 | end 115 | 116 | module With_comparator = struct 117 | include T 118 | include Comparator.V1.Make (T) 119 | end 120 | 121 | include With_comparator 122 | include Comparable.V1.With_stable_witness.Make (With_comparator) 123 | end 124 | end 125 | 126 | open Core 127 | open! Int.Replace_polymorphic_compare 128 | 129 | module Domain = struct 130 | include String.Caseless 131 | 132 | let to_string = Fn.id 133 | let of_string = Fn.id 134 | end 135 | 136 | module T = Stable.V1.With_comparator 137 | include T 138 | 139 | let list_of_string ?default_domain input_str = 140 | match 141 | Angstrom.parse_string 142 | ~consume:Prefix 143 | Email_address_parser_stable_v1.email_list_only 144 | input_str 145 | with 146 | | Error error -> 147 | Or_error.error_s 148 | [%message "Failed to parse email address(es)" (error : string) (input_str : string)] 149 | | Ok emails -> 150 | Or_error.return (List.map ~f:(with_default_domain ~default_domain) emails) 151 | ;; 152 | 153 | let list_of_string_exn ?default_domain input_str = 154 | list_of_string ?default_domain input_str |> Core.Or_error.ok_exn 155 | ;; 156 | 157 | let list_to_header_value ts = String.concat ~sep:",\n\t" (List.map ts ~f:to_string) 158 | 159 | let address_part ?(brackets = false) ?(lowercase_domain = false) t = 160 | let prefix = if brackets then Some "" else None in 161 | let domain = 162 | if not lowercase_domain then t.domain else Option.map t.domain ~f:String.lowercase 163 | in 164 | { t with prefix; domain } 165 | ;; 166 | 167 | let address_part_string ?brackets ?lowercase_domain t = 168 | to_string (address_part ?brackets ?lowercase_domain t) 169 | ;; 170 | 171 | let set_address_part t address_part = of_string (compose ~prefix:t.prefix ~address_part) 172 | let set_local_part t local_part = { t with local_part } 173 | let set_domain t domain = { t with domain } 174 | let set_prefix t prefix = { t with prefix } 175 | let arg_type = Command.Arg_type.create of_string_exn 176 | 177 | include Comparable.Make_plain_using_comparator (T) 178 | include Hashable.Make_plain (T) 179 | 180 | module Expert = struct 181 | module Parser = Email_address_parser_stable_v1 182 | end 183 | 184 | module For_test = struct 185 | type t = Email_address_parser_stable_v1.t [@@deriving sexp_of] 186 | end 187 | 188 | module Caseless = struct 189 | module T = struct 190 | type nonrec t = t = 191 | { prefix : String.Stable.V1.t option [@compare.ignore] [@hash.ignore] 192 | ; local_part : Stable_caseless_string.V1.t 193 | ; domain : Stable_caseless_string.V1.t option 194 | } 195 | [@@deriving compare, hash, sexp] 196 | end 197 | 198 | include T 199 | include Hashable.Make_plain (T) 200 | include Comparable.Make_plain (T) 201 | end 202 | -------------------------------------------------------------------------------- /email_address/src/email_address.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module Domain : sig 4 | type t = String.Caseless.t [@@deriving sexp_of, compare] 5 | 6 | include Stringable with type t := t 7 | 8 | include 9 | Comparable.S_plain 10 | with type t := t 11 | and type comparator_witness = String.Caseless.comparator_witness 12 | 13 | include Hashable.S_plain with type t := t 14 | end 15 | 16 | type t [@@deriving sexp_of, sexp_grammar, compare, hash] 17 | 18 | val create : ?prefix:string -> ?domain:Domain.t -> string -> t 19 | val of_string : ?default_domain:string -> string -> t Or_error.t 20 | val of_string_exn : ?default_domain:string -> string -> t 21 | val list_of_string : ?default_domain:string -> string -> t list Or_error.t 22 | val list_of_string_exn : ?default_domain:string -> string -> t list 23 | val to_string : t -> string 24 | val to_string_utf8 : t -> string 25 | val list_to_header_value : t list -> string 26 | val local_part : t -> string 27 | val set_local_part : t -> string -> t 28 | val domain : t -> Domain.t option 29 | val set_domain : t -> Domain.t option -> t 30 | 31 | val address_part 32 | : ?brackets:bool (** default: [false] *) 33 | -> ?lowercase_domain:bool 34 | -> t 35 | -> t 36 | 37 | val address_part_string 38 | : ?brackets:bool (** default: [false] *) 39 | -> ?lowercase_domain:bool 40 | -> t 41 | -> string 42 | 43 | (** [set_address_part] expects an email address without prefix or angle brackets e.g. 44 | USER@DOMAIN. *) 45 | val set_address_part : t -> string -> t Or_error.t 46 | 47 | val prefix : t -> string option 48 | 49 | (** [set_prefix] will remove angle brackets if given [None], otherwise angle brackets are 50 | added before the given prefix. *) 51 | val set_prefix : t -> string option -> t 52 | 53 | val arg_type : t Command.Arg_type.t 54 | 55 | (* Hash and comparisons are based on the address part (local_part + domain) 56 | only. *) 57 | 58 | include Comparable.S_plain with type t := t 59 | include Hashable.S_plain with type t := t 60 | 61 | module Caseless : sig 62 | type nonrec t = t [@@deriving sexp_of, compare, hash] 63 | 64 | include Comparable.S_plain with type t := t 65 | include Hashable.S_plain with type t := t 66 | end 67 | 68 | module For_test : sig 69 | type nonrec t = t [@@deriving sexp_of] 70 | end 71 | 72 | module Expert : sig 73 | module Parser : sig 74 | val email : t Angstrom.t 75 | val skip_whitespace : unit Angstrom.t 76 | val prefix : string Angstrom.t 77 | end 78 | end 79 | 80 | module Stable : sig 81 | module V1 : sig 82 | type nonrec t = t [@@deriving bin_io, hash, sexp_grammar, stable_witness, equal] 83 | 84 | include 85 | Stable_comparable.With_stable_witness.V1 86 | with type t := t 87 | with type comparator_witness = comparator_witness 88 | 89 | val of_string_exn : ?default_domain:string -> string -> t 90 | val to_string : t -> string 91 | end 92 | 93 | module Domain : sig 94 | module V1 : 95 | Stable_comparable.With_stable_witness.V1 96 | with type t := Domain.t 97 | with type comparator_witness = Domain.comparator_witness 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /email_address/src/email_address_parser_stable_v1.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Core.Int.Replace_polymorphic_compare 3 | open Angstrom 4 | open Angstrom.Let_syntax 5 | 6 | type t = 7 | { prefix : string option 8 | ; local_part : string 9 | ; domain : string option 10 | } 11 | [@@deriving sexp_of] 12 | 13 | let is_whitespace_char = function 14 | | ' ' | '\r' | '\n' | '\t' -> true 15 | | _ -> false 16 | ;; 17 | 18 | let is_unquoted_prefix_char = function 19 | | '<' | '>' | '@' | ',' | '"' -> false 20 | | _ -> true 21 | ;; 22 | 23 | let skip_whitespace = skip_while is_whitespace_char 24 | let skip_while1 f = satisfy f *> skip_while f 25 | 26 | let skip_satisfied_or_escaped1 f = 27 | skip_many1 (char '\\' *> advance 1 <|> skip_while1 (fun c -> Char.( <> ) '\\' c && f c)) 28 | ;; 29 | 30 | let quoted_prefix : unit Angstrom.t = 31 | let not_a_quote = Char.( <> ) '"' in 32 | char '"' *> option () (skip_satisfied_or_escaped1 not_a_quote) <* char '"' 33 | ;; 34 | 35 | let unquoted_prefix : unit Angstrom.t = 36 | satisfy (fun c -> is_unquoted_prefix_char c && not (is_whitespace_char c)) 37 | *> skip_while is_unquoted_prefix_char 38 | ;; 39 | 40 | let prefix : string Angstrom.t = 41 | (* Includes the whitespace. *) 42 | consumed ((quoted_prefix <|> unquoted_prefix "prefix") *> skip_whitespace) 43 | ;; 44 | 45 | let is_address_char = function 46 | | '<' | '>' | '@' | ',' -> false 47 | | c -> not (is_whitespace_char c) 48 | ;; 49 | 50 | let is_domain_char = function 51 | | '\'' | '"' -> false 52 | | c -> is_address_char c 53 | ;; 54 | 55 | let local_part = consumed (skip_satisfied_or_escaped1 is_address_char) "local_part" 56 | 57 | let maybe_at_domain = 58 | option 59 | None 60 | (let%map (_ : char) = char '@' 61 | and domain = take_while1 is_domain_char in 62 | Some domain) 63 | "domain" 64 | ;; 65 | 66 | let email_without_prefix_no_quote = 67 | let%map local_part 68 | and domain = maybe_at_domain in 69 | local_part, domain 70 | ;; 71 | 72 | let email_without_prefix_quote q = char q *> email_without_prefix_no_quote <* char q 73 | 74 | let email_without_prefix_maybe_quote = 75 | (* Order matters here. 76 | Because [email_without_prefix_no_quote] may contain quotes in the local part 77 | we need to match the quoted version first. 78 | Otherwise we will 'successfully' match the string and be left with an unconsumed trailing quote. 79 | *) 80 | email_without_prefix_quote '\'' 81 | <|> email_without_prefix_quote '"' 82 | <|> email_without_prefix_no_quote 83 | ;; 84 | 85 | let email_without_prefix = 86 | let%map local_part, domain = email_without_prefix_maybe_quote in 87 | { prefix = None; local_part; domain } 88 | ;; 89 | 90 | let email_with_prefix = 91 | let%map prefix = option "" prefix 92 | and (_ : char) = char '<' 93 | and local_part, domain = email_without_prefix_maybe_quote 94 | and (_ : char) = char '>' in 95 | { prefix = Some prefix; local_part; domain } 96 | ;; 97 | 98 | let email = 99 | (* Order matters here. 100 | `email_without_prefix` can (sometimes) parse the prefix of 101 | `email_with_prefix`. 102 | This means we must match `email_with_prefix` first, if this fails we 103 | fall back to `email_without_prefix`. 104 | (Otherwise [email_without_prefix] would match and then fail due to unconsumed input) 105 | *) 106 | email_with_prefix <|> email_without_prefix "email" 107 | ;; 108 | 109 | let email_list = 110 | let delim = skip_whitespace *> char ',' <* skip_whitespace in 111 | sep_by delim email "email_list" 112 | ;; 113 | 114 | let parse_only x = skip_whitespace *> x <* skip_whitespace <* end_of_input 115 | let email_only = parse_only email 116 | let email_list_only = parse_only email_list 117 | -------------------------------------------------------------------------------- /email_address/src/email_address_parser_stable_v1.mli: -------------------------------------------------------------------------------- 1 | type t = 2 | { prefix : string option 3 | ; local_part : string 4 | ; domain : string option 5 | } 6 | [@@deriving sexp_of] 7 | 8 | val skip_whitespace : unit Angstrom.t 9 | val prefix : string Angstrom.t 10 | val email : t Angstrom.t 11 | val email_only : t Angstrom.t 12 | val email_list_only : t list Angstrom.t 13 | -------------------------------------------------------------------------------- /email_message.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Jane Street developers" 3 | authors: ["Jane Street Group, LLC"] 4 | homepage: "https://github.com/janestreet/email_message" 5 | bug-reports: "https://github.com/janestreet/email_message/issues" 6 | dev-repo: "git+https://github.com/janestreet/email_message.git" 7 | doc: "https://ocaml.janestreet.com/ocaml-core/latest/doc/email_message/index.html" 8 | license: "MIT" 9 | build: [ 10 | ["dune" "build" "-p" name "-j" jobs] 11 | ] 12 | depends: [ 13 | "ocaml" {>= "5.1.0"} 14 | "async" 15 | "core" 16 | "core_kernel" 17 | "core_unix" 18 | "ppx_jane" 19 | "re2" 20 | "angstrom" {>= "0.15.0"} 21 | "base64" {>= "3.4.0"} 22 | "cryptokit" {>= "1.16" & < "1.17"} 23 | "dune" {>= "3.17.0"} 24 | "magic-mime" 25 | ] 26 | available: arch != "arm32" & arch != "x86_32" 27 | synopsis: "E-mail message parser" 28 | description: " 29 | " 30 | -------------------------------------------------------------------------------- /kernel/src/bigstring_shared.ml: -------------------------------------------------------------------------------- 1 | module Stable = struct 2 | open! Core.Core_stable 3 | 4 | module V1 = struct 5 | type t = Core.Bigstring.Stable.V1.t_frozen [@@deriving sexp, bin_io, compare, hash] 6 | 7 | let equal = [%compare.equal: t] 8 | end 9 | end 10 | 11 | type t = Stable.V1.t [@@deriving sexp_of, compare, hash, equal] 12 | 13 | open Core 14 | open Poly 15 | open Bigstring 16 | 17 | let to_bigstring t = t 18 | let of_bigstring t = t 19 | let to_string_monoid t = String_monoid.of_bigstring t 20 | let of_string_monoid t = String_monoid.to_bigstring t 21 | let to_string t = to_string t 22 | let of_string s = of_string s 23 | let empty = Bigstring.create 0 24 | let length t = Bigstring.length t 25 | 26 | let sub ?pos ?len t = 27 | let pos, len = 28 | match pos, len with 29 | | None, None -> 0, length t 30 | | None, Some len -> 0, len 31 | | Some pos, None -> pos, length t - pos 32 | | Some pos, Some len -> pos, len 33 | in 34 | Bigstring.sub_shared ~pos ~len t 35 | ;; 36 | 37 | let to_lexbuf t = 38 | let offset = ref 0 in 39 | let len = length t in 40 | Lexing.from_function (fun dst n -> 41 | let read = min n (len - !offset) in 42 | Bigstring.To_bytes.blit ~src:t ~src_pos:!offset ~len:read ~dst ~dst_pos:0; 43 | offset := !offset + read; 44 | read) 45 | ;; 46 | 47 | let foldi t ~init ~f = 48 | let len = length t in 49 | let rec loop init pos = 50 | if pos >= len then init else loop (f pos init t.{pos}) (pos + 1) 51 | in 52 | loop init 0 53 | ;; 54 | 55 | (* Copied from String.split_lines. *) 56 | let iter_lines_rev t ~f = 57 | let back_up_at_newline ~t ~pos ~eol = 58 | pos := !pos - if !pos > 0 && get t (!pos - 1) = '\r' then 2 else 1; 59 | eol := !pos + 1 60 | in 61 | let n = length t in 62 | if n = 0 63 | then () 64 | else ( 65 | (* Invariant: [-1 <= pos < eol]. *) 66 | let pos = ref (n - 1) in 67 | let eol = ref n in 68 | (* We treat the end of the string specially, because if the string ends with a 69 | newline, we don't want an extra empty string at the end of the output. *) 70 | if get t !pos = '\n' then back_up_at_newline ~t ~pos ~eol; 71 | while !pos >= 0 do 72 | if get t !pos <> '\n' 73 | then decr pos 74 | else ( 75 | (* Becuase [pos < eol], we know that [start <= eol]. *) 76 | let start = !pos + 1 in 77 | f (sub t ~pos:start ~len:(!eol - start)); 78 | back_up_at_newline ~t ~pos ~eol) 79 | done; 80 | f (sub t ~pos:0 ~len:!eol)) 81 | ;; 82 | 83 | let split_lines t = 84 | let acc = ref [] in 85 | iter_lines_rev t ~f:(fun line -> acc := line :: !acc); 86 | !acc 87 | ;; 88 | 89 | let lines_seq ?include_empty_last_line t = 90 | let open Sequence.Generator in 91 | let open Sequence.Generator.Let_syntax in 92 | let rec traverse ~sol ~pos = 93 | let prev_char_is_cr = pos <> 0 && get t (pos - 1) = '\r' in 94 | if pos = length t 95 | then 96 | if (* Safe because [length t > 0] *) 97 | Option.is_some include_empty_last_line || not (get t (pos - 1) = '\n') 98 | then ( 99 | let len = pos - sol in 100 | let%bind () = yield (sub t ~pos:sol ~len) in 101 | return ()) 102 | else return () 103 | else if get t pos <> '\n' 104 | then traverse ~sol ~pos:(pos + 1) 105 | else ( 106 | let len = pos - sol - if prev_char_is_cr then 1 else 0 in 107 | let%bind () = yield (sub t ~pos:sol ~len) in 108 | let pos' = pos + 1 in 109 | traverse ~sol:pos' ~pos:pos') 110 | in 111 | if length t = 0 then Sequence.empty else Sequence.Generator.run (traverse ~sol:0 ~pos:0) 112 | ;; 113 | 114 | let iter_lines t ~f = Sequence.iter (lines_seq t) ~f 115 | 116 | let%expect_test "split_lines and iter_lines" = 117 | let split_lines t = split_lines (of_string t) |> List.map ~f:to_string in 118 | let split_lines_via_iter_lines t = 119 | let acc = ref [] in 120 | iter_lines (of_string t) ~f:(fun line -> acc := line :: !acc); 121 | List.rev_map !acc ~f:to_string 122 | in 123 | let impls = 124 | [ "Bigstring.iter_lines_rev", split_lines 125 | ; "Bigstring.iter_lines", split_lines_via_iter_lines 126 | ; "String.split_lines", String.split_lines 127 | ] 128 | in 129 | List.iter 130 | ~f:(fun s -> 131 | let results = List.map impls ~f:(fun (desc, f) -> desc, f s) in 132 | let all_equal = 133 | List.dedup_and_sort results ~compare:(fun (_, r1) (_, r2) -> 134 | [%compare: string list] r1 r2) 135 | |> List.length 136 | |> Int.equal 1 137 | in 138 | if not all_equal 139 | then 140 | raise_s 141 | [%message 142 | "Mismatching implementations" 143 | ~input:(s : string) 144 | ~_:(results : (string * string list) list)]) 145 | [ "" 146 | ; "\n" 147 | ; "a" 148 | ; "a\n" 149 | ; "a\nb" 150 | ; "a\nb\n" 151 | ; "a\n\n" 152 | ; "a\n\nb" 153 | ; "a\r\n\nb" 154 | ; "\ra\r\n\nb" 155 | ; "\ra\r\n\nb\r" 156 | ; "\ra\r\n\nb\r\n" 157 | ] 158 | ;; 159 | 160 | let of_bigbuffer_volatile buffer = 161 | (* If this isn't done, the buffer might contain extra uninitialized characters *) 162 | Bigstring.sub_shared 163 | ~pos:0 164 | ~len:(Bigbuffer.length buffer) 165 | (Bigbuffer.volatile_contents buffer) 166 | ;; 167 | 168 | let substr_index ?(pos = 0) t ~pattern = 169 | if length pattern = 0 170 | then Some pos 171 | else ( 172 | let c = Bigstring.get pattern 0 in 173 | let last_index = Bigstring.length t - Bigstring.length pattern in 174 | let rec loop pos = 175 | if pos > last_index 176 | then None 177 | else ( 178 | match Bigstring.find c t ~pos ~len:(last_index - pos + 1) with 179 | | None -> None 180 | | Some pos -> 181 | assert (pos <= last_index); 182 | let found_ = Bigstring.sub_shared t ~pos ~len:(Bigstring.length pattern) in 183 | if Bigstring.equal pattern found_ then Some pos else loop (pos + 1)) 184 | in 185 | loop pos) 186 | ;; 187 | -------------------------------------------------------------------------------- /kernel/src/bigstring_shared.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** Immutable sequences of bytes which can be windowed efficiently. *) 4 | type t = private Bigstring.t [@@deriving sexp_of, compare, hash, equal] 5 | 6 | val of_bigstring : Bigstring.t -> t 7 | val to_bigstring : t -> Bigstring.t 8 | 9 | include Stringable.S with type t := t 10 | include String_monoidable.S with type t := t 11 | 12 | val to_lexbuf : t -> Lexing.lexbuf 13 | 14 | (** Empty, immutable Bigstring *) 15 | val empty : t 16 | 17 | val length : t -> int 18 | val sub : ?pos:int -> ?len:int -> t -> t 19 | val foldi : t -> init:'b -> f:(int -> 'b -> char -> 'b) -> 'b 20 | 21 | (** [include_empty_last_line] determines whether a string that ends in "\n" has an empty 22 | string as the last line. 23 | 24 | [iter_lines] and [split_lines] do not include an empty last line. *) 25 | val lines_seq : ?include_empty_last_line:unit -> t -> t Sequence.t 26 | 27 | val iter_lines : t -> f:(t -> unit) -> unit 28 | val split_lines : t -> t list 29 | 30 | (** Gets a bigstring from a bigbuffer with minimal memory overhead. *) 31 | val of_bigbuffer_volatile : Bigbuffer.t -> t 32 | 33 | val of_string_monoid : String_monoid.t -> t 34 | val substr_index : ?pos:int -> t -> pattern:t -> int option 35 | 36 | module Stable : sig 37 | module V1 : sig 38 | type nonrec t = t [@@deriving equal] 39 | 40 | include Stable_without_comparator with type t := t 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /kernel/src/boundary.ml: -------------------------------------------------------------------------------- 1 | module Stable = struct 2 | open Core.Core_stable 3 | 4 | module V1 = struct 5 | type t = string [@@deriving sexp, bin_io, compare] 6 | end 7 | end 8 | 9 | open Core 10 | 11 | type t = string [@@deriving sexp_of, compare, hash] 12 | type boundary = t 13 | 14 | let of_string = Fn.id 15 | let to_string = Fn.id 16 | 17 | module Open = struct 18 | let to_string_monoid t = String_monoid.concat_string [ "\n"; "--"; t; "\n" ] 19 | end 20 | 21 | module Close = struct 22 | let to_string_monoid t = String_monoid.concat_string [ "\n"; "--"; t; "--" ] 23 | end 24 | 25 | module Open_first = struct 26 | let to_string_monoid t = String_monoid.concat_string [ "--"; t; "\n" ] 27 | end 28 | 29 | let split t bstr = 30 | let lf = Bigstring_shared.of_string "\n" in 31 | let crlf = Bigstring_shared.of_string "\r\n" in 32 | let dashdash = Bigstring_shared.of_string "--" in 33 | let t = Bigstring_shared.of_string ("--" ^ t) in 34 | let match_after ~pos bstr ~pattern = 35 | let len = Bigstring_shared.length pattern in 36 | Option.some_if 37 | (pos >= 0 38 | && pos + len <= Bigstring_shared.length bstr 39 | && [%compare.equal: Bigstring_shared.t] 40 | pattern 41 | (Bigstring_shared.sub bstr ~pos ~len)) 42 | (pos + len) 43 | in 44 | let match_before ~pos:end_ bstr ~pattern = 45 | let start = end_ - Bigstring_shared.length pattern in 46 | match_after ~pos:start bstr ~pattern 47 | |> Option.map ~f:(fun end_' -> 48 | assert (end_ = end_'); 49 | start) 50 | in 51 | let match_crlf direction ~pos bstr = 52 | if pos = 0 || pos = Bigstring_shared.length bstr 53 | then Some pos 54 | else ( 55 | let match_ = 56 | match direction with 57 | | `After -> fun pattern -> match_after ~pos bstr ~pattern 58 | | `Before -> fun pattern -> match_before ~pos bstr ~pattern 59 | in 60 | Option.first_some (match_ crlf) (match_ lf)) 61 | in 62 | let rec find_boundary pos = 63 | match Bigstring_shared.substr_index bstr ~pos ~pattern:t with 64 | | None -> 65 | (* No more occurrences of [BOUNDARY] so definitely at EOF *) 66 | `Eof 67 | | Some pos -> 68 | let no_prologue = pos = 0 in 69 | (* Ensure we are at the start of a line (after [CR]LF, or beginning of bigstring) *) 70 | (match match_crlf `Before ~pos bstr with 71 | | None -> 72 | find_boundary (pos + 1) (* skip a character to avoid getting stuck in a loop *) 73 | | Some begin_ -> 74 | let pos = pos + Bigstring_shared.length t in 75 | let is_terminal, pos = 76 | match match_after ~pos bstr ~pattern:dashdash with 77 | | Some pos -> true, pos 78 | | None -> false, pos 79 | in 80 | (* Ensure we are at the end of a line (before [CR]LF, or end of bigstring) *) 81 | (match match_crlf `After ~pos bstr with 82 | | None -> find_boundary pos 83 | | Some end_ -> 84 | if is_terminal 85 | then `Close_boundary (begin_, pos) 86 | else if no_prologue 87 | then `Open_boundary_first end_ 88 | else `Open_boundary (begin_, end_))) 89 | in 90 | let rec loop pos acc has_prologue = 91 | let sub ?stop () = 92 | let stop = Option.value stop ~default:(Bigstring_shared.length bstr) in 93 | let len = stop - pos in 94 | if len <= 0 then Bigstring_shared.empty else Bigstring_shared.sub ~pos ~len bstr 95 | in 96 | match find_boundary pos with 97 | | `Open_boundary_first pos -> loop pos acc false 98 | | `Open_boundary (stop, pos) -> 99 | let chunk = sub ~stop () in 100 | loop pos (chunk :: acc) has_prologue 101 | | `Close_boundary (stop, pos) -> 102 | let chunk = sub ~stop () in 103 | let epilogue = 104 | if pos < Bigstring_shared.length bstr 105 | then Some (Bigstring_shared.sub ~pos bstr) 106 | else None 107 | in 108 | chunk :: acc, epilogue, has_prologue 109 | | `Eof -> 110 | let chunk = sub () in 111 | chunk :: acc, None, has_prologue 112 | in 113 | (* RFC 2046: A multipart body may have a prologue and an epilogue *) 114 | let parts, epilogue, has_prologue = loop 0 [] true in 115 | match List.rev parts with 116 | | [] -> Some bstr, [], epilogue 117 | | prologue :: parts when has_prologue -> Some prologue, parts, epilogue 118 | | parts -> None, parts, epilogue 119 | ;; 120 | 121 | module Generator = struct 122 | type nonrec t = t Sequence.t 123 | 124 | let sexp_of_t t = 125 | [%sexp ((Sequence.take t 5 |> Sequence.to_list) @ [ "..." ] : string list)] 126 | ;; 127 | 128 | (* This boundary pattern ensures that the boundary should not appear in 129 | - Headers 130 | - Quoted-printable text 131 | - Base64 encoded content. 132 | 133 | The only posibility is that it might appear in plaintext, but 134 | that would be incredibly rare. 135 | 136 | We avoid conflicts by generating IDs with different numbers as needed. 137 | *) 138 | let default = 139 | Sequence.unfold ~init:0 ~f:(fun num -> 140 | let str = sprintf "--==::BOUNDARY::%06d::==--" num in 141 | Some (str, num + 1)) 142 | ;; 143 | 144 | let%expect_test _ = 145 | Sequence.take default 5 |> Sequence.iter ~f:print_endline; 146 | [%expect 147 | {| 148 | --==::BOUNDARY::000000::==-- 149 | --==::BOUNDARY::000001::==-- 150 | --==::BOUNDARY::000002::==-- 151 | --==::BOUNDARY::000003::==-- 152 | --==::BOUNDARY::000004::==-- 153 | |}] 154 | ;; 155 | 156 | (* Increment the last numeric component to avoid number conflicts. *) 157 | let from_existing_boundary str = Sequence.append (Sequence.singleton str) default 158 | 159 | let%expect_test _ = 160 | Sequence.take (from_existing_boundary "BOUNDARY") 5 |> Sequence.iter ~f:print_endline; 161 | [%expect 162 | {| 163 | BOUNDARY 164 | --==::BOUNDARY::000000::==-- 165 | --==::BOUNDARY::000001::==-- 166 | --==::BOUNDARY::000002::==-- 167 | --==::BOUNDARY::000003::==-- 168 | |}] 169 | ;; 170 | 171 | let find_nonflicting t parts = 172 | Sequence.find_exn t ~f:(fun t -> 173 | List.for_all parts ~f:(fun part -> 174 | (* This will incorrectly report a conflict if [BOUNDARY] occurs in the text or is 175 | a prefix/suffix of any existing boundary. This is very unlikely to happen in 176 | practice, and would just result in an alternative boundary being used. *) 177 | not (String_monoid.is_substring part ~substring:t))) 178 | ;; 179 | 180 | let%expect_test _ = 181 | find_nonflicting 182 | (from_existing_boundary "BOUNDARY") 183 | [ String_monoid.of_string "foobar" ] 184 | |> print_endline; 185 | [%expect {| BOUNDARY |}] 186 | ;; 187 | 188 | let%expect_test _ = 189 | find_nonflicting 190 | (from_existing_boundary "BOUNDARY") 191 | [ String_monoid.of_string "--BOUNDARY--" ] 192 | |> print_endline; 193 | [%expect {| --==::BOUNDARY::000000::==-- |}] 194 | ;; 195 | 196 | let%expect_test _ = 197 | find_nonflicting 198 | (from_existing_boundary "BOUNDARY") 199 | [ String_monoid.of_string "...BOUNDARY...--==::BOUNDARY::000000::==--..." ] 200 | |> print_endline; 201 | [%expect {| --==::BOUNDARY::000001::==-- |}] 202 | ;; 203 | end 204 | 205 | let generate_non_conflicting_boundary ?prologue ~parts ?epilogue t = 206 | Generator.find_nonflicting 207 | t 208 | ((Option.to_list prologue |> List.map ~f:Bigstring_shared.to_string_monoid) 209 | @ parts 210 | @ (Option.to_list epilogue |> List.map ~f:Bigstring_shared.to_string_monoid)) 211 | ;; 212 | 213 | let join_without_checking_for_conflicts ?prologue ~parts ?epilogue t = 214 | if List.is_empty parts 215 | then ( 216 | match prologue, epilogue with 217 | | Some prologue, Some epilogue -> 218 | String_monoid.plus 219 | (Bigstring_shared.to_string_monoid prologue) 220 | (Bigstring_shared.to_string_monoid epilogue) 221 | | Some content, None | None, Some content -> Bigstring_shared.to_string_monoid content 222 | | None, None -> String_monoid.of_string "\n") 223 | else ( 224 | (* Different types of boundaries that may appear in a message *) 225 | let boundary_open_first = t |> Open_first.to_string_monoid in 226 | let boundary_open = t |> Open.to_string_monoid in 227 | let boundary_close = t |> Close.to_string_monoid in 228 | let first_boundary = 229 | if Option.is_some prologue then boundary_open else boundary_open_first 230 | in 231 | let prologue = 232 | Option.value_map 233 | prologue 234 | ~f:Bigstring_shared.to_string_monoid 235 | ~default:String_monoid.empty 236 | in 237 | let inner_boundary = boundary_open in 238 | let last_boundary = boundary_close in 239 | let epilogue = 240 | Option.value_map 241 | epilogue 242 | ~f:Bigstring_shared.to_string_monoid 243 | ~default:String_monoid.empty 244 | in 245 | String_monoid.concat 246 | [ prologue 247 | ; first_boundary 248 | ; String_monoid.concat ~sep:inner_boundary parts 249 | ; last_boundary 250 | ; epilogue 251 | ]) 252 | ;; 253 | -------------------------------------------------------------------------------- /kernel/src/boundary.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t [@@deriving sexp_of] 4 | type boundary = t 5 | 6 | (** Creates a boundary from the value of the "boundary" parameter in a Content-type header 7 | (RFC2046, p.19) *) 8 | 9 | include Stringable.S with type t := t 10 | 11 | module Generator : sig 12 | type t [@@deriving sexp_of] 13 | 14 | (** A boundary generator that should clash rarely. *) 15 | val default : t 16 | 17 | (** A generator using an existing boundary as a template. *) 18 | val from_existing_boundary : boundary -> t 19 | end 20 | 21 | (** Use the generator to find a boundary that doesn't conflict. *) 22 | val generate_non_conflicting_boundary 23 | : ?prologue:Bigstring_shared.t 24 | -> parts:String_monoid.t list 25 | -> ?epilogue:Bigstring_shared.t 26 | -> Generator.t 27 | -> t 28 | 29 | (** Combine parts using the given boundary. This assumes that the boundary doesn't 30 | conflict. *) 31 | val join_without_checking_for_conflicts 32 | : ?prologue:Bigstring_shared.t 33 | -> parts:String_monoid.t list 34 | -> ?epilogue:Bigstring_shared.t 35 | -> t 36 | -> String_monoid.t 37 | 38 | (** Splits a multipart body into a list of messages, and, if there are, an optional 39 | prologue and epilogue. *) 40 | val split 41 | : t 42 | -> Bigstring_shared.t 43 | -> Bigstring_shared.t option * Bigstring_shared.t list * Bigstring_shared.t option 44 | 45 | module Stable : sig 46 | module V1 : sig 47 | type nonrec t = t [@@deriving sexp, bin_io] 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /kernel/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name email_message_kernel) 3 | (public_name email_message.kernel) 4 | (libraries angstrom base64 base64.rfc2045 core email_address magic-mime 5 | core_kernel.nonempty_list core_kernel.reversed_list) 6 | (preprocess 7 | (pps ppx_jane))) 8 | 9 | (ocamllex email_lexer media_type_lexer quoted_printable_lexer) 10 | 11 | (ocamlyacc email_grammar media_type_grammar) 12 | -------------------------------------------------------------------------------- /kernel/src/email.ml: -------------------------------------------------------------------------------- 1 | module Stable_no_v1_bin_io = struct 2 | open! Core.Core_stable 3 | 4 | module V1 = struct 5 | type t = 6 | { headers : Headers.Stable.V1.t 7 | ; raw_content : Email_raw_content.Stable.V1.t 8 | } 9 | [@@deriving sexp, compare] 10 | end 11 | end 12 | 13 | open Core 14 | open Or_error.Let_syntax 15 | 16 | module T = struct 17 | type t = Stable_no_v1_bin_io.V1.t = 18 | { headers : Headers.t 19 | ; raw_content : Email_raw_content.t 20 | } 21 | [@@deriving sexp_of, fields ~getters ~iterators:create, compare, hash] 22 | end 23 | 24 | include T 25 | include Comparable.Make_plain (T) 26 | include Hashable.Make_plain (T) 27 | 28 | (* The default type of a message depends on the type of its parent, 29 | so we need to pass it around. *) 30 | let of_bigstring_shared ?(normalize_headers = `None) bstr = 31 | let lexbuf = Bigstring_shared.to_lexbuf bstr in 32 | let%map (`Message (headers, content_offset)) = 33 | try 34 | Ok 35 | (Email_grammar.message (Email_lexer.message (Email_lexer_state.create ())) lexbuf) 36 | with 37 | | _ -> 38 | (* Looks like lexer just throws Failure, not Parsing.Parse_error *) 39 | let pos = lexbuf.Lexing.lex_curr_p in 40 | Or_error.error_string 41 | (sprintf 42 | "Error parsing email at line %d, column %d" 43 | pos.Lexing.pos_lnum 44 | (pos.Lexing.pos_cnum - pos.Lexing.pos_bol)) 45 | in 46 | let headers = Headers.of_list ~normalize:normalize_headers headers in 47 | let raw_content = 48 | match content_offset with 49 | | `Truncated -> None 50 | | `Bad_headers pos -> Some (Bigstring_shared.sub ~pos bstr) 51 | | `Content_offset pos -> Some (Bigstring_shared.sub ~pos bstr) 52 | in 53 | { headers 54 | ; raw_content = Email_raw_content.Expert.of_bigstring_shared_option raw_content 55 | } 56 | ;; 57 | 58 | let of_string ?normalize_headers str = 59 | of_bigstring_shared ?normalize_headers (Bigstring_shared.of_string str) 60 | |> Or_error.ok_exn 61 | ;; 62 | 63 | let of_bigstring bstr = 64 | of_bigstring_shared (Bigstring_shared.of_bigstring bstr) |> Or_error.ok_exn 65 | ;; 66 | 67 | let of_bigbuffer buffer = of_bigstring (Bigbuffer.big_contents buffer) 68 | 69 | (* Message bodies are optional. I highly doubt anybody would handle [None] differently 70 | from [Some ""], so we don't expose this detail. It allows us to be smarter with 71 | [to_string] so we don't add a newline. *) 72 | let to_string_monoid ?(eol_except_raw_content = `LF) t = 73 | let optional_body = 74 | match Email_raw_content.Expert.to_bigstring_shared_option t.raw_content with 75 | | None -> [] 76 | | Some raw_content -> 77 | [ String_monoid.concat 78 | [ String_monoid.of_string (Lf_or_crlf.to_string eol_except_raw_content) 79 | ; String_monoid.of_bigstring (Bigstring_shared.to_bigstring raw_content) 80 | ] 81 | ] 82 | in 83 | String_monoid.concat 84 | (Headers.to_string_monoid ~eol:eol_except_raw_content t.headers :: optional_body) 85 | ;; 86 | 87 | let to_string ?eol_except_raw_content t = 88 | String_monoid.to_string (to_string_monoid ?eol_except_raw_content t) 89 | ;; 90 | 91 | let to_bigstring ?eol_except_raw_content t = 92 | String_monoid.to_bigstring (to_string_monoid ?eol_except_raw_content t) 93 | ;; 94 | 95 | let to_bigstring_shared ?eol_except_raw_content t = 96 | Bigstring_shared.of_string_monoid (to_string_monoid ?eol_except_raw_content t) 97 | ;; 98 | 99 | let create = Fields.create 100 | let set_headers t headers = { t with headers } 101 | let set_raw_content t raw_content = { t with raw_content } 102 | let modify_headers t ~f = set_headers t (f t.headers) 103 | let modify_raw_content t ~f = set_raw_content t (f (raw_content t)) 104 | 105 | module Stable = struct 106 | module V1 = struct 107 | include Stable_no_v1_bin_io.V1 108 | 109 | include 110 | Binable.Of_binable_without_uuid [@alert "-legacy"] 111 | (Bigstring.Stable.V1) 112 | (struct 113 | type nonrec t = t 114 | 115 | let to_binable t = to_bigstring t 116 | let of_binable = of_bigstring 117 | end) 118 | end 119 | end 120 | -------------------------------------------------------------------------------- /kernel/src/email.mli: -------------------------------------------------------------------------------- 1 | include Email_intf.Email (** @inline *) 2 | -------------------------------------------------------------------------------- /kernel/src/email_content.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Multipart : sig 4 | type t = private 5 | { boundary : Boundary.t 6 | ; prologue : Bigstring_shared.t option 7 | ; epilogue : Bigstring_shared.t option 8 | ; parts : Email.t list 9 | ; container_headers : Headers.t 10 | } 11 | [@@deriving sexp_of] 12 | 13 | val create_unsafe 14 | : boundary:Boundary.t 15 | -> ?prologue:Bigstring_shared.t 16 | -> ?epilogue:Bigstring_shared.t 17 | -> Email.t list 18 | -> container_headers:Headers.t 19 | -> t 20 | 21 | val create 22 | : ?boundary:Boundary.t 23 | -> ?prologue:Bigstring_shared.t 24 | -> ?epilogue:Bigstring_shared.t 25 | -> ?container_headers:Headers.t 26 | -> Email.t list 27 | -> t 28 | 29 | val set 30 | : t 31 | -> ?boundary:Boundary.t 32 | -> ?prologue:Bigstring_shared.t option 33 | -> ?epilogue:Bigstring_shared.t option 34 | -> ?parts:Email.t list 35 | -> ?container_headers:Headers.t 36 | -> unit 37 | -> t 38 | 39 | include String_monoidable.S with type t := t 40 | end = struct 41 | type t = 42 | { boundary : Boundary.t 43 | ; prologue : Bigstring_shared.t option 44 | ; epilogue : Bigstring_shared.t option 45 | ; parts : Email.t list 46 | ; container_headers : Headers.t 47 | } 48 | [@@deriving sexp_of] 49 | 50 | let create_unsafe ~boundary ?prologue ?epilogue parts ~container_headers = 51 | { boundary; prologue; epilogue; parts; container_headers } 52 | ;; 53 | 54 | let create ?boundary ?prologue ?epilogue ?(container_headers = Headers.empty) parts = 55 | let boundary = 56 | Boundary.generate_non_conflicting_boundary 57 | ?prologue 58 | ~parts:(List.map parts ~f:Email.to_string_monoid) 59 | ?epilogue 60 | (Option.value_map 61 | boundary 62 | ~default:Boundary.Generator.default 63 | ~f:Boundary.Generator.from_existing_boundary) 64 | in 65 | create_unsafe ~boundary ?prologue ?epilogue parts ~container_headers 66 | ;; 67 | 68 | let set 69 | t 70 | ?(boundary = t.boundary) 71 | ?(prologue = t.prologue) 72 | ?(epilogue = t.epilogue) 73 | ?(parts = t.parts) 74 | ?(container_headers = t.container_headers) 75 | () 76 | = 77 | create ~boundary ?prologue ?epilogue ~container_headers parts 78 | ;; 79 | 80 | let to_string_monoid t = 81 | Boundary.join_without_checking_for_conflicts 82 | ?prologue:t.prologue 83 | ~parts:(List.map t.parts ~f:Email.to_string_monoid) 84 | ?epilogue:t.epilogue 85 | t.boundary 86 | ;; 87 | end 88 | 89 | type t = 90 | | Multipart of Multipart.t 91 | | Message of Email.t 92 | | Data of Octet_stream.t 93 | [@@deriving sexp_of] 94 | 95 | let rec multipart_of_bigstring_shared ~boundary ~container_headers bstr = 96 | let open Or_error.Let_syntax in 97 | let prologue, parts, epilogue = Boundary.split boundary bstr in 98 | let%map parts = 99 | List.map parts ~f:(fun part -> 100 | Or_error.tag 101 | (Or_error.try_with (fun () -> 102 | Email.of_bigstring (Bigstring_shared.to_bigstring part))) 103 | ~tag:(sprintf "failed part:\n%s" (Bigstring_shared.to_string part))) 104 | |> Or_error.all 105 | in 106 | Multipart.create_unsafe ~boundary ?prologue ?epilogue ~container_headers parts 107 | 108 | and content_of_bigstring_shared ~headers ?container_headers bstr = 109 | let open Or_error.Let_syntax in 110 | let parent_media_type = Option.bind container_headers ~f:Media_type.from_headers in 111 | let media_type = 112 | Option.value 113 | (Media_type.from_headers headers) 114 | ~default:(Media_type.default ?parent:parent_media_type ()) 115 | in 116 | let encoding = Octet_stream.Encoding.of_headers_or_default headers in 117 | let octet_stream = Octet_stream.of_bigstring_shared ~encoding bstr in 118 | let decode octet_stream = 119 | match Octet_stream.decode octet_stream with 120 | | None -> 121 | Or_error.error "Unknown message encoding" encoding Octet_stream.Encoding.sexp_of_t 122 | | Some decoded_bstr -> Ok decoded_bstr 123 | in 124 | match Media_type.multipart_boundary media_type with 125 | | Some boundary -> 126 | (* According to Wikipedia, the content-transfer-encoding of a multipart 127 | type must always be "7bit", "8bit" or "binary" to avoid the 128 | complications that would be posed by multiple levels of decoding. In 129 | this case this decode call is free. *) 130 | let%bind decoded_bstr = decode octet_stream in 131 | let%bind multipart = 132 | multipart_of_bigstring_shared ~boundary ~container_headers:headers decoded_bstr 133 | in 134 | Ok (Multipart multipart) 135 | | None -> 136 | if Media_type.is_message_rfc822 media_type 137 | then ( 138 | let%bind decoded_bstr = decode octet_stream in 139 | let%bind email = 140 | Or_error.try_with (fun () -> 141 | Email.of_bigstring (Bigstring_shared.to_bigstring decoded_bstr)) 142 | in 143 | Ok (Message email)) 144 | else Ok (Data octet_stream) 145 | 146 | and parse ?container_headers email = 147 | content_of_bigstring_shared 148 | ?container_headers 149 | ~headers:(Email.headers email) 150 | (Email.raw_content email |> Email_raw_content.to_bigstring_shared) 151 | ;; 152 | 153 | let to_string_monoid = function 154 | | Multipart multipart -> Multipart.to_string_monoid multipart 155 | | Message message -> Email.to_string_monoid message 156 | | Data octet_stream -> 157 | Octet_stream.encoded_contents octet_stream |> Bigstring_shared.to_string_monoid 158 | ;; 159 | 160 | let to_bigstring_shared t = 161 | to_string_monoid t |> String_monoid.to_bigstring |> Bigstring_shared.of_bigstring 162 | ;; 163 | 164 | let to_raw_content t = to_bigstring_shared t |> Email_raw_content.of_bigstring_shared 165 | 166 | let rec multipart_map_data ~on_unparsable_content mp ~f = 167 | Multipart.set 168 | mp 169 | ~parts:(List.map mp.Multipart.parts ~f:(map_data ~on_unparsable_content ~f)) 170 | () 171 | 172 | and content_map_data ~on_unparsable_content t ~f = 173 | match t with 174 | | Multipart t -> Multipart (multipart_map_data ~on_unparsable_content t ~f) 175 | | Message message -> Message (map_data ~on_unparsable_content message ~f) 176 | | Data data -> Data (f data) 177 | 178 | and map_data ~on_unparsable_content email ~f = 179 | match parse email with 180 | | Ok content -> 181 | let content = content_map_data content ~on_unparsable_content ~f in 182 | Email.set_raw_content 183 | email 184 | (to_bigstring_shared content |> Email_raw_content.of_bigstring_shared) 185 | | Error e -> 186 | (match on_unparsable_content with 187 | | `Skip -> email 188 | | `Raise -> raise_s [%message "[map_data] has unparsable content" (e : Error.t)]) 189 | ;; 190 | 191 | let map_data ?(on_unparsable_content = `Skip) email ~f = 192 | map_data ~on_unparsable_content email ~f 193 | ;; 194 | 195 | let to_email ~headers t = 196 | let headers = 197 | let media_type = 198 | match t with 199 | | Multipart mp -> 200 | (match Media_type.from_headers headers with 201 | | None -> Some (Media_type.create_multipart "related" ~boundary:mp.boundary) 202 | | Some media_type -> 203 | Some (Media_type.set_multipart_boundary media_type mp.boundary)) 204 | | _ -> None 205 | in 206 | match media_type with 207 | | None -> headers 208 | | Some media_type -> Media_type.set_headers headers media_type 209 | in 210 | Email.create 211 | ~headers 212 | ~raw_content:(to_bigstring_shared t |> Email_raw_content.of_bigstring_shared) 213 | ;; 214 | 215 | let set_content email t = to_email ~headers:(Email.headers email) t 216 | -------------------------------------------------------------------------------- /kernel/src/email_content.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** {v 4 | The cost depends on the encoding of the content and the main media type. 5 | 6 | N = Size of the message 7 | H = Size of the headers of the sub-message(s) 8 | 9 | Format: time complexity, memory complexity 10 | 11 | . | 7bit, 8bit, binary | Base64, Quoted_printable 12 | ------------------------------------------------------------- 13 | message | O(N), O(H) | O(N), O(N) 14 | multipart | O(N), O(H) | O(N), O(N) 15 | other | O(1), O(1) | O(N), O(N) 16 | 17 | Where other is any other main media type: text, image, application... 18 | 19 | Encoding and type can be obtained from the headers, using the modules 20 | Headers.Content_type and Headers.Content_transfer_encoding, and the corresponding 21 | default values. 22 | v} *) 23 | 24 | module Multipart : sig 25 | type t = private 26 | { boundary : Boundary.t 27 | ; prologue : Bigstring_shared.t option 28 | ; epilogue : Bigstring_shared.t option 29 | ; parts : Email.t list 30 | (** [container_headers] is informational only for use when further processing parts. 31 | it is ignored by [to_email]. *) 32 | ; container_headers : Headers.t 33 | } 34 | [@@deriving sexp_of] 35 | 36 | val create 37 | : ?boundary:Boundary.t 38 | -> ?prologue:Bigstring_shared.t 39 | -> ?epilogue:Bigstring_shared.t 40 | -> ?container_headers:Headers.t 41 | -> Email.t list 42 | -> t 43 | 44 | val set 45 | : t 46 | -> ?boundary:Boundary.t 47 | -> ?prologue:Bigstring_shared.t option 48 | -> ?epilogue:Bigstring_shared.t option 49 | -> ?parts:Email.t list 50 | -> ?container_headers:Headers.t 51 | -> unit 52 | -> t 53 | end 54 | 55 | type t = 56 | | Multipart of Multipart.t 57 | | Message of Email.t 58 | | Data of Octet_stream.t 59 | [@@deriving sexp_of] 60 | 61 | (** [parse ?container_headers email] parses the content of [email]. The default content 62 | type of a multipart body changes based on the container headers. This only comes into 63 | play if the container had "Content-Type: multipart/digest". *) 64 | val parse : ?container_headers:Headers.t -> Email.t -> t Or_error.t 65 | 66 | val to_email : headers:Headers.t -> t -> Email.t 67 | val set_content : Email.t -> t -> Email.t 68 | 69 | (** Allow changing the message content to mask the actual data but retain the structure *) 70 | val map_data 71 | : ?on_unparsable_content:[ `Skip | `Raise ] (** default [`Skip] *) 72 | -> Email.t 73 | -> f:(Octet_stream.t -> Octet_stream.t) 74 | -> Email.t 75 | 76 | val to_raw_content : t -> Email_raw_content.t 77 | val to_bigstring_shared : t -> Bigstring_shared.t 78 | val to_string_monoid : t -> String_monoid.t 79 | -------------------------------------------------------------------------------- /kernel/src/email_grammar.mly: -------------------------------------------------------------------------------- 1 | %{ 2 | open! Core 3 | 4 | %} 5 | 6 | /*(* Standard tokens *)*/ 7 | %token ERROR 8 | %token EOF 9 | 10 | /*(* Headers *)*/ 11 | %token FIELD 12 | %token HEADER_END 13 | /**/ 14 | %token NO_HEADER_END 15 | 16 | /*(* Body *)*/ 17 | /*(* %token OCTET_STREAM *)*/ 18 | %token OCTET_STREAM_OFFSET 19 | 20 | %start message 21 | %type message 22 | 23 | %% 24 | 25 | message : part EOF { $1 }; 26 | 27 | part : header HEADER_END OCTET_STREAM_OFFSET 28 | { `Message ($1, `Content_offset $3) } 29 | /**/ 30 | | header NO_HEADER_END OCTET_STREAM_OFFSET 31 | { `Message ($1, `Bad_headers $3) } 32 | | header 33 | { `Message ($1, `Truncated) } 34 | ; 35 | 36 | header : 37 | FIELD header { ($1 :: $2) } 38 | | { [] } 39 | ; 40 | 41 | %% 42 | -------------------------------------------------------------------------------- /kernel/src/email_grammar_types.ml: -------------------------------------------------------------------------------- 1 | (* Some simple, lightweight types for parser output *) 2 | type header = (string * string) list 3 | 4 | type content_offset = 5 | [ `Content_offset of int 6 | | `Bad_headers of int 7 | | `Truncated 8 | ] 9 | 10 | type message = [ `Message of header * content_offset ] 11 | -------------------------------------------------------------------------------- /kernel/src/email_grammar_types.mli: -------------------------------------------------------------------------------- 1 | (** Some simple, lightweight types for parser output *) 2 | 3 | type header = (string * string) list 4 | 5 | type content_offset = 6 | [ `Content_offset of int 7 | | `Bad_headers of int 8 | | `Truncated 9 | ] 10 | 11 | type message = [ `Message of header * content_offset ] 12 | -------------------------------------------------------------------------------- /kernel/src/email_intf.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module type Email = sig 4 | (** An [Email.t] is a list of headers along with unparsed content. [Email_content.parse] 5 | can be used to work with the structured content of an email. *) 6 | 7 | type t [@@deriving compare, hash, sexp_of] 8 | 9 | val create : headers:Headers.t -> raw_content:Email_raw_content.t -> t 10 | val headers : t -> Headers.t 11 | val set_headers : t -> Headers.t -> t 12 | val modify_headers : t -> f:(Headers.t -> Headers.t) -> t 13 | val raw_content : t -> Email_raw_content.t 14 | val set_raw_content : t -> Email_raw_content.t -> t 15 | val modify_raw_content : t -> f:(Email_raw_content.t -> Email_raw_content.t) -> t 16 | 17 | val to_bigstring_shared 18 | : ?eol_except_raw_content:Lf_or_crlf.t 19 | -> t 20 | -> Bigstring_shared.t 21 | 22 | (** String-builder-like module. Small-to-no memory overhead when unparsed. *) 23 | val to_string_monoid : ?eol_except_raw_content:Lf_or_crlf.t -> t -> String_monoid.t 24 | 25 | (** Convert from string, in compliance with the RFC 2822 standard (standard email 26 | format, e.g. .eml but not mbox) *) 27 | val of_string : ?normalize_headers:Headers.Normalize.encode -> string -> t 28 | 29 | (** Convert to string, in compliance with the RFC 2822 standard (standard email format, 30 | e.g. .eml but not mbox) *) 31 | val to_string : ?eol_except_raw_content:Lf_or_crlf.t -> t -> string 32 | 33 | val to_bigstring : ?eol_except_raw_content:Lf_or_crlf.t -> t -> Bigstring.t 34 | val of_bigstring : Bigstring.t -> t 35 | val of_bigbuffer : Bigbuffer.t -> t 36 | 37 | include Comparable.S_plain with type t := t 38 | include Hashable.S_plain with type t := t 39 | 40 | module Stable : sig 41 | module V1 : Stable_without_comparator with type t = t 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /kernel/src/email_lexer.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val message : Email_lexer_state.t -> Lexing.lexbuf -> Email_grammar.token 4 | -------------------------------------------------------------------------------- /kernel/src/email_lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Email_grammar 3 | module LS = Email_lexer_state 4 | 5 | let force_token token _lexbuf = token 6 | 7 | } 8 | (* Rules from RFCs. Some may need to be copied to specific places to parse 9 | * their individual parts *) 10 | 11 | (** RFC2234 - Core *) 12 | let cr = "\013" 13 | let lf = "\010" 14 | 15 | (* Deliberately chose to include bare CR and LF here, although the RFC suggests 16 | * them to be included as part of the text characters. 17 | * The rationale being that, when parsing e-mail from a text file, they will 18 | * probably mean CRLF. 19 | * The issue should not arise in conforming e-mails. 20 | *) 21 | let crlf_conforming = cr lf 22 | let crlf_non_conforming = crlf_conforming | lf 23 | let crlf = crlf_non_conforming 24 | 25 | let wsp = [' ' '\t'] 26 | 27 | (** RFC2822 3.2.1 -- Primitive tokens *) 28 | 29 | let no_ws_ctl = [ '\001'-'\008' '\011' '\012' '\014'-'\031' '\127'] 30 | 31 | (** RFC2822 3.2.3 -- Folding whitespace and comments *) 32 | (* Obs: If this matches a CRLF, there's forcibly wsp afterwards *) 33 | let obs_fws = wsp + (crlf wsp +) * 34 | let fws = ((wsp * crlf) ? wsp + ) | obs_fws 35 | 36 | let obs_char = [^ '\n' '\r' ] 37 | 38 | (* Match lone CRs *) 39 | (* This matching is not perfect, as in this case: 40 | - \r\r\r\r..\r\n 41 | It might not consider the last \r as part of a CRLF line ending. 42 | However, it fulfils the following properties: 43 | * On standards-compliant text, it will never match a CR that is part 44 | of a CRLF line-ending; standards compliant meaning that it doesn't 45 | have any lone CR pairs. 46 | * It works as expected on texts with lone CRs. 47 | *) 48 | let obs_cr_text = cr + [ ^ '\r' '\n'] 49 | 50 | let no_ws_printable = [ '\033' - '\126' ] 51 | 52 | let utext = 53 | no_ws_ctl | 54 | no_ws_printable | 55 | obs_char | 56 | obs_cr_text 57 | 58 | (* Beware of "a\n\t\nb". RFC2822, 3.2.3 says such headers must not be produced, 59 | but if they are, we must deal with them. *) 60 | let unstructured = (fws? utext?)* 61 | 62 | (**********************************************************) 63 | 64 | (* Header fields *) 65 | let ftext = [ '\033'-'\057' '\059'-'\126' ] 66 | let field_name = ftext * 67 | 68 | (* MESSAGE PARSING RULES *) 69 | 70 | (* This lexer incorporates explicit state to allow it to process t 71 | the different parts of the email. This is because the tokens 72 | we want to generate depend on the context we are in. 73 | 74 | Another posibility is to handle everything in the grammar. However, 75 | each of the terminal symbols (characters) being a different 76 | state incurs significant overhead (both runtime and developing 77 | time). 78 | 79 | Statefulness is distasteful, so we keep it to a mininum: 80 | - Small number of states and transitions. 81 | - State is only read and set in one place, in the 82 | dispatcher. 83 | *) 84 | 85 | rule 86 | (* Dispatcher *) 87 | message t = parse 88 | | "" 89 | { 90 | match Core.Queue.dequeue t.LS.buf with 91 | | Some tok -> force_token tok lexbuf 92 | | None -> 93 | let result = 94 | match t.LS.state with 95 | (* Tokenization is vastly different depending on the context *) 96 | | `Header -> field lexbuf 97 | | `Content -> body_octet_stream lexbuf 98 | | `Expected_eof -> expected_eof lexbuf 99 | in 100 | begin 101 | LS.combine t result; 102 | message t lexbuf 103 | end 104 | } 105 | and 106 | (** Looks for a header name *) 107 | field = parse 108 | | (field_name as name) 109 | (wsp * as _wsp) 110 | ":" 111 | (unstructured as body) 112 | crlf ? 113 | { 114 | (* This code is repeated to avoid cyclical dependencies. Effort has 115 | * been made to make it minimal. 116 | *) 117 | LS.return [FIELD(name, body)] 118 | } 119 | | crlf { LS.return ~new_state:`Content [HEADER_END] } 120 | | eof { LS.return_eof } 121 | | "" { LS.return ~new_state:`Content [NO_HEADER_END] } 122 | 123 | and 124 | 125 | (** This rule throws an error if there are any more characters in the file *) 126 | body_octet_stream = 127 | parse 128 | | "" | eof 129 | { 130 | assert (Lexing.lexeme_start lexbuf = Lexing.lexeme_end lexbuf); 131 | let pos = Lexing.lexeme_start lexbuf in 132 | LS.return ~new_state:`Expected_eof [ OCTET_STREAM_OFFSET (pos); EOF ] 133 | } 134 | and 135 | 136 | 137 | (* Supporting functions *) 138 | error = 139 | parse 140 | | _ as c { LS.return_error (Core.sprintf "Unexpected character %c" c) } 141 | | eof { LS.return_error "Unexpected EOF" } 142 | and 143 | expected_eof = 144 | parse 145 | | eof { LS.return_eof } 146 | | "" { error lexbuf } 147 | 148 | { 149 | 150 | } 151 | -------------------------------------------------------------------------------- /kernel/src/email_lexer_state.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module State = struct 4 | type t = 5 | [ `Header (** Initial state, parsing headers *) 6 | | `Content (** Parsing the body of the message. The details are in the body state. *) 7 | | `Expected_eof (** The message should end here. If it doesn't, it's an error *) 8 | ] 9 | 10 | let initial = `Header 11 | end 12 | 13 | module Content = struct 14 | type t = 15 | | Multipart of string list 16 | | Octet_stream 17 | 18 | let default = Octet_stream 19 | end 20 | 21 | type t = 22 | { mutable state : State.t 23 | ; buf : Email_grammar.token Queue.t 24 | } 25 | 26 | let create () = { state = State.initial; buf = Queue.create () } 27 | 28 | module Result = struct 29 | type t = 30 | { new_state : State.t option 31 | ; tokens : Email_grammar.token list 32 | } 33 | 34 | module Std = struct 35 | let return ?new_state tokens = { new_state; tokens } 36 | let return_eof = return ~new_state:`Expected_eof [ Email_grammar.EOF ] 37 | let return_error str = return [ Email_grammar.ERROR str ] 38 | end 39 | end 40 | 41 | let combine t result = 42 | (match result.Result.new_state with 43 | | Some state -> t.state <- state 44 | | None -> ()); 45 | List.iter result.Result.tokens ~f:(fun tok -> Queue.enqueue t.buf tok) 46 | ;; 47 | 48 | include Result.Std 49 | 50 | module Error = struct 51 | let unexpected_char c = sprintf "Unexpected char: %c" c 52 | let unexpected_eof = "Unexpected end of file" 53 | end 54 | -------------------------------------------------------------------------------- /kernel/src/email_lexer_state.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module State : sig 4 | type t = 5 | [ `Header (** Initial state, parsing headers *) 6 | | `Content (** Parsing the body of the message. The details are in the body state. *) 7 | | `Expected_eof (** The message should end here. If it doesn't, it's an error *) 8 | ] 9 | 10 | val initial : [ `Header ] 11 | end 12 | 13 | module Content : sig 14 | type t = 15 | | Multipart of string list 16 | | Octet_stream 17 | 18 | val default : t 19 | end 20 | 21 | type t = 22 | { mutable state : State.t 23 | ; buf : Email_grammar.token Queue.t 24 | } 25 | 26 | val create : unit -> t 27 | 28 | module Result : sig 29 | type t = 30 | { new_state : State.t option 31 | ; tokens : Email_grammar.token list 32 | } 33 | end 34 | 35 | val combine : t -> Result.t -> unit 36 | val return : ?new_state:State.t -> Email_grammar.token list -> Result.t 37 | val return_eof : Result.t 38 | val return_error : string -> Result.t 39 | 40 | module Error : sig 41 | val unexpected_char : char -> string 42 | val unexpected_eof : string 43 | end 44 | -------------------------------------------------------------------------------- /kernel/src/email_message_kernel.ml: -------------------------------------------------------------------------------- 1 | include Email (** @inline *) 2 | 3 | module Bigstring_shared = Bigstring_shared 4 | module Content = Email_content 5 | module Raw_content = Email_raw_content 6 | module Simple = Email_simple 7 | module Mimestring = Mimestring 8 | module Octet_stream = Octet_stream 9 | module String_monoid = String_monoid 10 | module Lf_or_crlf = Lf_or_crlf 11 | 12 | module Headers = struct 13 | include Headers 14 | module Encoded_word = Encoded_word 15 | end 16 | 17 | module Stable = struct 18 | include Email.Stable 19 | module Raw_content = Email_raw_content.Stable 20 | module Simple = Email_simple.Stable 21 | module Headers = Headers.Stable 22 | end 23 | 24 | module Private = struct 25 | module Boundary = Boundary 26 | module Media_type = Media_type 27 | module Rfc = Rfc 28 | 29 | module Email_intf = struct 30 | module type Email = Email_intf.Email with type t = Email.t 31 | end 32 | 33 | module Email_simple_intf = struct 34 | module type Email_simple = 35 | Email_simple_intf.Email_simple 36 | with type Mimetype.t = Email_simple.Mimetype.t 37 | and type Content.t = Email_simple.Content.t 38 | 39 | module type Mimetype = 40 | Email_simple_intf.Mimetype with type t = Email_simple.Mimetype.t 41 | 42 | module type Content = 43 | Email_simple_intf.Content 44 | with module Mimetype := Email_simple.Mimetype 45 | and type attachment_name := Email_simple.attachment_name 46 | and type t = Email_simple.Content.t 47 | 48 | module type Expert = 49 | Email_simple_intf.Expert 50 | with module Mimetype := Email_simple.Mimetype 51 | and module Content := Email_simple.Content 52 | and type attachment_name := Email_simple.attachment_name 53 | and type t := Email_simple.t 54 | 55 | module type Stable = 56 | Email_simple_intf.Stable 57 | with type Mimetype.latest := Email_simple.Mimetype.t 58 | and type Content.latest := Email_simple.Content.t 59 | end 60 | 61 | module String_monoid_intf = struct 62 | module type Underlying = 63 | String_monoid_intf.Underlying with type t = String_monoid.Underlying.t 64 | 65 | module type String_monoid = 66 | String_monoid_intf.String_monoid 67 | with type t = String_monoid.t 68 | and type Underlying.t = String_monoid.Underlying.t 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /kernel/src/email_raw_content.ml: -------------------------------------------------------------------------------- 1 | module Stable = struct 2 | open! Core.Core_stable 3 | 4 | module V1 = struct 5 | type t = Bigstring_shared.Stable.V1.t option [@@deriving bin_io, sexp, compare, equal] 6 | end 7 | end 8 | 9 | open! Core 10 | 11 | type t = Bigstring_shared.t option [@@deriving compare, hash, sexp_of, equal] 12 | 13 | let of_bigstring_shared bstr = Some bstr 14 | let of_string str = of_bigstring_shared (Bigstring_shared.of_string str) 15 | 16 | let to_bigstring_shared = function 17 | | None -> Bigstring_shared.empty 18 | | Some bstr -> bstr 19 | ;; 20 | 21 | let length t = Bigstring_shared.length (to_bigstring_shared t) 22 | 23 | module Expert = struct 24 | let of_bigstring_shared_option = Fn.id 25 | let to_bigstring_shared_option = Fn.id 26 | end 27 | -------------------------------------------------------------------------------- /kernel/src/email_raw_content.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t [@@deriving compare, hash, sexp_of, equal] 4 | 5 | val to_bigstring_shared : t -> Bigstring_shared.t 6 | val of_string : string -> t 7 | val of_bigstring_shared : Bigstring_shared.t -> t 8 | val length : t -> int 9 | 10 | (** Even though the underlying type includes an option, most users should not have to 11 | think about the difference between [Some ""] and [None]. You can use the [Expert] 12 | module to deal with the optionality, if need be. *) 13 | module Expert : sig 14 | val to_bigstring_shared_option : t -> Bigstring_shared.t option 15 | val of_bigstring_shared_option : Bigstring_shared.t option -> t 16 | end 17 | 18 | module Stable : sig 19 | module V1 : sig 20 | type nonrec t = t [@@deriving equal] 21 | 22 | include Stable_without_comparator with type t := t 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /kernel/src/email_simple.mli: -------------------------------------------------------------------------------- 1 | include Email_simple_intf.Email_simple (** @inline *) 2 | -------------------------------------------------------------------------------- /kernel/src/email_simple_intf.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module type Mimetype = sig 4 | type t = private string [@@deriving compare, sexp_of] 5 | 6 | val text : t 7 | val text_utf8 : t 8 | val html : t 9 | val html_utf8 : t 10 | val pdf : t 11 | val jpg : t 12 | val png : t 13 | val csv : t 14 | val multipart_mixed : t 15 | val multipart_related : t 16 | val multipart_alternative : t 17 | val of_string : string -> t 18 | val equal : t -> t -> bool 19 | val arg_type : t Command.Arg_type.t 20 | val from_filename : string -> t 21 | val from_extension : string -> t 22 | val to_extension : t -> string option 23 | val guess_encoding : t -> Octet_stream.Encoding.known 24 | end 25 | 26 | module type Content = sig 27 | type attachment_name 28 | 29 | module Mimetype : T 30 | 31 | type t = private Email.t [@@deriving sexp_of] 32 | 33 | val of_email : Email.t -> t 34 | 35 | val create_custom 36 | : content_type:Mimetype.t 37 | -> ?encoding:Octet_stream.Encoding.known 38 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 39 | -> string 40 | -> t 41 | 42 | val create 43 | : content_type:Mimetype.t 44 | -> ?encoding:Octet_stream.Encoding.known 45 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 46 | -> string 47 | -> t 48 | [@@deprecated "[since 2019-08] Renamed to [create_custom]"] 49 | 50 | val html_utf8 51 | : ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *) 52 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 53 | -> string 54 | -> t 55 | 56 | val html 57 | : ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *) 58 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 59 | -> string 60 | -> t 61 | [@@deprecated "[since 2019-08] Please specify the charset, e.g. [html_utf8]"] 62 | 63 | val text_utf8 64 | : ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *) 65 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 66 | -> string 67 | -> t 68 | 69 | val text 70 | : ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *) 71 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 72 | -> string 73 | -> t 74 | [@@deprecated "[since 2019-08] Please specify the charset, e.g. [text_utf8]"] 75 | 76 | (** Plain text e-mail that also includes an html version so it's displayed monospace in 77 | gmail. 78 | 79 | By default, we add some custom styling to disable the line-wrap formatting rule 80 | which gmail uses. To disable this behavior, supply [~force_no_line_wrap:false]. *) 81 | val text_monospace_utf8 82 | : ?extra_headers:(Headers.Name.t * Headers.Value.t) list 83 | -> ?force_no_line_wrap:bool (** default: true *) 84 | -> string 85 | -> t 86 | 87 | val text_monospace 88 | : ?extra_headers:(Headers.Name.t * Headers.Value.t) list 89 | -> ?force_no_line_wrap:bool (** default: true *) 90 | -> string 91 | -> t 92 | [@@deprecated "[since 2019-08] Please specify the charset, e.g. [text_monospace_utf8]"] 93 | 94 | (** Combine 2 or more contents as alternative versions. List should be sorted from worst 95 | to best. *) 96 | val alternatives : ?extra_headers:(Headers.Name.t * Headers.Value.t) list -> t list -> t 97 | 98 | (** Combine 2 or more contents that should be bundled together *) 99 | val mixed : ?extra_headers:(Headers.Name.t * Headers.Value.t) list -> t list -> t 100 | 101 | (** Add related resources (e.g. inline images). You can reference them using 102 | 'cid:$[{attachment_name}]' in the content. To attach files you should use 103 | [create ~attachments] *) 104 | val with_related 105 | : ?extra_headers:(Headers.Name.t * Headers.Value.t) list 106 | -> resources:(attachment_name * t) list 107 | -> t 108 | -> t 109 | 110 | val content_type : t -> Mimetype.t 111 | 112 | (** The Content-ID of the content *) 113 | val related_part_cid : t -> attachment_name option 114 | 115 | val all_related_parts : t -> (attachment_name * t) list 116 | val find_related : t -> attachment_name -> t option 117 | 118 | (** [content] and [parts] return [None] if the email doesn't properly parse. They also 119 | return [None] if the message has content type "message/rfc822" *) 120 | val content : t -> Octet_stream.t option 121 | 122 | val parts : t -> t list option 123 | 124 | (** Get the alternative versions available. If the message is not of content type 125 | "multipart/alternative" then return a singleton list. *) 126 | val alternative_parts : t -> t list 127 | 128 | (** Get the 'inline' parts, This expands "Content-Type: multipart/[{mixed,related}]", 129 | stripping out any attachment parts. multipart/alternative is not expanded *) 130 | val inline_parts : t -> t list 131 | 132 | val content_disposition : t -> [ `Inline | `Attachment of string ] 133 | val attachment_name : t -> string option 134 | end 135 | 136 | module type Expert = sig 137 | type t 138 | type attachment_name 139 | 140 | module Mimetype : T 141 | module Content : T 142 | 143 | val create_raw 144 | : from:string (** defaults to *) 145 | -> to_:string list 146 | -> ?cc:string list 147 | -> ?reply_to:string 148 | -> subject:string 149 | -> id:string 150 | -> ?in_reply_to:string 151 | -> date:string 152 | -> ?auto_generated:unit 153 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 154 | -> ?attachments:(attachment_name * Content.t) list 155 | -> Content.t 156 | -> t 157 | 158 | val content 159 | : normalize_headers:Headers.Normalize.encode 160 | -> extra_headers:(Headers.Name.t * Headers.Value.t) list 161 | -> encoding:Octet_stream.Encoding.known 162 | -> string 163 | -> t 164 | 165 | val multipart 166 | : normalize_headers:Headers.Normalize.encode 167 | -> content_type:Mimetype.t 168 | -> extra_headers:(Headers.Name.t * Headers.Value.t) list 169 | -> t list 170 | -> t 171 | end 172 | 173 | module type Stable = sig 174 | module Content : sig 175 | type latest 176 | 177 | module V1 : sig 178 | type t = latest [@@deriving bin_io, sexp] 179 | end 180 | end 181 | 182 | module Mimetype : sig 183 | type latest 184 | 185 | module V1 : Stable_without_comparator with type t = latest 186 | end 187 | end 188 | 189 | module type Email_simple = sig 190 | module Mimetype : Mimetype 191 | 192 | type attachment_name = string [@@deriving sexp_of] 193 | 194 | module Content : 195 | Content with module Mimetype := Mimetype and type attachment_name := attachment_name 196 | 197 | type t = Email.t [@@deriving sexp_of] 198 | 199 | val create 200 | : from:Email_address.t (** defaults to *) 201 | -> to_:Email_address.t list 202 | -> ?cc:Email_address.t list 203 | -> ?reply_to:Email_address.t 204 | -> subject:string 205 | -> id:string 206 | -> ?in_reply_to:string 207 | -> date_string:string 208 | -> ?auto_generated:unit 209 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 210 | -> ?attachments:(attachment_name * Content.t) list 211 | -> Content.t 212 | -> t 213 | 214 | val from : t -> Email_address.t option 215 | val to_ : t -> Email_address.t list option 216 | val cc : t -> Email_address.t list option 217 | val subject : t -> string option 218 | val id : t -> string option 219 | 220 | (** [extract_body ?content_type t] returns the body associated with the email part that 221 | matches the [content_type] mimetype, or none if [t] does not contain a body or part 222 | of type [content_type]. *) 223 | val extract_body 224 | : ?content_type:Mimetype.t (** default: [Mimetype.text] *) 225 | -> t 226 | -> string option 227 | 228 | (** [extract_body_ext] and [extract_body_ext'] Attempt to find the message body in the 229 | preferred format. 230 | 231 | [accept] is used to select only content parts in a format that is supported. If a 232 | multipart component is accepted by [accept], we do NOT recursively extract the body. 233 | We do so otherwise. 234 | 235 | [order] will be used to select the least part (by default the first part) *) 236 | val extract_body_ext' 237 | : accept:((Mimetype.t * (string * string option) list) option -> 'format option) 238 | -> t 239 | -> ('format * string) Sequence.t 240 | 241 | val extract_body_ext 242 | : accept:((Mimetype.t * (string * string option) list) option -> 'format option) 243 | -> ?order:('format -> 'format -> int) 244 | -> t 245 | -> ('format * string) option 246 | 247 | (** Related parts are those that are included in a multi-part message with a 248 | "Content-ID" header. This content can be referenced by adding the "cid:" prefix and 249 | stripping the enclosing '<' and '>'. 250 | 251 | For example (from https://tools.ietf.org/html/rfc2392): 252 | 253 | {v 254 | From: foo1@bar.net 255 | To: foo2@bar.net 256 | Subject: A simple example 257 | Mime-Version: 1.0 258 | Content-Type: multipart/related; boundary="boundary-example-1"; type=Text/HTML 259 | --boundary-example 1 260 | Content-Type: Text/HTML; charset=US-ASCII 261 | 262 | to the other body part, for example through a statement such as: 263 | IETF logo 264 | 265 | --boundary-example-1 266 | 267 | Content-ID: 268 | Content-Type: IMAGE/GIF 269 | Content-Transfer-Encoding: BASE64 270 | 271 | R0lGODlhGAGgAPEAAP/////ZRaCgoAAAACH+PUNvcHlyaWdodCAoQykgMTk5 272 | NSBJRVRGLiBVbmF1dGhvcml6ZWQgZHVwbGljYXRpb24gcHJvaGliaXRlZC4A 273 | etc... 274 | 275 | --boundary-example-1-- 276 | v} 277 | 278 | Calling [all_related_parts] on this email would return a list of length one where 279 | the [attachment_name] is "foo4*foo1@bar.net" for the single entry. 280 | 281 | Related parts are often used for inline images. *) 282 | val all_related_parts : t -> (attachment_name * Content.t) list 283 | 284 | val find_related : t -> attachment_name -> Content.t option 285 | val inline_parts : t -> Content.t list 286 | 287 | module Expert : 288 | Expert 289 | with module Mimetype := Mimetype 290 | and module Content := Content 291 | and type t := t 292 | and type attachment_name := attachment_name 293 | 294 | module Stable : 295 | Stable with type Content.latest := Content.t and type Mimetype.latest := Mimetype.t 296 | end 297 | -------------------------------------------------------------------------------- /kernel/src/encoded_word.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Angstrom 3 | 4 | module Charset = struct 5 | (* The following might not be an exhaustive list. We can add to this as we encounter 6 | more cases. *) 7 | type t = 8 | [ `Ascii 9 | | `Big5 10 | | `GB2312 11 | | `Latin1 12 | | `Latin2 13 | | `Utf8 14 | | `Windows1252 15 | | `KS_C_5601_1987 16 | ] 17 | [@@deriving sexp_of, enumerate, compare, equal] 18 | 19 | let to_string = function 20 | | `Ascii -> "US-ASCII" 21 | | `Big5 -> "BIG5" 22 | | `GB2312 -> "GB2312" 23 | | `Latin1 -> "ISO-8859-1" 24 | | `Latin2 -> "ISO-8859-2" 25 | | `Utf8 -> "UTF-8" 26 | | `Windows1252 -> "WINDOWS-1252" 27 | | `KS_C_5601_1987 -> "KS_C_5601-1987" 28 | ;; 29 | end 30 | 31 | module Let_syntax = struct 32 | let bind t ~f = t >>= f 33 | let map t ~f = t >>| f 34 | let both a b = lift2 Tuple2.create a b 35 | end 36 | 37 | let ws = take_while1 Char.is_whitespace 38 | 39 | let charset_parser charsets = 40 | List.map charsets ~f:(fun charset -> 41 | string_ci (Charset.to_string charset) >>| const charset) 42 | |> choice 43 | ;; 44 | 45 | let encoding : [ `Base64 | `Quoted_printable ] Angstrom.t = 46 | choice [ string_ci "B" >>| const `Base64; string_ci "Q" >>| const `Quoted_printable ] 47 | ;; 48 | 49 | let parser_ : charset_parser:'a Angstrom.t -> ('a * string) Angstrom.t = 50 | fun ~charset_parser -> 51 | let%bind () = string "=?" >>| ignore 52 | and charset = charset_parser 53 | and () = string "?" >>| ignore 54 | and encoding 55 | and () = string "?" >>| ignore 56 | and data = 57 | take_while (function 58 | | '?' -> false 59 | | c -> (not (Char.is_whitespace c)) && Char.is_print c) 60 | and () = string "?=" >>| ignore in 61 | let%bind data = 62 | match encoding with 63 | | `Quoted_printable -> 64 | (* RFC2047 deviates slightly from common quoted printable. 65 | In particular 66 | 4.2(2) - Underscore may be used to encode space, and 67 | 4.2(3)- underscore must be encoded. 68 | This substituion handles that decoding step. *) 69 | let data = String.substr_replace_all data ~pattern:"_" ~with_:" " in 70 | let data_bstr, _ = 71 | Quoted_printable_lexer.decode_quoted_printable 72 | (String.length data) 73 | (Lexing.from_string data) 74 | in 75 | return (Bigbuffer.contents data_bstr) 76 | | `Base64 -> 77 | (match Base64.decode data with 78 | | Ok data -> return data 79 | | Error (`Msg msg) -> fail msg) 80 | in 81 | return (charset, data) 82 | ;; 83 | 84 | let parser_many 85 | : charset_parser:'a Angstrom.t 86 | -> [ `Encoded of 'a * string | `Plain of string ] list Angstrom.t 87 | = 88 | fun ~charset_parser -> 89 | many 90 | (choice 91 | [ (let%map hd = parser_ ~charset_parser 92 | and tl = 93 | (* RFC2047 6.2 When displaying a particular header field that contains 94 | multiple 'encoded-word's, any 'linear-white-space' that separates a 95 | pair of adjacent 'encoded-word's is ignored. *) 96 | many 97 | (let%map (_ : string) = option "" ws 98 | and res = parser_ ~charset_parser in 99 | `Encoded res) 100 | in 101 | `Encoded hd :: tl) 102 | ; (let%map c = 103 | choice 104 | [ take_while1 (function 105 | | '=' -> false 106 | | c -> not (Char.is_whitespace c)) 107 | ; string "=" 108 | (* Collapse Line breaks as per 109 | RFC822 - 3.1.1 Unfolding is accomplished by regarding CRLF immediately 110 | followed by an LWSP-char as equivalent to the LWSP-char. 111 | RFC822 - 3.1.3 Rules of (un)folding apply to these (unstructured) fields *) 112 | ; (let%bind (_ : string) = choice [ string "\r\n"; string "\n" ] in 113 | ws) 114 | (* The RFC is ambiguous on what should happen if there is a lone CRLF, so we 115 | ignore those, and treat these as regular white space. The RFC is also 116 | ambiguous on how to treat multiple consecutive whitespaces, so we do the 117 | conservative thing and leave them exactly as is. *) 118 | ; ws 119 | ] 120 | in 121 | [ `Plain c ]) 122 | ]) 123 | >>| List.concat 124 | ;; 125 | 126 | let decode_with_charset ?(charsets = Charset.all) str = 127 | Angstrom.parse_string 128 | ~consume:Prefix 129 | (parser_many ~charset_parser:(charset_parser charsets)) 130 | str 131 | |> Result.map_error ~f:Error.of_string 132 | ;; 133 | 134 | let decode ?(charsets = Charset.all) str = 135 | let%map.Or_error chunks = decode_with_charset ~charsets str in 136 | List.map chunks ~f:(function 137 | | `Plain str -> str 138 | | `Encoded ((_charset : Charset.t), str) -> str) 139 | |> String.concat ~sep:"" 140 | ;; 141 | 142 | let decode_with_raw_charset str = 143 | Angstrom.parse_string 144 | ~consume:Prefix 145 | (parser_many 146 | ~charset_parser: 147 | (let%map charset = 148 | take_while (function 149 | | ' ' 150 | | '(' 151 | | ')' 152 | | '<' 153 | | '>' 154 | | '@' 155 | | ',' 156 | | ';' 157 | | ':' 158 | | '\\' 159 | | '"' 160 | | '|' 161 | | '[' 162 | | ']' 163 | | '?' 164 | | '.' 165 | | '=' -> 166 | (* List from https://datatracker.ietf.org/doc/html/rfc2047 + 167 | https://www.rfc-editor.org/errata/eid506 *) 168 | false 169 | | c -> Char.is_print c) 170 | in 171 | `Charset charset)) 172 | str 173 | |> Result.map_error ~f:Error.of_string 174 | ;; 175 | -------------------------------------------------------------------------------- /kernel/src/encoded_word.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Charset : sig 4 | type t = 5 | [ `Ascii 6 | | `Big5 7 | | `GB2312 8 | | `Latin1 9 | | `Latin2 10 | | `Utf8 11 | | `Windows1252 12 | | `KS_C_5601_1987 13 | ] 14 | [@@deriving compare, sexp_of] 15 | 16 | val all : t list 17 | end 18 | 19 | (** Like [decode_with_charset], but completely ignores all charset information and simply 20 | concats all of the bytes together. *) 21 | val decode : ?charsets:Charset.t list -> string -> string Or_error.t 22 | 23 | (** Decodes words encoded as per: https://tools.ietf.org/html/rfc2047 *) 24 | val decode_with_charset 25 | : ?charsets:Charset.t list 26 | -> string 27 | -> [ `Plain of string | `Encoded of Charset.t * string ] list Or_error.t 28 | 29 | (** Like [decode_with_charset], but supports arbitrary charsets, not just the ones in 30 | [Charset.t]. *) 31 | val decode_with_raw_charset 32 | : string 33 | -> [ `Plain of string | `Encoded of [ `Charset of string ] * string ] list Or_error.t 34 | -------------------------------------------------------------------------------- /kernel/src/headers.ml: -------------------------------------------------------------------------------- 1 | module Stable = struct 2 | open Core.Core_stable 3 | 4 | module Name = struct 5 | module V1 = struct 6 | type t = string [@@deriving bin_io, compare, hash, sexp, equal] 7 | end 8 | end 9 | 10 | module Value = struct 11 | module V1 = struct 12 | type t = string [@@deriving bin_io, compare, hash, sexp] 13 | end 14 | end 15 | 16 | module V1 = struct 17 | type t = (Name.V1.t * string) list [@@deriving bin_io, compare, hash, sexp, equal] 18 | end 19 | end 20 | 21 | open Core 22 | 23 | module Normalize = struct 24 | type encode = 25 | [ `None (* Leave whitespace unchanged *) 26 | | `Whitespace (* Cleanup leading and trailing whitespace on each line *) 27 | ] 28 | [@@deriving sexp_of] 29 | 30 | type decode = 31 | [ encode 32 | | `Whitespace_and_encoding of 33 | [ `Any_charset | `Only of Encoded_word.Charset.t list ] 34 | * [ `Pretend_all_charsets_are_same ] 35 | ] 36 | [@@deriving sexp_of] 37 | 38 | let default : [> `Whitespace ] = `Whitespace 39 | end 40 | 41 | module Name : sig 42 | type t = string [@@deriving sexp_of, compare, hash, equal] 43 | 44 | val of_string : string -> t 45 | val to_string : t -> string 46 | 47 | include Comparable.S_plain with type t := t 48 | include Hashable.S_plain with type t := t 49 | 50 | val is : t -> string -> bool 51 | end = struct 52 | include Mimestring.Case_insensitive 53 | 54 | let to_string str = str 55 | let is = equal_string 56 | end 57 | 58 | module Value : sig 59 | type t = string [@@deriving sexp_of, compare, hash] 60 | 61 | val of_string : ?normalize:Normalize.decode -> string -> t 62 | val to_string : ?normalize:Normalize.encode -> t -> string 63 | val to_string' : ?normalize:Normalize.decode -> t -> string 64 | 65 | include Comparable.S_plain with type t := t 66 | include Hashable.S_plain with type t := t 67 | end = struct 68 | include String 69 | 70 | let normalize_string str = 71 | (* From the RFC (https://datatracker.ietf.org/doc/html/rfc822#section-3.1.1), newlines 72 | followed by whitespace should be replace by just the whitespace character. 73 | Stripping the lines isn't quite obeying the standard but matches what we've done 74 | historically and some emails probably have excessive whitespace. *) 75 | String.split_lines str 76 | |> List.map ~f:String.strip 77 | |> List.filter ~f:(Fn.non String.is_empty) 78 | |> String.concat ~sep:" " 79 | ;; 80 | 81 | let of_string ?(normalize = Normalize.default) str = 82 | match normalize with 83 | | `None -> str 84 | | `Whitespace -> normalize_string str 85 | | `Whitespace_and_encoding (charsets, `Pretend_all_charsets_are_same) -> 86 | let normalized = normalize_string str in 87 | let charsets = 88 | match charsets with 89 | | `Any_charset -> Encoded_word.Charset.all 90 | | `Only charsets -> charsets 91 | in 92 | (match Encoded_word.decode ~charsets normalized with 93 | | Ok str -> str 94 | | Error _ -> normalized) 95 | ;; 96 | 97 | let to_string ?(normalize = Normalize.default) str = 98 | match normalize with 99 | | `None -> str 100 | | `Whitespace -> 101 | " " 102 | ^ (String.split_lines str |> List.map ~f:String.strip |> String.concat ~sep:"\n ") 103 | ;; 104 | 105 | let to_string' ?(normalize = Normalize.default) str = 106 | match normalize with 107 | | #Normalize.encode as normalize -> to_string ~normalize str 108 | | `Whitespace_and_encoding (_charsets, _armour) -> 109 | to_string ~normalize:`Whitespace str 110 | ;; 111 | 112 | let%expect_test "normalize_string" = 113 | let values = [ "value"; " value"; " \n value"; "\nvalue" ] in 114 | List.iter values ~f:(fun value -> normalize_string value |> print_endline); 115 | [%expect 116 | {| 117 | value 118 | value 119 | value 120 | value 121 | |}] 122 | ;; 123 | end 124 | 125 | module Common = struct 126 | let subject = "Subject" 127 | let to_ = "To" 128 | let cc = "Cc" 129 | let bcc = "Bcc" 130 | let from = "From" 131 | let date = "Date" 132 | let message_id = "Message-ID" 133 | let list_id = "list-Id" 134 | end 135 | 136 | type t = (Name.t * string) list [@@deriving sexp_of, compare, hash, equal] 137 | 138 | let to_string_monoid ?(eol = `LF) t = 139 | List.map t ~f:(fun (name, value) -> 140 | String_monoid.concat_string [ (name :> string); ":"; value; Lf_or_crlf.to_string eol ]) 141 | |> String_monoid.concat 142 | ;; 143 | 144 | let to_string ?eol t = String_monoid.to_string (to_string_monoid ?eol t) 145 | let length = List.length 146 | let empty = [] 147 | let append = List.append 148 | 149 | (* Accessors *) 150 | let last ?normalize t name = 151 | let name = Name.of_string name in 152 | List.fold t ~init:None ~f:(fun r (k, v) -> if Name.equal name k then Some v else r) 153 | |> Option.map ~f:(Value.of_string ?normalize) 154 | ;; 155 | 156 | let any ?normalize t name = 157 | let name = Name.of_string name in 158 | List.Assoc.find t name ~equal:Name.equal |> Option.map ~f:(Value.of_string ?normalize) 159 | ;; 160 | 161 | let find_all ?normalize t name = 162 | let name = Name.of_string name in 163 | List.filter_map t ~f:(fun (name', value) -> 164 | if Name.equal name name' then Some (Value.of_string ?normalize value) else None) 165 | ;; 166 | 167 | (* Modify *) 168 | let of_list ~normalize : _ -> t = 169 | List.map ~f:(fun (name, value) -> 170 | let name = Name.of_string name in 171 | let value = Value.to_string ~normalize value in 172 | name, value) 173 | ;; 174 | 175 | let to_list ?normalize : t -> _ = 176 | List.map ~f:(fun (name, value) -> name, Value.of_string ?normalize value) 177 | ;; 178 | 179 | let add ?normalize t ~name ~value = 180 | let name = Name.of_string name in 181 | let value = Value.to_string ?normalize value in 182 | let rec add acc = function 183 | | (name', _) :: _ as fields when Name.equal name name' -> 184 | List.rev acc @ [ name, value ] @ fields 185 | | field :: fields -> add (field :: acc) fields 186 | | [] -> (name, value) :: t 187 | in 188 | add [] t 189 | ;; 190 | 191 | let add_if_missing ?normalize t ~name ~value = 192 | if List.Assoc.mem t ~equal:Name.equal name then t else add ?normalize t ~name ~value 193 | ;; 194 | 195 | let set ?normalize t ~name ~value = 196 | let name = Name.of_string name in 197 | let value = Value.to_string ?normalize value in 198 | let rec set acc = function 199 | | (name', _) :: fields when Name.equal name name' -> 200 | List.rev acc @ [ name, value ] @ fields 201 | | field :: fields -> set (field :: acc) fields 202 | | [] -> (name, value) :: t 203 | in 204 | set [] t 205 | ;; 206 | 207 | let add_at_bottom ?normalize t ~name ~value = 208 | List.rev (add ?normalize (List.rev t) ~name ~value) 209 | ;; 210 | 211 | let add_at_bottom_if_missing ?normalize t ~name ~value = 212 | if List.Assoc.mem t ~equal:Name.equal name 213 | then t 214 | else add_at_bottom ?normalize t ~name ~value 215 | ;; 216 | 217 | let set_at_bottom ?normalize t ~name ~value = 218 | List.rev (set ?normalize (List.rev t) ~name ~value) 219 | ;; 220 | 221 | let add_all ?normalize t ts : t = 222 | List.fold 223 | ~init:t 224 | ~f:(fun t (name, value) -> add ?normalize t ~name ~value) 225 | (List.rev ts) 226 | ;; 227 | 228 | let add_all_at_bottom ?normalize t ts = 229 | List.fold ~init:t ~f:(fun t (name, value) -> add_at_bottom ?normalize t ~name ~value) ts 230 | ;; 231 | 232 | let filter ?normalize t ~f = 233 | List.filter t ~f:(fun (name, value) -> 234 | f ~name ~value:(Value.of_string ?normalize value)) 235 | ;; 236 | 237 | let map' ?normalize t ~f = 238 | List.map t ~f:(fun ((name : Name.t), (value_raw : string)) -> 239 | let value = Value.of_string ?normalize value_raw in 240 | let name', value' = f ~name ~value in 241 | let value = 242 | if String.equal (value :> string) value' 243 | then value_raw 244 | else Value.to_string' ?normalize value' 245 | in 246 | name', value) 247 | ;; 248 | 249 | let map ?normalize t ~f = map' ?normalize t ~f:(fun ~name ~value -> name, f ~name ~value) 250 | 251 | let smash_and_add ?normalize t ~name ~value = 252 | let values = 253 | find_all 254 | ?normalize:((normalize : Normalize.encode option) :> Normalize.decode option) 255 | t 256 | name 257 | in 258 | let t = filter t ~f:(fun ~name:name' ~value:_ -> Name.(name <> name')) in 259 | let value = String.concat (values @ [ value ]) ~sep:", " in 260 | add_at_bottom ?normalize t ~name ~value 261 | ;; 262 | 263 | let names = List.map ~f:fst 264 | 265 | module%test _ = struct 266 | let t = of_list ~normalize:`None [ "A", "a1"; "B", "b1"; "B", "b2" ] 267 | let%test_unit _ = [%test_result: string] (to_string t) ~expect:"A:a1\nB:b1\nB:b2\n" 268 | 269 | let%test_unit _ = 270 | [%test_result: string] 271 | (add ~normalize:`None t ~name:"B" ~value:"b3" |> to_string) 272 | ~expect:"A:a1\nB:b3\nB:b1\nB:b2\n" 273 | ;; 274 | 275 | let%test_unit _ = 276 | [%test_result: string] 277 | (add ~normalize:`None t ~name:"B" ~value:"b3\nb3" |> to_string) 278 | ~expect:"A:a1\nB:b3\nb3\nB:b1\nB:b2\n" 279 | ;; 280 | 281 | let%test_unit _ = 282 | [%test_result: string] 283 | (add t ~name:"B" ~value:"b3" |> to_string) 284 | ~expect:"A:a1\nB: b3\nB:b1\nB:b2\n" 285 | ;; 286 | 287 | let%test_unit _ = 288 | [%test_result: string] 289 | (add t ~name:"B" ~value:"b3\nb3" |> to_string) 290 | ~expect:"A:a1\nB: b3\n b3\nB:b1\nB:b2\n" 291 | ;; 292 | 293 | let%test_unit _ = 294 | [%test_result: string] 295 | (add ~normalize:`None t ~name:"C" ~value:"c1" |> to_string) 296 | ~expect:"C:c1\nA:a1\nB:b1\nB:b2\n" 297 | ;; 298 | 299 | let%test_unit _ = 300 | [%test_result: string] 301 | (set ~normalize:`None t ~name:"B" ~value:"b3" |> to_string) 302 | ~expect:"A:a1\nB:b3\nB:b2\n" 303 | ;; 304 | 305 | let%test_unit _ = 306 | [%test_result: string] 307 | (set ~normalize:`None t ~name:"b" ~value:"b3" |> to_string) 308 | ~expect:"A:a1\nb:b3\nB:b2\n" 309 | ;; 310 | 311 | let%test_unit _ = 312 | [%test_result: string] 313 | (set ~normalize:`None t ~name:"C" ~value:"c1" |> to_string) 314 | ~expect:"C:c1\nA:a1\nB:b1\nB:b2\n" 315 | ;; 316 | 317 | let%test_unit _ = 318 | [%test_result: string] 319 | (set ~normalize:`None t ~name:"c" ~value:"c1" |> to_string) 320 | ~expect:"c:c1\nA:a1\nB:b1\nB:b2\n" 321 | ;; 322 | 323 | let%test_unit _ = 324 | [%test_result: string] 325 | (add_at_bottom ~normalize:`None t ~name:"A" ~value:"a2" |> to_string) 326 | ~expect:"A:a1\nA:a2\nB:b1\nB:b2\n" 327 | ;; 328 | 329 | let%test_unit _ = 330 | [%test_result: string] 331 | (add_at_bottom ~normalize:`None t ~name:"B" ~value:"b3" |> to_string) 332 | ~expect:"A:a1\nB:b1\nB:b2\nB:b3\n" 333 | ;; 334 | 335 | let%test_unit _ = 336 | [%test_result: string] 337 | (add_at_bottom ~normalize:`None t ~name:"C" ~value:"c1" |> to_string) 338 | ~expect:"A:a1\nB:b1\nB:b2\nC:c1\n" 339 | ;; 340 | 341 | let%test_unit _ = 342 | [%test_result: string] 343 | (set_at_bottom ~normalize:`None t ~name:"B" ~value:"b3" |> to_string) 344 | ~expect:"A:a1\nB:b1\nB:b3\n" 345 | ;; 346 | 347 | let%test_unit _ = 348 | [%test_result: string] 349 | (set_at_bottom ~normalize:`None t ~name:"C" ~value:"c1" |> to_string) 350 | ~expect:"A:a1\nB:b1\nB:b2\nC:c1\n" 351 | ;; 352 | end 353 | -------------------------------------------------------------------------------- /kernel/src/headers.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** [Normalize] specifies how to handle header values. It is used in two contexts: 4 | 5 | 1) Transport (Normalize.encode): Specify how to turn a string into a header value. 6 | [`Whitespace] will add the necessary for transport. 7 | 8 | 2) Processing (Normalize.decode): Specify how to turn a header value into a string. 9 | [`Whitespace] will remove all leading and trailing whitespace on each line in order 10 | to cleanly process the value. *) 11 | module Normalize : sig 12 | type encode = 13 | [ `None (** Leave whitespace unchanged *) 14 | | `Whitespace (** Cleanup leading and trailing whitespace on each line *) 15 | ] 16 | [@@deriving sexp_of] 17 | 18 | type decode = 19 | [ encode 20 | | `Whitespace_and_encoding of 21 | [ `Any_charset | `Only of Encoded_word.Charset.t list ] 22 | * [ `Pretend_all_charsets_are_same ] 23 | (** Will attempt to handle decoded words. [`Any_charset] will cause all known 24 | charsets to be handled. [`Only charsets] will restrict the parsing to only the 25 | given charsets. [`Pretend_all_charsets_are_same] will not actually do any 26 | charset conversion and just copies the raw bytes. For ascii-compatible encodings 27 | this often works well-enough, though you do end up with garbage for any 28 | characters not in the ascii-plane. *) 29 | ] 30 | [@@deriving sexp_of] 31 | 32 | val default : [> `Whitespace ] 33 | end 34 | 35 | module Name : sig 36 | (** Case insensitive *) 37 | 38 | type t = string [@@deriving sexp_of, compare, hash] 39 | 40 | val of_string : string -> t 41 | val to_string : t -> string 42 | 43 | include Comparable.S_plain with type t := t 44 | include Hashable.S_plain with type t := t 45 | 46 | (** Short hand for [let is a b = equal a (of_string b)] *) 47 | 48 | val is : t -> string -> bool 49 | end 50 | 51 | (** This is just a list of commonly used header field names for simple reuse *) 52 | module Common : sig 53 | val subject : string 54 | val to_ : string 55 | val cc : string 56 | val bcc : string 57 | val from : string 58 | val date : string 59 | val message_id : string 60 | val list_id : string 61 | end 62 | 63 | module Value : sig 64 | type t = string [@@deriving sexp_of, compare, hash] 65 | 66 | (** Normalize the whitespace for processing/ if [normalize == `None] this does nothing. 67 | if [normalize == `Whitespace] (default), strip leading/trailing whitespace on every 68 | line. *) 69 | val of_string : ?normalize:Normalize.decode -> string -> t 70 | 71 | (** Normalize the whitespace for transport (insert the appropriate leading space). if 72 | [normalize == `None] this does nothing. if [normalize == `Whitespace] (default), 73 | insert a leading space and indent subsequent lines with a tab (remove any other 74 | leading/trailing space on every line). *) 75 | val to_string : ?normalize:Normalize.encode -> t -> string 76 | 77 | include Comparable.S_plain with type t := t 78 | include Hashable.S_plain with type t := t 79 | end 80 | 81 | (*_ The add and set functions are same as in Field_list, except they add a space 82 | before the value. *) 83 | 84 | type t [@@deriving compare, hash, sexp_of, equal] 85 | 86 | (** [eol] defaults to `LF *) 87 | val to_string_monoid : ?eol:Lf_or_crlf.t -> t -> String_monoid.t 88 | 89 | val to_string : ?eol:Lf_or_crlf.t -> t -> string 90 | val empty : t 91 | val append : t -> t -> t 92 | val length : t -> int 93 | val of_list : normalize:Normalize.encode -> (Name.t * Value.t) list -> t 94 | val to_list : ?normalize:Normalize.decode -> t -> (Name.t * Value.t) list 95 | val last : ?normalize:Normalize.decode -> t -> Name.t -> Value.t option 96 | val any : ?normalize:Normalize.decode -> t -> Name.t -> Value.t option 97 | val find_all : ?normalize:Normalize.decode -> t -> Name.t -> Value.t list 98 | val names : t -> Name.t list 99 | val add : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t 100 | val add_at_bottom : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t 101 | val add_if_missing : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t 102 | 103 | val add_at_bottom_if_missing 104 | : ?normalize:Normalize.encode 105 | -> t 106 | -> name:Name.t 107 | -> value:Value.t 108 | -> t 109 | 110 | val set : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t 111 | val set_at_bottom : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t 112 | val add_all : ?normalize:Normalize.encode -> t -> (Name.t * Value.t) list -> t 113 | val add_all_at_bottom : ?normalize:Normalize.encode -> t -> (Name.t * Value.t) list -> t 114 | 115 | (** If headers with this name already exist, concatenates the values for all separated by 116 | a comma, and appends the new value. Otherwise, creates a new header. *) 117 | val smash_and_add : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t 118 | 119 | val filter 120 | : ?normalize:Normalize.decode 121 | -> t 122 | -> f:(name:Name.t -> value:Value.t -> bool) 123 | -> t 124 | 125 | (** rewrite header values, preserving original whitespace where possible. 126 | 127 | [normalize] is used to [Value.of_string ?normalize] the [~value] before passing to 128 | [f], and again to [Value.to_string ?normalize] the result. If the [~value] and 129 | [f ~name ~value] are the same no change will be made (white space is preserved). 130 | 131 | Particularly the following is an identity transform: 132 | [ map ~normalize:`Whitespace ~f:(fun ~name:_ ~value -> Value.of_string ~normalize:`Whitespace value) ]. 133 | By contrast the following will 'normalize' the whitespace on all headers. 134 | [ map ~normalize:`None ~f:(fun ~name:_ ~value -> Value.of_string ~normalize:`Whitespace value) ]. *) 135 | val map 136 | : ?normalize:Normalize.decode 137 | -> t 138 | -> f:(name:Name.t -> value:Value.t -> Value.t) 139 | -> t 140 | 141 | val map' 142 | : ?normalize:Normalize.decode 143 | -> t 144 | -> f:(name:Name.t -> value:Value.t -> Name.t * Value.t) 145 | -> t 146 | 147 | module Stable : sig 148 | module Name : sig 149 | module V1 : sig 150 | type t = Name.t [@@deriving equal] 151 | 152 | include Stable_without_comparator with type t := t 153 | end 154 | end 155 | 156 | module Value : sig 157 | module V1 : Stable_without_comparator with type t = Value.t 158 | end 159 | 160 | module V1 : sig 161 | type nonrec t = t [@@deriving equal] 162 | 163 | include Stable_without_comparator with type t := t 164 | end 165 | end 166 | -------------------------------------------------------------------------------- /kernel/src/lf_or_crlf.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t = 4 | [ `LF 5 | | `CRLF 6 | ] 7 | 8 | let to_string = function 9 | | `LF -> "\n" 10 | | `CRLF -> "\r\n" 11 | ;; 12 | -------------------------------------------------------------------------------- /kernel/src/lf_or_crlf.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t = 4 | [ `LF (** \n. Used to delineate new lines on most Unix systems. *) 5 | | `CRLF (** \r\n. Used to delineate new lines over the network. *) 6 | ] 7 | 8 | val to_string : t -> string 9 | -------------------------------------------------------------------------------- /kernel/src/magic_mime_external.ml: -------------------------------------------------------------------------------- 1 | module Magic_mime = Magic_mime 2 | module Mime_types = Mime_types 3 | -------------------------------------------------------------------------------- /kernel/src/media_type.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module Params = struct 4 | type t = (Headers.Name.t * string) list [@@deriving sexp_of, compare] 5 | 6 | let last t name = 7 | let name = Headers.Name.of_string name in 8 | List.fold t ~init:None ~f:(fun r (k, v) -> 9 | if Headers.Name.equal name k then Some v else r) 10 | ;; 11 | 12 | let to_key_value_string (k, v) = 13 | Headers.Name.to_string k ^ "=" ^ Rfc.RFC2045.Token.is_valid_or_quote v 14 | ;; 15 | end 16 | 17 | type t = 18 | { mime_type : Rfc.RFC2045.Token.t 19 | ; mime_subtype : Rfc.RFC2045.Token.t 20 | ; params : Params.t 21 | } 22 | [@@deriving compare, sexp_of] 23 | 24 | let mime_type t = Rfc.RFC2045.Token.to_lowercase_string t.mime_type 25 | let mime_subtype t = Rfc.RFC2045.Token.to_lowercase_string t.mime_subtype 26 | 27 | let create mime_type mime_subtype = 28 | { mime_type = Rfc.RFC2045.Token.of_string mime_type 29 | ; mime_subtype = Rfc.RFC2045.Token.of_string mime_subtype 30 | ; params = [] 31 | } 32 | ;; 33 | 34 | let set_param t ~name ~value = 35 | { t with 36 | params = 37 | List.filter t.params ~f:(fun (k, _) -> not (Headers.Name.is name k)) 38 | @ [ name, value ] 39 | } 40 | ;; 41 | 42 | let is ?mime_type ?mime_subtype t = 43 | let module T = Rfc.RFC2045.Token in 44 | Option.value_map mime_type ~default:true ~f:(fun mime_type -> 45 | T.equal t.mime_type (T.of_string mime_type)) 46 | && Option.value_map mime_subtype ~default:true ~f:(fun mime_subtype -> 47 | T.equal t.mime_subtype (T.of_string mime_subtype)) 48 | ;; 49 | 50 | (* Some convenience functions for working with mime types *) 51 | let is_text t = is ~mime_type:"text" t 52 | let is_multipart t = is ~mime_type:"multipart" t 53 | let is_multipart_report = is ~mime_type:"multipart" ~mime_subtype:"report" 54 | let is_message_rfc822 = is ~mime_type:"message" ~mime_subtype:"rfc822" 55 | let is_digest t = is ~mime_type:"multipart" ~mime_subtype:"digest" t 56 | 57 | let multipart_boundary t = 58 | if is_multipart t 59 | then Option.map ~f:Boundary.of_string (Params.last t.params "boundary") 60 | else None 61 | ;; 62 | 63 | let set_multipart_boundary t boundary = 64 | set_param t ~name:"boundary" ~value:(Boundary.to_string boundary) 65 | ;; 66 | 67 | let of_grammar (mime_type, mime_subtype, params) = 68 | { mime_type = Rfc.RFC2045.Token.of_string mime_type 69 | ; mime_subtype = Rfc.RFC2045.Token.of_string mime_subtype 70 | ; params 71 | } 72 | ;; 73 | 74 | let of_string x = 75 | of_grammar 76 | (Media_type_grammar.content_type Media_type_lexer.content_type (Lexing.from_string x)) 77 | ;; 78 | 79 | let to_string t = 80 | String.concat 81 | ~sep:"; " 82 | ((Rfc.RFC2045.Token.to_lowercase_string t.mime_type 83 | ^ "/" 84 | ^ Rfc.RFC2045.Token.to_lowercase_string t.mime_subtype) 85 | :: List.map t.params ~f:Params.to_key_value_string) 86 | ;; 87 | 88 | let from_headers headers = 89 | Option.bind (Headers.last ~normalize:`None headers "Content-Type") ~f:(fun field -> 90 | Option.try_with (fun () -> of_string field)) 91 | ;; 92 | 93 | let set_headers headers t = 94 | match from_headers headers with 95 | | Some t' when [%compare.equal: t] t t' -> headers 96 | | _ -> Headers.set_at_bottom headers ~name:"Content-Type" ~value:(to_string t) 97 | ;; 98 | 99 | let message_rfc822 = create "message" "rfc822" 100 | 101 | let text_plain ?(charset = "us-ascii") () = 102 | create "text" "plain" |> set_param ~name:"charset" ~value:charset 103 | ;; 104 | 105 | let create_multipart mime_subtype ~boundary = 106 | set_multipart_boundary (create "multipart" mime_subtype) boundary 107 | ;; 108 | 109 | let default_default = text_plain () 110 | let default_digest = message_rfc822 111 | 112 | let default ?parent () = 113 | if Option.value_map parent ~f:is_digest ~default:false 114 | then default_digest 115 | else default_default 116 | ;; 117 | 118 | let%expect_test _ = 119 | let test t = 120 | let s = to_string t in 121 | print_endline s; 122 | let t' = of_string s in 123 | [%test_eq: t] t t' 124 | in 125 | test message_rfc822; 126 | [%expect {| message/rfc822 |}]; 127 | test (text_plain ()); 128 | [%expect {| text/plain; charset=us-ascii |}]; 129 | test (text_plain () ~charset:"nonsense charset"); 130 | [%expect {| text/plain; charset="nonsense charset" |}]; 131 | test default_default; 132 | [%expect {| text/plain; charset=us-ascii |}]; 133 | test default_digest; 134 | [%expect {| message/rfc822 |}]; 135 | test (create "application" "json"); 136 | [%expect {| application/json |}]; 137 | test (create_multipart "related" ~boundary:(Boundary.of_string "--::_BOUNDARY_::--")); 138 | [%expect {| multipart/related; boundary="--::_BOUNDARY_::--" |}]; 139 | test (create_multipart "related" ~boundary:(Boundary.of_string "boundary")); 140 | [%expect {| multipart/related; boundary=boundary |}]; 141 | test 142 | (create_multipart 143 | "related" 144 | ~boundary:(Boundary.of_string "questionable boundary string")); 145 | [%expect {| multipart/related; boundary="questionable boundary string" |}]; 146 | let test s = 147 | let t = of_string s in 148 | printf !"%{sexp:t}\n" t; 149 | test t 150 | in 151 | test {|text/plain|}; 152 | [%expect 153 | {| 154 | ((mime_type text) (mime_subtype plain) (params ())) 155 | text/plain 156 | |}]; 157 | test {|test/plain; charset="us-ascii"|}; 158 | [%expect 159 | {| 160 | ((mime_type test) (mime_subtype plain) (params ((charset us-ascii)))) 161 | test/plain; charset=us-ascii 162 | |}]; 163 | test 164 | {|multipart/related; 165 | boundary="--::FOO"|}; 166 | [%expect 167 | {| 168 | ((mime_type multipart) (mime_subtype related) (params ((boundary --::FOO)))) 169 | multipart/related; boundary="--::FOO" 170 | |}] 171 | ;; 172 | -------------------------------------------------------------------------------- /kernel/src/media_type.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t [@@deriving sexp_of, compare] 4 | 5 | val multipart_boundary : t -> Boundary.t option 6 | val set_multipart_boundary : t -> Boundary.t -> t 7 | val from_headers : Headers.t -> t option 8 | val set_headers : Headers.t -> t -> Headers.t 9 | val default : ?parent:t -> unit -> t 10 | 11 | (** [create type_ subtype] represents the mime-type "type_/subtype". *) 12 | val create : string -> string -> t 13 | 14 | val mime_type : t -> string 15 | val mime_subtype : t -> string 16 | val create_multipart : string -> boundary:Boundary.t -> t 17 | val message_rfc822 : t 18 | val text_plain : ?charset:string -> unit -> t 19 | val is_text : t -> bool 20 | val is_multipart : t -> bool 21 | val is_multipart_report : t -> bool 22 | val is_message_rfc822 : t -> bool 23 | val is_digest : t -> bool 24 | -------------------------------------------------------------------------------- /kernel/src/media_type_grammar.mly: -------------------------------------------------------------------------------- 1 | %{ 2 | open! Core 3 | 4 | %} 5 | 6 | /*(* Standard tokens *)*/ 7 | %token ERROR 8 | %token EOF 9 | 10 | %token STRING 11 | %token ATOM 12 | 13 | %token EQUALS 14 | %token SLASH 15 | %token SEMICOLON 16 | 17 | 18 | %start content_type 19 | %type content_type 20 | 21 | %% 22 | 23 | content_type : ctype SLASH csubtype param_list { ($1, $3, $4) } 24 | ; 25 | 26 | ctype : ATOM { $1 }; 27 | csubtype : ATOM { $1 }; 28 | 29 | /*(* Some implementations wrongfully add semicolons at the end of the Content-type field. 30 | This rule allows for it. 31 | *)*/ 32 | param_list : 33 | | param_list_aux EOF { $1 } 34 | | param_list_aux semicolon EOF { $1 } 35 | ; 36 | 37 | /*(* Be tolerant of repeated semicolons *)*/ 38 | semicolon : 39 | | SEMICOLON { () } 40 | | semicolon SEMICOLON { () } 41 | ; 42 | 43 | param_list_aux : 44 | { [] } 45 | | param_list_aux semicolon parameter { $3 :: $1 } 46 | ; 47 | 48 | parameter : attribute EQUALS value { ($1, $3) }; 49 | attribute : ATOM { $1 }; 50 | value : 51 | ATOM { $1 } 52 | | STRING { $1 } 53 | ; 54 | 55 | %% 56 | 57 | 58 | -------------------------------------------------------------------------------- /kernel/src/media_type_grammar_types.ml: -------------------------------------------------------------------------------- 1 | (* Field types *) 2 | type content_type = string * string * (string * string) list 3 | -------------------------------------------------------------------------------- /kernel/src/media_type_grammar_types.mli: -------------------------------------------------------------------------------- 1 | (** Field types *) 2 | 3 | type content_type = string * string * (string * string) list 4 | -------------------------------------------------------------------------------- /kernel/src/media_type_lexer.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val content_type : Lexing.lexbuf -> Media_type_grammar.token 4 | -------------------------------------------------------------------------------- /kernel/src/media_type_lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Media_type_grammar 3 | 4 | let unescape_staged = Core.String.Escaping.unescape ~escape_char:'\\';; 5 | let unescape = Core.unstage unescape_staged;; 6 | 7 | } 8 | (* Rules from RFCs. Some may need to be copied to specific places to parse 9 | * their individual parts *) 10 | 11 | (** RFC2234 - Core *) 12 | let cr = "\013" 13 | let lf = "\010" 14 | 15 | (* Deliberately chose to include bare CR and LF here, although the RFC suggests 16 | * them to be included as part of the text characters. 17 | * The rationale being that, when parsing e-mail from a text file, they will 18 | * probably mean CRLF. 19 | * The issue should not arise in conforming e-mails. 20 | *) 21 | let crlf_conforming = cr lf 22 | let crlf_non_conforming = crlf_conforming | lf 23 | let crlf = crlf_non_conforming 24 | 25 | let wsp = [' ' '\t'] 26 | 27 | (** RFC2822 3.2.1 -- Primitive tokens *) 28 | 29 | let no_ws_ctl = [ '\001'-'\008' '\011' '\012' '\014'-'\031' '\127'] 30 | 31 | (* See crlf *) 32 | let text_non_conforming = [ '\001'-'\009' '\011' '\012' '\014'-'\127' ] 33 | 34 | (** RFC2822 3.2.2 -- Quoted characters *) 35 | let obs_qp = "\\" [ '\000' - '\127' ] 36 | (* Allows for escaping of newlines *) 37 | let crlf_qp_non_conforming = "\\" crlf 38 | 39 | let quoted_pair = "\\" text_non_conforming | crlf_qp_non_conforming | obs_qp 40 | 41 | (** RFC2822 3.2.3 -- Folding whitespace and comments *) 42 | (* Obs: If this matches a CRLF, there's forcibly wsp afterwards *) 43 | let obs_fws = wsp + (crlf wsp +) * 44 | let fws = ((wsp * crlf) ? wsp + ) | obs_fws 45 | 46 | (** RFC2822 3.2.5 -- Quoted strings *) 47 | let qtext = no_ws_ctl | [ '\033' '\035'-'\091' '\093'-'\126' ] 48 | let qcontent = qtext | quoted_pair 49 | 50 | let quoted_string_contents = (fws ? qcontent) * fws ? 51 | (* 52 | let quoted_string = cfws ? '"' quoted_string_contents '"' cfws ? 53 | (** RFC2822 3.2.6 -- Miscellaneous tokens *) 54 | let word = atom | quoted_string 55 | 56 | let obs_phrase = word (word | "." | cfws) * 57 | let phrase = word + | obs_phrase 58 | *) 59 | 60 | (**********************************************************) 61 | 62 | (* RFC 2045 5.1 -- Syntax of the Content-type header field *) 63 | (* Removed space characters, control characters, those outsize US-ASCII and 64 | * tspecials *) 65 | let token_char = 66 | ([ ^ '\n' '\r' ' ' 67 | '(' ')' '<' '>' '@' ',' ';' ':' '\\' '"' '/' '[' ']' '?' '=' 68 | '\000' - '\031' '\127' ]) 69 | 70 | let token = token_char + 71 | 72 | rule 73 | 74 | (******************) 75 | (* HEADER PARSING *) 76 | (******************) 77 | (* These lexers are independent from the former, and work the way traditional 78 | lexers do, without a dispatcher or state handlers *) 79 | 80 | 81 | (** Allows other lexers to skip comments. 82 | 83 | Example usage: 84 | rule my_lexer = parse 85 | | '(' -> { my_lexer (comment 1 lexbuf) } 86 | | .. other patterns .. 87 | 88 | *) 89 | comment level = parse 90 | | '(' { comment (level + 1) lexbuf } 91 | | ')' fws * { 92 | if level <= 1 then 93 | begin 94 | assert (level = 1); lexbuf 95 | end 96 | else 97 | comment (level - 1) lexbuf } 98 | | '\\' ? _ { comment level lexbuf } 99 | and 100 | 101 | (* Parses a Content-type field *) 102 | content_type = parse 103 | | '(' { content_type (comment 1 lexbuf) } 104 | | fws { content_type lexbuf } 105 | | '"' (quoted_string_contents as str) '"' { STRING (unescape str) } 106 | | '/' { SLASH } 107 | | token as str { ATOM (str) } 108 | | '=' { EQUALS } 109 | | ';' { SEMICOLON } 110 | | _ as c { ERROR (Core.sprintf "Unexpected char: %c" c) } 111 | | eof { EOF } 112 | 113 | 114 | { 115 | 116 | } 117 | -------------------------------------------------------------------------------- /kernel/src/mimestring.ml: -------------------------------------------------------------------------------- 1 | open Core.Core_stable 2 | 3 | (** Case-insensitive strings *) 4 | module Case_insensitive = struct 5 | module Stable = struct 6 | module V1 = struct 7 | type t = String.V1.t [@@deriving sexp, bin_io] 8 | 9 | let compare = Core.String.Caseless.compare 10 | let comparator = Core.String.Caseless.comparator 11 | 12 | type comparator_witness = Core.String.Caseless.comparator_witness 13 | 14 | let hash = Core.String.Caseless.hash 15 | let hash_fold_t = Core.String.Caseless.hash_fold_t 16 | end 17 | end 18 | 19 | open Core 20 | include String.Caseless 21 | 22 | let of_string = Fn.id 23 | let to_string = Fn.id 24 | let to_lowercase_string = String.lowercase 25 | let equal_string = equal 26 | end 27 | 28 | open Core 29 | 30 | module type S = sig 31 | type t [@@deriving sexp] 32 | 33 | val of_string : string -> t 34 | val to_lowercase_string : t -> string 35 | val equal_string : t -> string -> bool 36 | 37 | include Comparable.S_plain with type t := t 38 | include Hashable.S_plain with type t := t 39 | end 40 | 41 | let quote_escape = 42 | unstage (String.Escaping.escape ~escapeworthy:[ '"'; '\\' ] ~escape_char:'\\') 43 | ;; 44 | 45 | let quote str = String.concat [ "\""; quote_escape str; "\"" ] 46 | -------------------------------------------------------------------------------- /kernel/src/mimestring.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (*_ For usage in functors *) 4 | 5 | module type S = sig 6 | type t [@@deriving sexp] 7 | 8 | val of_string : string -> t 9 | val to_lowercase_string : t -> string 10 | val equal_string : t -> string -> bool 11 | 12 | include Comparable.S_plain with type t := t 13 | include Hashable.S_plain with type t := t 14 | end 15 | 16 | module Case_insensitive : sig 17 | include 18 | S 19 | with type t = string 20 | and type comparator_witness = String.Caseless.comparator_witness 21 | 22 | val to_string : t -> string 23 | 24 | module Stable : sig 25 | module V1 : sig 26 | type nonrec t = t [@@deriving sexp, bin_io, compare, hash] 27 | 28 | include 29 | Stable 30 | with type t := t 31 | and type comparator_witness = String.Caseless.comparator_witness 32 | end 33 | end 34 | end 35 | 36 | val quote : string -> string 37 | -------------------------------------------------------------------------------- /kernel/src/octet_stream.ml: -------------------------------------------------------------------------------- 1 | module Stable = struct 2 | open Core.Core_stable 3 | 4 | module Encoding = struct 5 | module V1 = struct 6 | type t = 7 | [ `Base64 8 | | `Bit7 9 | | `Bit8 10 | | `Binary 11 | | `Quoted_printable 12 | | `Unknown of string 13 | ] 14 | [@@deriving sexp, bin_io] 15 | end 16 | end 17 | 18 | module V1 = struct 19 | type t = 20 | { encoding : Encoding.V1.t 21 | ; content : Bigstring_shared.Stable.V1.t 22 | } 23 | [@@deriving sexp, bin_io] 24 | end 25 | end 26 | 27 | open Core 28 | 29 | module Encoding = struct 30 | (** Text or binary are the type of the plaintext. For Base64, if the mode is text, '\n' 31 | is turned into '\r\n' when encoding, and viceversa. *) 32 | type known = 33 | [ `Base64 34 | | `Bit7 35 | | `Bit8 36 | | `Binary 37 | | `Quoted_printable 38 | ] 39 | [@@deriving sexp_of, compare, hash] 40 | 41 | type t = 42 | (* Stable.Encoding.V1.t = *) 43 | [ known 44 | | `Unknown of string 45 | ] 46 | [@@deriving sexp_of, compare, hash] 47 | 48 | let of_string encoding = 49 | match encoding |> String.strip |> String.lowercase with 50 | | "base64" -> `Base64 51 | | "7bit" -> `Bit7 52 | | "8bit" -> `Bit8 53 | | "binary" -> `Binary 54 | | "quoted-printable" -> `Quoted_printable 55 | | unknown -> `Unknown unknown 56 | ;; 57 | 58 | let to_string = function 59 | | `Base64 -> "base64" 60 | | `Bit7 -> "7bit" 61 | | `Bit8 -> "8bit" 62 | | `Binary -> "binary" 63 | | `Quoted_printable -> "quoted-printable" 64 | | `Unknown unknown -> unknown 65 | ;; 66 | 67 | let default = `Bit7 68 | let default' = `Bit7 69 | 70 | let of_headers ?(ignore_base64_for_multipart = true) headers = 71 | Headers.last headers "content-transfer-encoding" 72 | |> Option.map ~f:of_string 73 | |> function 74 | | Some `Base64 when ignore_base64_for_multipart -> 75 | let is_multipart = 76 | match Media_type.from_headers headers with 77 | | Some media_type -> Media_type.is_multipart media_type 78 | | None -> false 79 | in 80 | if is_multipart then Some default else Some `Base64 81 | | _ as encoding -> encoding 82 | ;; 83 | 84 | let of_headers_or_default ?ignore_base64_for_multipart headers = 85 | match of_headers ?ignore_base64_for_multipart headers with 86 | | Some t -> t 87 | | None -> default 88 | ;; 89 | end 90 | 91 | type t = Stable.V1.t = 92 | { encoding : Encoding.t 93 | ; content : Bigstring_shared.t 94 | } 95 | [@@deriving sexp_of, compare, hash] 96 | 97 | let encoding t = t.encoding 98 | let encoded_contents t = t.content 99 | let encoded_contents_string t = Bigstring_shared.to_string (encoded_contents t) 100 | let of_bigstring_shared ~encoding content = { encoding; content } 101 | 102 | let of_string ~encoding str = 103 | of_bigstring_shared ~encoding (Bigstring_shared.of_string str) 104 | ;; 105 | 106 | let empty = of_bigstring_shared ~encoding:Encoding.default Bigstring_shared.empty 107 | 108 | module Identity = struct 109 | let encode bstr = bstr 110 | let decode bstr = bstr 111 | end 112 | 113 | module Base64 = struct 114 | let buffer_size = 1024 115 | let empty_bytes = Bytes.create 0 116 | 117 | let decode (src : Bigstring_shared.t) = 118 | let src = (src :> Bigstring.t) in 119 | let dst = Bigbuffer.create ((Bigstring.length src + 3) / 4 * 3) in 120 | let buffer = Bytes.create buffer_size in 121 | let decoder = Base64_rfc2045.decoder `Manual in 122 | let rec loop ~pos = 123 | match Base64_rfc2045.decode decoder with 124 | | `Await -> 125 | if pos = Bigstring.length src 126 | then ( 127 | (* Signal end of input. *) 128 | Base64_rfc2045.src decoder empty_bytes 0 0; 129 | loop ~pos) 130 | else ( 131 | let len = Int.min (Bigstring.length src - pos) buffer_size in 132 | Bigstring.To_bytes.blit ~src ~src_pos:pos ~dst:buffer ~dst_pos:0 ~len; 133 | Base64_rfc2045.src decoder buffer 0 len; 134 | loop ~pos:(pos + len)) 135 | | `Wrong_padding -> 136 | (* Ignore padding issues. *) 137 | loop ~pos 138 | | `End -> Bigstring_shared.of_bigbuffer_volatile dst 139 | | `Flush str -> 140 | Bigbuffer.add_string dst str; 141 | loop ~pos 142 | | `Malformed _unparsed -> 143 | (* Ignored invalid characters. *) 144 | loop ~pos 145 | in 146 | loop ~pos:0 147 | ;; 148 | 149 | let encoded_length input = 150 | (* 3 characters becomes 4 *) 151 | let base64_3_4_expanded_length = (Bigstring.length input + 2) / 3 * 4 in 152 | (* "\r\n" is added for line breaks *) 153 | let base64_rfc2045_line_length = 76 in 154 | let base64_rfc2045_lines = 155 | (base64_3_4_expanded_length + (base64_rfc2045_line_length - 1)) 156 | / base64_rfc2045_line_length 157 | in 158 | base64_rfc2045_lines * (base64_rfc2045_line_length + String.length "\r\n") 159 | ;; 160 | 161 | let encode (src : Bigstring_shared.t) = 162 | let src = (src :> Bigstring.t) in 163 | let dst = Bigbuffer.create (encoded_length src) in 164 | let encoder = Base64_rfc2045.encoder `Manual in 165 | let buffer = Bytes.create buffer_size in 166 | Base64_rfc2045.dst encoder buffer 0 buffer_size; 167 | let rec flush = function 168 | | `Ok -> () 169 | | `Partial -> 170 | let len = buffer_size - Base64_rfc2045.dst_rem encoder in 171 | assert (len > 0); 172 | Bigbuffer.add_subbytes dst buffer ~pos:0 ~len; 173 | Base64_rfc2045.dst encoder buffer 0 buffer_size; 174 | flush (Base64_rfc2045.encode encoder `Await) 175 | in 176 | let rec loop ~pos = 177 | if pos >= Bigstring.length src 178 | then ( 179 | flush (Base64_rfc2045.encode encoder `End); 180 | Bigstring_shared.of_bigbuffer_volatile dst) 181 | else ( 182 | flush (Base64_rfc2045.encode encoder (`Char (Bigstring.get src pos))); 183 | loop ~pos:(pos + 1)) 184 | in 185 | loop ~pos:0 186 | ;; 187 | end 188 | 189 | module Quoted_printable = struct 190 | let decode bstr = 191 | (* The RFC2045 says that newlines can be converted to the platforms native 192 | format, so that's what we'll do. It's the same for both binary data and 193 | text data. If a CRLF sequence appears in the decoded data, that's because 194 | it was encoded as =0D=0A, which means the characters shouldn't be 195 | interpreted as EOL. *) 196 | let bigbuffer, _ = 197 | Quoted_printable_lexer.decode_quoted_printable 198 | (Bigstring_shared.length bstr) 199 | (Bigstring_shared.to_lexbuf bstr) 200 | in 201 | Bigstring_shared.of_bigbuffer_volatile bigbuffer 202 | ;; 203 | 204 | let encode bstr = 205 | let bigbuffer = 206 | Quoted_printable_lexer.encode_quoted_printable 207 | (Bigstring_shared.length bstr) 208 | (Bigstring_shared.to_lexbuf bstr) 209 | in 210 | Bigstring_shared.of_bigbuffer_volatile bigbuffer 211 | ;; 212 | end 213 | 214 | let decode t = 215 | match t.encoding with 216 | | `Base64 -> Some (Base64.decode t.content) 217 | | `Quoted_printable -> Some (Quoted_printable.decode t.content) 218 | | `Bit7 -> Some (Identity.decode t.content) 219 | | `Bit8 -> Some (Identity.decode t.content) 220 | | `Binary -> Some (Identity.decode t.content) 221 | | `Unknown _ -> None 222 | ;; 223 | 224 | let encode ~encoding bstr = 225 | let bstr = 226 | match encoding with 227 | | `Base64 -> Base64.encode bstr 228 | | `Quoted_printable -> Quoted_printable.encode bstr 229 | | `Bit7 -> Identity.encode bstr 230 | | `Bit8 -> Identity.encode bstr 231 | | `Binary -> Identity.encode bstr 232 | in 233 | let encoding = (encoding :> Encoding.t) in 234 | of_bigstring_shared ~encoding bstr 235 | ;; 236 | -------------------------------------------------------------------------------- /kernel/src/octet_stream.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** RFC 2045 MIME-encoded Bigstrings. *) 4 | module Encoding : sig 5 | (** Text or binary are the type of the plaintext. For Base64, if the mode is text, '\n' 6 | is turned into '\r\n' when encoding, and vice versa. *) 7 | type known = 8 | [ `Base64 9 | | `Bit7 10 | | `Bit8 11 | | `Binary 12 | | `Quoted_printable 13 | ] 14 | [@@deriving sexp_of, compare, hash] 15 | 16 | type t = 17 | [ known 18 | | `Unknown of string 19 | ] 20 | [@@deriving sexp_of, compare, hash] 21 | 22 | (*_ RFC 2045 says 7bit should be assumed if the Content-Transfer-Encoding heading is 23 | missing. *) 24 | 25 | val default : known 26 | val default' : t 27 | 28 | (** Determine an encoding based on email headers. [ignore_base64_for_multipart] is 29 | useful because some clients can't read RFCs and incorrectly indicate a transfer 30 | encoding of base64 for multipart messages. *) 31 | val of_headers_or_default 32 | : ?ignore_base64_for_multipart:bool (** default: true *) 33 | -> Headers.t 34 | -> t 35 | 36 | include Stringable.S with type t := t 37 | end 38 | 39 | type t [@@deriving sexp_of, compare, hash] 40 | 41 | val of_string : encoding:Encoding.t -> string -> t 42 | val of_bigstring_shared : encoding:Encoding.t -> Bigstring_shared.t -> t 43 | val empty : t 44 | val encoding : t -> Encoding.t 45 | val encoded_contents : t -> Bigstring_shared.t 46 | val encoded_contents_string : t -> string 47 | 48 | (** These are the expensive operation. *) 49 | 50 | val encode : encoding:Encoding.known -> Bigstring_shared.t -> t 51 | 52 | (** None if encoding is `Unknown. *) 53 | val decode : t -> Bigstring_shared.t option 54 | 55 | module Stable : sig 56 | module V1 : sig 57 | type nonrec t = t [@@deriving sexp, bin_io] 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /kernel/src/quoted_printable_lexer.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val decode_quoted_printable 4 | : int 5 | -> Lexing.lexbuf 6 | -> Bigbuffer.t * [ `Ok | `Unexpected_characters ] 7 | 8 | (*_ quoted printable is ALWAYS encoded as text *) 9 | 10 | val encode_quoted_printable : int -> Lexing.lexbuf -> Bigbuffer.t 11 | -------------------------------------------------------------------------------- /kernel/src/quoted_printable_lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | 3 | (* Can't open Core, as the OCamlLex generated code requires Array.create 4 | * to be of type (int -> 'a -> 'a t), not (len:int -> 'a -> 'a t) *) 5 | module C = Core 6 | 7 | module Quoted_printable = struct 8 | 9 | (* Quoted printable functions *) 10 | let decode_hex a b = 11 | let decode_digit c = match c with 12 | | '0'..'9' -> (Char.code c) - (Char.code '0') + 0 13 | | 'A'..'F' -> (Char.code c) - (Char.code 'A') + 10 14 | | 'a'..'f' -> (Char.code c) - (Char.code 'a') + 10 15 | | _ -> invalid_arg "c" 16 | in 17 | Char.chr (((decode_digit a) * 16) + (decode_digit b)) 18 | ;; 19 | 20 | let hex_to = "0123456789ABCDEF" 21 | 22 | module Buffer = struct 23 | type t = 24 | { 25 | max_len : int; 26 | text : C.Bigbuffer.t; 27 | 28 | (* Invariant: The length of `word' is always less than `max_len' *) 29 | word : C.Bigbuffer.t; 30 | 31 | (* Invariant: `pos' is always less than `max_len' *) 32 | mutable pos : int; 33 | } 34 | ;; 35 | 36 | let create len = 37 | { 38 | text = C.Bigbuffer.create len; 39 | word = C.Bigbuffer.create 16; 40 | max_len = 76; 41 | pos = 0; 42 | } 43 | ;; 44 | 45 | let add_break t = 46 | if t.pos > 0 then 47 | begin 48 | C.Bigbuffer.add_string t.text "=\n"; 49 | t.pos <- 0 50 | end 51 | ;; 52 | 53 | (* Adds the current word to the buffer, wrapping it to the 54 | next line if necessary *) 55 | let commit_word t = 56 | if t.pos + C.Bigbuffer.length t.word >= t.max_len then add_break t; 57 | 58 | C.Bigbuffer.add_buffer t.text t.word; 59 | t.pos <- t.pos + C.Bigbuffer.length t.word; 60 | C.Bigbuffer.clear t.word 61 | ; 62 | ;; 63 | 64 | let wrap_bigword t len_next = 65 | if C.Bigbuffer.length t.word >= t.max_len - len_next then 66 | begin 67 | add_break t; 68 | commit_word t; 69 | add_break t; 70 | end 71 | ;; 72 | 73 | let add_char t c = 74 | wrap_bigword t 1; 75 | C.Bigbuffer.add_char t.word c; 76 | ;; 77 | 78 | 79 | let add_quoted_char t c = 80 | wrap_bigword t 3; 81 | let code = Char.code c in 82 | let high = (code lsr 4) land (16 - 1) in 83 | let low = code land (16 - 1) in 84 | C.Bigbuffer.add_char t.word '='; 85 | C.Bigbuffer.add_char t.word hex_to.[high]; 86 | C.Bigbuffer.add_char t.word hex_to.[low] 87 | ;; 88 | 89 | (* When calling this function, one must not immediately 90 | call add_new_line. 91 | *) 92 | let add_wsp t c = 93 | add_char t c; 94 | commit_word t 95 | ;; 96 | 97 | let add_new_line t c = 98 | C.Option.iter c ~f:(fun c -> add_quoted_char t c); 99 | commit_word t; 100 | C.Bigbuffer.add_char t.text '\n'; 101 | t.pos <- 0 102 | ;; 103 | 104 | let to_bigbuffer t = 105 | commit_word t; 106 | t.text 107 | ;; 108 | end 109 | 110 | (* TEST_MODULE = struct 111 | * TEST = C.String.length hex_to = 16;; 112 | * end *) 113 | end 114 | 115 | 116 | 117 | } 118 | (* Rules from RFCs. Some may need to be copied to specific places to parse 119 | * their individual parts *) 120 | 121 | (** RFC2234 - Core *) 122 | let cr = "\013" 123 | let lf = "\010" 124 | 125 | (* Deliberately chose to include bare CR and LF here, although the RFC suggests 126 | * them to be included as part of the text characters. 127 | * The rationale being that, when parsing e-mail from a text file, they will 128 | * probably mean CRLF. 129 | * The issue should not arise in conforming e-mails. 130 | *) 131 | let crlf_conforming = cr lf 132 | let crlf_non_conforming = crlf_conforming | lf 133 | let crlf = crlf_non_conforming 134 | 135 | let wsp = [' ' '\t'] 136 | 137 | (* Quoted-printable *) 138 | let hexdigit_non_strict = ['0'-'9' 'a'-'f' 'A'-'F'] 139 | let hexdigit_strict = hexdigit_non_strict # ['a'-'f'] 140 | let qp_wsp = [' ' '\t'] 141 | 142 | let qp_allowed = [ '\033'-'\060' '\062'-'\126' ] as c 143 | 144 | rule 145 | (* Quoted-printable text *) 146 | decode_quoted_printable buffer dirty = parse 147 | | wsp * crlf 148 | { 149 | C.Bigbuffer.add_char buffer '\n'; 150 | decode_quoted_printable buffer dirty lexbuf 151 | } 152 | | '=' crlf { decode_quoted_printable buffer dirty lexbuf } 153 | | '=' (hexdigit_strict as a) (hexdigit_strict as b) 154 | { 155 | C.Bigbuffer.add_char buffer (Quoted_printable.decode_hex a b); 156 | decode_quoted_printable buffer dirty lexbuf 157 | } 158 | | '=' (hexdigit_non_strict as a) (hexdigit_non_strict as b) 159 | { 160 | C.Bigbuffer.add_char buffer (Quoted_printable.decode_hex a b); 161 | decode_quoted_printable buffer `Unexpected_characters lexbuf 162 | } 163 | | (qp_allowed | wsp) as c 164 | { 165 | C.Bigbuffer.add_char buffer c; 166 | decode_quoted_printable buffer dirty lexbuf; 167 | } 168 | | _ as c 169 | { 170 | (* This characters shoudn't appear in a quoted-printable body, 171 | buy the most robust way to handle them is probably copying 172 | them verbatim *) 173 | C.Bigbuffer.add_char buffer c; 174 | decode_quoted_printable buffer `Unexpected_characters lexbuf 175 | } 176 | | eof { dirty } 177 | 178 | and 179 | 180 | (* Quoted-printable encoding with wrapping *) 181 | encode_quoted_printable buffer = parse 182 | | (wsp as c) ? '\n' 183 | { 184 | Quoted_printable.Buffer.add_new_line buffer c; 185 | encode_quoted_printable buffer lexbuf 186 | } 187 | | wsp as c 188 | { 189 | Quoted_printable.Buffer.add_wsp buffer c; 190 | encode_quoted_printable buffer lexbuf 191 | } 192 | | qp_allowed as c 193 | { 194 | Quoted_printable.Buffer.add_char buffer c; 195 | encode_quoted_printable buffer lexbuf 196 | } 197 | | _ as c 198 | { 199 | Quoted_printable.Buffer.add_quoted_char buffer c; 200 | encode_quoted_printable buffer lexbuf 201 | } 202 | | eof 203 | { 204 | () 205 | } 206 | 207 | { 208 | 209 | let decode_quoted_printable len lexbuf = 210 | let length_estimate = len in 211 | let bigbuffer = C.Bigbuffer.create length_estimate in 212 | let warnings = decode_quoted_printable bigbuffer `Ok lexbuf in 213 | (bigbuffer, warnings) 214 | ;; 215 | 216 | let encode_quoted_printable len lexbuf = 217 | let length_estimate = len in 218 | let buffer = Quoted_printable.Buffer.create length_estimate in 219 | encode_quoted_printable buffer lexbuf 220 | ; 221 | 222 | Quoted_printable.Buffer.to_bigbuffer buffer 223 | ;; 224 | 225 | } 226 | -------------------------------------------------------------------------------- /kernel/src/rfc.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module RFC2045 = struct 4 | module Token = struct 5 | include (Mimestring.Case_insensitive : Mimestring.S) 6 | 7 | let is_valid str = 8 | (not (String.is_empty str)) 9 | && String.for_all str ~f:(function 10 | | '(' 11 | | ')' 12 | | '<' 13 | | '>' 14 | | '@' 15 | | ',' 16 | | ';' 17 | | ':' 18 | | '\\' 19 | | '"' 20 | | '/' 21 | | '[' 22 | | ']' 23 | | '?' 24 | | '=' -> false 25 | (* '\n', '\r', ' ' are excluded by the following: *) 26 | | '\033' .. '\126' -> true 27 | | _ -> false) 28 | ;; 29 | 30 | let is_valid_or_quote str = if is_valid str then str else Mimestring.quote str 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /kernel/src/rfc.mli: -------------------------------------------------------------------------------- 1 | (** Some functions for processing tokens from the BNF grammars that appear in the RFCs *) 2 | 3 | module RFC2045 : sig 4 | module Token : sig 5 | include Mimestring.S 6 | 7 | (** True if the string doesn't need to be quoted *) 8 | val is_valid : string -> bool 9 | 10 | (** Quotes a string if necessary *) 11 | val is_valid_or_quote : string -> string 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /kernel/src/string_monoid.mli: -------------------------------------------------------------------------------- 1 | include String_monoid_intf.String_monoid (** @inline *) 2 | 3 | module Private : sig 4 | val output : dst_output:(Underlying.t -> unit) -> t -> unit 5 | end 6 | -------------------------------------------------------------------------------- /kernel/src/string_monoid_intf.ml: -------------------------------------------------------------------------------- 1 | (** Simple library for concatenating immutable strings efficiently *) 2 | 3 | open! Core 4 | 5 | module type Underlying = sig 6 | type t = 7 | | String of String.t 8 | | Bigstring of Bigstring.t 9 | | Char of char 10 | end 11 | 12 | module type String_monoid = sig 13 | type t 14 | 15 | (** Primitive, constant-time operations *) 16 | val empty : t 17 | 18 | val nl : t 19 | val plus : t -> t -> t 20 | val length : t -> int 21 | 22 | (** Linear in the number of elements. *) 23 | val concat : ?sep:t -> t list -> t 24 | 25 | (** Linear in the number of elements in the list. *) 26 | val concat_string : ?sep:string -> string list -> t 27 | 28 | (*_ t_of_* is O(1), *_of_t is O(N), N being the length *) 29 | 30 | include Stringable.S with type t := t 31 | 32 | val of_bigstring : Bigstring.t -> t 33 | val to_bigstring : t -> Bigstring.t 34 | val of_char : char -> t 35 | 36 | (*_ 37 | For the library to fulfill it's purpose of minimal overhead 38 | string concatenation, the output functions must be tightly 39 | coupled with the low-level representation. 40 | 41 | Any new output channel should be implemented as new methods 42 | of the library itself. 43 | *) 44 | 45 | val output_bigbuffer : t -> Bigbuffer.t -> unit 46 | 47 | module Underlying : Underlying 48 | 49 | val fold : t -> init:'accum -> f:('accum -> Underlying.t -> 'accum) -> 'accum 50 | val iter : t -> f:(Underlying.t -> unit) -> unit 51 | val is_suffix : t -> suffix:string -> bool 52 | val is_prefix : t -> prefix:string -> bool 53 | val is_substring : t -> substring:string -> bool 54 | val is_string : t -> string:string -> bool 55 | end 56 | -------------------------------------------------------------------------------- /kernel/src/string_monoidable.ml: -------------------------------------------------------------------------------- 1 | module type S = sig 2 | type t 3 | 4 | val to_string_monoid : t -> String_monoid.t 5 | end 6 | -------------------------------------------------------------------------------- /kernel/src/string_monoidable.mli: -------------------------------------------------------------------------------- 1 | module type S = sig 2 | type t 3 | 4 | val to_string_monoid : t -> String_monoid.t 5 | end 6 | -------------------------------------------------------------------------------- /src/crypto.ml: -------------------------------------------------------------------------------- 1 | module Cryptokit = Cryptokit 2 | module CryptokitBignum = CryptokitBignum 3 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name email_message) 3 | (public_name email_message) 4 | (libraries angstrom async base64 core_unix.bigstring_unix core core_unix 5 | cryptokit email_address email_message_kernel core_kernel.nonempty_list re2 6 | core_unix.sys_unix core_unix.time_float_unix core_kernel.uuid 7 | core_unix.uuid) 8 | (preprocess 9 | (pps ppx_jane))) 10 | -------------------------------------------------------------------------------- /src/email.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | open! Import 4 | include Email_message_kernel 5 | 6 | let save ?temp_file ?perm ?fsync ?eol_except_raw_content t path = 7 | Writer.with_file_atomic ?temp_file ?perm ?fsync path ~f:(fun writer -> 8 | String_monoid.iter (to_string_monoid ?eol_except_raw_content t) ~f:(function 9 | | String_monoid.Underlying.Char c -> Writer.write_char writer c 10 | | String str -> Writer.write writer str 11 | | Bigstring bstr -> Writer.schedule_bigstring writer bstr); 12 | return ()) 13 | ;; 14 | 15 | module Content = Email_message_kernel.Content 16 | module Raw_content = Email_message_kernel.Raw_content 17 | module Simple = Email_simple 18 | -------------------------------------------------------------------------------- /src/email.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | open! Import 4 | 5 | include Email_message_kernel.Private.Email_intf.Email (** @inline *) 6 | 7 | (** Efficiently save [t] to disk with little additional allocation. 8 | 9 | [?temp_file], [?perm], [?fsync] are blindly passed to [Writer.with_file_atomic] *) 10 | val save 11 | : ?temp_file:string 12 | -> ?perm:Unix.file_perm 13 | -> ?fsync:bool (** default is [false] *) 14 | -> ?eol_except_raw_content:Lf_or_crlf.t (** default is [`LF] *) 15 | -> t 16 | -> string 17 | -> unit Deferred.t 18 | 19 | module Content = Email_message_kernel.Content 20 | module Raw_content = Email_message_kernel.Raw_content 21 | module Simple = Email_simple 22 | -------------------------------------------------------------------------------- /src/email_date.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | module Time = Time_float_unix 3 | 4 | let utc_offset_string time ~zone = 5 | let utc_offset = Time.utc_offset time ~zone in 6 | let is_utc = Time.Span.( = ) utc_offset Time.Span.zero in 7 | if is_utc 8 | then "Z" 9 | else 10 | String.concat 11 | [ (if Time.Span.( < ) utc_offset Time.Span.zero then "-" else "+") 12 | ; Time.Ofday.to_string_trimmed 13 | (Time.Ofday.of_span_since_start_of_day_exn (Time.Span.abs utc_offset)) 14 | ] 15 | ;; 16 | 17 | let rfc822_date ?(zone = force Time.Zone.local) now = 18 | let offset_string = 19 | utc_offset_string ~zone now |> String.filter ~f:(fun c -> Base.Char.( <> ) c ':') 20 | in 21 | let now_string = Time.format now "%a, %d %b %Y %H:%M:%S" ~zone in 22 | sprintf "%s %s" now_string offset_string 23 | ;; 24 | 25 | open Angstrom 26 | open Let_syntax 27 | 28 | (* Folding whitespace and comments. See RFC2822#3.2.3 *) 29 | let comment = 30 | let comment_text = 31 | skip (function 32 | (* slight simplification of the RFC *) 33 | | '(' | ')' | '\\' -> false 34 | | _ -> true) 35 | in 36 | let quoted_pair = 37 | let%map (_ : char) = char '\\' *> any_char in 38 | () 39 | in 40 | fix (fun comment -> 41 | char '(' *> skip_many (comment_text <|> quoted_pair <|> comment) <* char ')') 42 | ;; 43 | 44 | let single_whitespace_or_comment = comment <|> skip Char.is_whitespace 45 | let folding_whitespace = skip_many1 single_whitespace_or_comment "FWS" 46 | let optional_folding_whitespace = skip_many single_whitespace_or_comment "?FWS" 47 | 48 | let rec skip_min_max ~min ~max thing = 49 | assert (min <= max); 50 | if min > 0 51 | then thing *> skip_min_max ~min:(min - 1) ~max:(max - 1) thing 52 | else if max > 0 53 | then option () (skip_min_max ~min:1 ~max thing) 54 | else return () 55 | ;; 56 | 57 | let parse_two_digit_int = 58 | consumed (skip_min_max (skip Char.is_digit) ~min:2 ~max:2) >>| Int.of_string 59 | ;; 60 | 61 | let parse_two_to_four_digit_int = 62 | consumed (skip_min_max (skip Char.is_digit) ~min:2 ~max:4) >>| Int.of_string 63 | ;; 64 | 65 | let parse_one_or_two_digit_int = 66 | consumed (skip_min_max (skip Char.is_digit) ~min:1 ~max:2) >>| Int.of_string 67 | ;; 68 | 69 | let parse_day_of_week = 70 | choice (List.map Day_of_week.all ~f:(Fn.compose string_ci Day_of_week.to_string)) 71 | "day of week" 72 | ;; 73 | 74 | let parse_month = 75 | choice 76 | (List.map Month.all ~f:(fun month -> 77 | const month <$> string_ci (Month.to_string month))) 78 | "month" 79 | ;; 80 | 81 | let parse_time_zone = 82 | let utc_offset = 83 | (let%mapn sign = 84 | Angstrom.choice [ const Sign.Pos <$> char '+'; const Sign.Neg <$> char '-' ] 85 | "sign" 86 | and hours = parse_two_digit_int "hours" 87 | and minutes = parse_two_digit_int "minutes" in 88 | let utc_offset = Time.Span.create ~sign ~hr:hours ~min:minutes () in 89 | if not 90 | (Time.Span.between 91 | utc_offset 92 | ~low:(Time.Span.neg Time.Span.day) 93 | ~high:Time.Span.day) 94 | then raise_s [%message "The supplied UTC offset is semantically invalid."]; 95 | utc_offset) 96 | "utc offset" 97 | in 98 | let military_time_zone = 99 | const Time.Span.zero 100 | <$> (List.init 26 ~f:(fun i -> 101 | Char.of_int_exn (Char.to_int 'A' + i) |> Char.to_string) 102 | |> List.filter ~f:(String.( <> ) "J") 103 | |> List.map ~f:string_ci 104 | |> choice) 105 | "military zone" 106 | in 107 | let obsolete_zone = 108 | [ "UT", Time.Span.zero 109 | ; "GMT", Time.Span.zero 110 | ; "EST", Time.Span.create ~sign:Sign.Neg ~hr:5 () 111 | ; "EDT", Time.Span.create ~sign:Sign.Neg ~hr:4 () 112 | ; "CST", Time.Span.create ~sign:Sign.Neg ~hr:6 () 113 | ; "CDT", Time.Span.create ~sign:Sign.Neg ~hr:5 () 114 | ; "MST", Time.Span.create ~sign:Sign.Neg ~hr:7 () 115 | ; "MDT", Time.Span.create ~sign:Sign.Neg ~hr:6 () 116 | ; "PST", Time.Span.create ~sign:Sign.Neg ~hr:8 () 117 | ; "PDT", Time.Span.create ~sign:Sign.Neg ~hr:7 () 118 | ] 119 | |> List.map ~f:(fun (abbrev, offset) -> const offset <$> string_ci abbrev) 120 | |> choice 121 | "obsolote zone" 122 | in 123 | choice [ obsolete_zone; military_time_zone; utc_offset ] "time zone" 124 | ;; 125 | 126 | let untruncate_year year = 127 | (* As per https://tools.ietf.org/html/rfc5322#section-4.3 *) 128 | if year <= 49 then 2000 + year else if year <= 999 then 1900 + year else year 129 | ;; 130 | 131 | let parse_date = 132 | (let%mapn () = 133 | option () (parse_day_of_week *> option ' ' (char ',') *> folding_whitespace) 134 | and day = parse_one_or_two_digit_int "day" <* folding_whitespace 135 | and month = parse_month 136 | and year = 137 | folding_whitespace *> (parse_two_to_four_digit_int >>| untruncate_year "year") 138 | in 139 | Date.create_exn ~y:year ~m:month ~d:day) 140 | "date" 141 | ;; 142 | 143 | let parse_time_of_day = 144 | (let%mapn hour = parse_two_digit_int "hour" 145 | and minutes = char ':' *> parse_two_digit_int "minute" 146 | and seconds = option 0 (char ':' *> parse_two_digit_int "second") in 147 | Time.Ofday.create ~hr:hour ~min:minutes ~sec:seconds ()) 148 | "time of day" 149 | ;; 150 | 151 | let rfc2822_date_parser = 152 | let%mapn date = parse_date <* folding_whitespace 153 | and time_of_day = parse_time_of_day <* folding_whitespace 154 | and utc_offset = parse_time_zone in 155 | let time_no_zone = Time.of_date_ofday ~zone:Time.Zone.utc date time_of_day in 156 | Time.sub time_no_zone utc_offset, utc_offset 157 | ;; 158 | 159 | (* See https://tools.ietf.org/html/rfc5322#section-3.3 for the full spec. Also note 160 | https://tools.ietf.org/html/rfc5322#appendix-A.5 on whitespace. 161 | 162 | https://github.com/moment/moment/blob/022dc038af5ebafafa375f4566fb23366f4e4aa8/src/lib/create/from-string.js#L189 163 | (alongside the RFC), was used as a reference for this implementation. *) 164 | let of_string_exn_with_utc_offset date = 165 | match 166 | parse_string 167 | ~consume:All 168 | (optional_folding_whitespace *> rfc2822_date_parser 169 | <* optional_folding_whitespace 170 | <* end_of_input) 171 | date 172 | with 173 | | Ok time_and_utc_offset -> time_and_utc_offset 174 | | Error message -> failwith ("Failed to parse RFC822 " ^ message) 175 | ;; 176 | 177 | let of_string_exn date = 178 | let time, _utc_offset = of_string_exn_with_utc_offset date in 179 | time 180 | ;; 181 | 182 | let of_string_exn_with_time_zone date = 183 | let time, utc_offset = of_string_exn_with_utc_offset date in 184 | let utc_offset_parts = Time.Span.to_parts utc_offset in 185 | let hours_to_seconds hours = hours * 60 * 60 in 186 | let minutes_to_seconds minutes = minutes * 60 in 187 | let utc_offset_in_seconds = 188 | Time.Span.of_int_sec 189 | (Sign.to_int utc_offset_parts.sign 190 | * (hours_to_seconds utc_offset_parts.hr + minutes_to_seconds utc_offset_parts.min) 191 | ) 192 | in 193 | time, Time.Zone.of_utc_offset_in_seconds_round_down utc_offset_in_seconds 194 | ;; 195 | -------------------------------------------------------------------------------- /src/email_date.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** Generate an RFC822-style date *) 4 | val rfc822_date : ?zone:Time_float.Zone.t -> Time_float.t -> string 5 | 6 | (** Parse an RFC822-style string into a [Time.t]. 7 | 8 | Note that if the weekday is provided, it will not be semantically validated. *) 9 | val of_string_exn : string -> Time_float.t 10 | 11 | (** Similar to [of_string_exn], but also return back the time zone that was parsed from 12 | the provided string. *) 13 | val of_string_exn_with_time_zone : string -> Time_float.t * Time_float.Zone.t 14 | -------------------------------------------------------------------------------- /src/email_message.ml: -------------------------------------------------------------------------------- 1 | module Bigstring_shared = Email_message_kernel.Bigstring_shared 2 | module Email = Email 3 | module Email_address = Email_address 4 | module Email_headers = Email_message_kernel.Headers 5 | module Email_selector = Selector 6 | module Email_wrapper = Wrapper 7 | module Email_date = Email_date 8 | module Mimestring = Email_message_kernel.Mimestring 9 | module Octet_stream = Email_message_kernel.Octet_stream 10 | module String_monoid = String_monoid 11 | module Lf_or_crlf = Email_message_kernel.Lf_or_crlf 12 | 13 | module Email_message_stable = struct 14 | module Email = struct 15 | include Email.Stable 16 | module Raw_content = Email_message_kernel.Raw_content.Stable 17 | module Simple = Email_message_kernel.Simple.Stable 18 | end 19 | 20 | module Email_address = Email_address.Stable 21 | module Email_wrapper = Email_wrapper.Stable 22 | module Email_headers = Email_message_kernel.Headers.Stable 23 | end 24 | 25 | module Private = struct 26 | include Email_message_kernel.Private 27 | end 28 | -------------------------------------------------------------------------------- /src/email_simple.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | open! Import 4 | 5 | (*_ break dependency cycle *) 6 | 7 | module Email := Email_message_kernel 8 | 9 | (** @inline *) 10 | include 11 | Email_message_kernel.Private.Email_simple_intf.Email_simple 12 | with module Content := Email_message_kernel.Simple.Content 13 | and module Expert := Email_message_kernel.Simple.Expert 14 | and module Stable := Email_message_kernel.Simple.Stable 15 | 16 | type attachment_name = string [@@deriving sexp_of] 17 | 18 | (** For parsing attachments. Use [create ~attachments] to add attachments. Convenience 19 | functions for email parts that have "Content-Disposition: attachment" *) 20 | module Attachment : sig 21 | module Id : sig 22 | type t [@@deriving compare, sexp_of] 23 | end 24 | 25 | type t 26 | 27 | (** In a given email, each attachment has a unique [Id.t] that is determined by the 28 | email structure. *) 29 | val id : t -> Id.t 30 | 31 | (** The headers surrounding this attachment *) 32 | val headers : t -> Headers.t 33 | 34 | (** [Some email] if this is an attached message/rfc822 content *) 35 | val embedded_email : t -> Email.t option 36 | 37 | (** These are expensive operations *) 38 | val raw_data : t -> Bigstring_shared.t Or_error.t 39 | 40 | val md5 : t -> string Or_error.t 41 | val sha256 : t -> string Or_error.t 42 | val filename : t -> attachment_name 43 | 44 | (** [filename] decoded as per [Headers.Encoded_word.decode] *) 45 | val decoded_filename : t -> attachment_name 46 | 47 | (** This function is similar to [decode_filename], but it includes charset details. 48 | Right now, the email attachment downloader uses it to convert attachment names to 49 | UTF-8 using lib/iconv. In the future, we might want to add UTF-8 conversion directly 50 | into [Email_message] and swap this out with [decode_filename_with_utf_8_conversion]. 51 | However, we can't do that at the moment because of a vulnerability in iconv *) 52 | type decode_error = 53 | [ `Multiple_distinct_charsets_in_attachment_name of 54 | [ `Encoded of Headers.Encoded_word.Charset.t * attachment_name 55 | | `Plain of attachment_name 56 | ] 57 | Nonempty_list.t 58 | | `Empty_attachment_name 59 | ] 60 | 61 | val decoded_filename_with_charset 62 | : t 63 | -> (Headers.Encoded_word.Charset.t option * attachment_name, decode_error) result 64 | 65 | val to_file : t -> string -> unit Deferred.Or_error.t 66 | end 67 | 68 | module Content : sig 69 | include Email_message_kernel.Private.Email_simple_intf.Content 70 | 71 | val of_file 72 | : ?content_type:Mimetype.t 73 | -> ?encoding:Octet_stream.Encoding.known 74 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 75 | -> string 76 | -> t Deferred.t 77 | 78 | (** Save content to disk *) 79 | val to_file : t -> string -> unit Deferred.Or_error.t 80 | end 81 | 82 | val create 83 | : ?from:Email_address.t (** defaults to *) 84 | -> to_:Email_address.t list 85 | -> ?cc:Email_address.t list 86 | -> ?reply_to:Email_address.t 87 | -> subject:string 88 | -> ?id:string 89 | -> ?in_reply_to:string 90 | -> ?date:Time_float.t 91 | -> ?auto_generated:unit 92 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 93 | -> ?attachments:(attachment_name * Content.t) list 94 | -> ?no_tracing_headers:[ `Because_not_using_standard_email_infra ] 95 | -> Content.t 96 | -> t 97 | 98 | val create_utf8 99 | : ?from:Email_address.t (** defaults to *) 100 | -> to_:Email_address.t list 101 | -> ?cc:Email_address.t list 102 | -> ?reply_to:Email_address.t 103 | -> subject:string 104 | -> ?id:string 105 | -> ?in_reply_to:string 106 | -> ?date:Time_float.t 107 | -> ?auto_generated:unit 108 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 109 | -> ?attachments:(attachment_name * Content.t) list 110 | -> ?no_tracing_headers:[ `Because_not_using_standard_email_infra ] 111 | -> Content.t 112 | -> t 113 | 114 | (** A unique value to be used in a Message-Id header *) 115 | val make_id : unit -> Headers.Value.t 116 | 117 | val local_address : unit -> Email_address.t 118 | 119 | (** [all_attachments] looks recursively through the e-mail parts, looking for attachments. 120 | 121 | [~include_inline_parts] (default is `None) controls whether this function will attempt 122 | to interpret inline parts as attachments. [`Named_or_has_content_id] most aggressively 123 | classifies parts as attachments, including inline parts that are either named or have 124 | a Content-Id header. [`Named] will include inline parts that are named. 125 | 126 | If [~look_through_attached_mails:true] (the default), it will separately include both 127 | e-mail attachments as well as the attachments to those e-mails. Otherwise it will 128 | include e-mail attachments but not (separately) any of the attached e-mails' 129 | attachments. 130 | 131 | [decode_filename_using] defines which character sets should be used to decode 132 | attachment filenames. This option is mainly there to keep legacy behaviour, where 133 | KS-C-5601 encoded filenames aren't decoded by default. *) 134 | val all_attachments 135 | : ?decode_filename_using:Headers.Encoded_word.Charset.t list 136 | -> ?include_inline_parts:[ `None | `Named | `Named_or_has_content_id ] 137 | -> ?look_through_attached_mails:bool 138 | -> t 139 | -> Attachment.t list 140 | 141 | val find_attachment : t -> attachment_name -> Attachment.t option 142 | 143 | (** [map_attachments] recurses into message/rfc822 parts. However, if a message/rfc822 144 | part is replaced, there is no further recursion. 145 | 146 | [decode_filename_using] defines which character sets should be used to decode 147 | attachment filenames. This option is mainly there to keep legacy behaviour, where 148 | KS-C-5601 encoded filenames aren't decoded by default. *) 149 | val map_attachments 150 | : ?include_inline_parts:[ `None | `Named | `Named_or_has_content_id ] 151 | -> ?decode_filename_using:Headers.Encoded_word.Charset.t list 152 | -> t 153 | -> f:(Attachment.t -> [ `Keep | `Replace of t ]) 154 | -> t 155 | 156 | module Expert : sig 157 | include Email_message_kernel.Private.Email_simple_intf.Expert 158 | 159 | val create_raw 160 | : ?from:string (** defaults to *) 161 | -> to_:string list 162 | -> ?cc:string list 163 | -> ?reply_to:string 164 | -> subject:string 165 | -> ?id:string 166 | -> ?in_reply_to:string 167 | -> ?date:string 168 | -> ?auto_generated:unit 169 | -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list 170 | -> ?attachments:(attachment_name * Content.t) list 171 | -> ?no_tracing_headers:[ `Because_not_using_standard_email_infra ] 172 | -> Content.t 173 | -> t 174 | end 175 | 176 | module Stable : sig 177 | include Email_message_kernel.Private.Email_simple_intf.Stable 178 | 179 | module Attachment : sig 180 | module Id : sig 181 | module V1 : Stable_without_comparator with type t = Attachment.Id.t 182 | end 183 | end 184 | end 185 | -------------------------------------------------------------------------------- /src/import.ml: -------------------------------------------------------------------------------- 1 | module Headers = Email_message_kernel.Headers 2 | module Bigstring_shared = Email_message_kernel.Bigstring_shared 3 | module Octet_stream = Email_message_kernel.Octet_stream 4 | module Email_content = Email_message_kernel.Content 5 | module Mimestring = Email_message_kernel.Mimestring 6 | module Lf_or_crlf = Email_message_kernel.Lf_or_crlf 7 | -------------------------------------------------------------------------------- /src/selector.ml: -------------------------------------------------------------------------------- 1 | module Stable = struct 2 | open Core.Core_stable 3 | 4 | module Base = struct 5 | module V1 = struct 6 | type t = 7 | [ `exists_header of string * Re2.Stable.V1_no_options.t 8 | | `all_headers of string * Re2.Stable.V1_no_options.t 9 | ] 10 | [@@deriving bin_shape, sexp] 11 | 12 | let%expect_test _ = 13 | print_endline [%bin_digest: t]; 14 | [%expect {| cbce30b485bdbeb9e93285287a91e7f5 |}] 15 | ;; 16 | end 17 | end 18 | 19 | module V1 = struct 20 | type t = Base.V1.t Blang.V1.t [@@deriving bin_shape, sexp] 21 | 22 | let%expect_test _ = 23 | print_endline [%bin_digest: t]; 24 | [%expect {| 96b50dce93691f115ea076e70421edf9 |}] 25 | ;; 26 | end 27 | end 28 | 29 | open Core 30 | open! Import 31 | module Regex = Re2 32 | 33 | module Base = struct 34 | type t = 35 | (* When adding to this type, don't forget to add to examples below. *) 36 | [ `exists_header of string * Regex.t 37 | | `all_headers of string * Regex.t 38 | ] 39 | [@@deriving sexp_of] 40 | 41 | let matches' t headers = 42 | match t with 43 | | `exists_header (header, regex) -> 44 | let headers = Headers.find_all headers header in 45 | List.exists headers ~f:(Regex.matches regex) 46 | | `all_headers (header, regex) -> 47 | let headers = Headers.find_all headers header in 48 | List.for_all headers ~f:(Regex.matches regex) 49 | ;; 50 | 51 | let matches t email = matches' t (Email.headers email) 52 | 53 | let examples = 54 | [ `exists_header ("cc", Regex.of_string ".*@janestreet.com") 55 | ; `all_headers ("cc", Regex.of_string ".*@janestreet.com") 56 | ] 57 | ;; 58 | end 59 | 60 | type t = Base.t Blang.t [@@deriving sexp_of] 61 | 62 | let matches' t headers = Blang.eval t (fun base -> Base.matches' base headers) 63 | let matches t email = matches' t (Email.headers email) 64 | let example : t = Blang.and_ (List.map Base.examples ~f:Blang.base) 65 | -------------------------------------------------------------------------------- /src/selector.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Import 3 | 4 | module Base : sig 5 | (*_ The "exists" query can equally be accomplished using [sexp query], but 6 | not the "all" one. *) 7 | 8 | type t = 9 | [ `exists_header of string * Re2.t 10 | | `all_headers of string * Re2.t 11 | ] 12 | [@@deriving sexp_of] 13 | 14 | val matches : t -> Email.t -> bool 15 | val matches' : t -> Headers.t -> bool 16 | val examples : t list 17 | end 18 | 19 | type t = Base.t Blang.t [@@deriving sexp_of] 20 | 21 | val matches : t -> Email.t -> bool 22 | val matches' : t -> Headers.t -> bool 23 | val example : t 24 | 25 | module Stable : sig 26 | module Base : sig 27 | module V1 : sig 28 | type t = [ | Base.t ] [@@deriving bin_shape, sexp] 29 | end 30 | end 31 | 32 | module V1 : sig 33 | type nonrec t = t [@@deriving bin_shape, sexp] 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /src/string_monoid.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Import 3 | include Email_message_kernel.String_monoid 4 | 5 | module Underlying = struct 6 | include Underlying 7 | 8 | let output_channel ~channel = function 9 | | String str -> Out_channel.output_string channel str 10 | | Bigstring bstr -> Bigstring_unix.really_output channel bstr 11 | | Char c -> Out_channel.output_char channel c 12 | ;; 13 | 14 | let output_unix ~writer = function 15 | | String str -> Async.Writer.write writer str 16 | | Bigstring bstr -> Async.Writer.write_bigstring writer bstr 17 | | Char c -> Async.Writer.write_char writer c 18 | ;; 19 | end 20 | 21 | let output_channel t channel = 22 | Private.output ~dst_output:(Underlying.output_channel ~channel) t 23 | ;; 24 | 25 | let output_unix t writer = Private.output ~dst_output:(Underlying.output_unix ~writer) t 26 | -------------------------------------------------------------------------------- /src/string_monoid.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | 4 | include Email_message_kernel.Private.String_monoid_intf.String_monoid (** @inline *) 5 | 6 | (*_ 7 | For the library to fulfill it's purpose of minimal overhead 8 | string concatenation, the output functions must be tightly 9 | coupled with the low-level representation. 10 | 11 | Any new output channel should be implemented as new methods 12 | of the library itself. 13 | *) 14 | 15 | val output_unix : t -> Writer.t -> unit 16 | val output_channel : t -> Out_channel.t -> unit 17 | -------------------------------------------------------------------------------- /src/wrapper.ml: -------------------------------------------------------------------------------- 1 | module Stable = struct 2 | open Core.Core_stable 3 | module Email_simple = Email_simple.Stable 4 | module Email_address = Email_address.Stable 5 | 6 | module V1 = struct 7 | type t = 8 | { header : Email_simple.Content.V1.t 9 | ; from : [ `Keep | `Change_to of Email_address.V1.t ] 10 | ; to_ : [ `Keep | `Change_to of Email_address.V1.t list ] 11 | ; cc : [ `Keep | `Change_to of Email_address.V1.t list ] 12 | ; subject : [ `Keep | `Prepend of string ] 13 | } 14 | [@@deriving sexp, bin_io] 15 | end 16 | end 17 | 18 | open! Core 19 | open! Import 20 | 21 | type t = Stable.V1.t = 22 | { header : Email_simple.Content.t 23 | ; from : [ `Keep | `Change_to of Email_address.t ] 24 | ; to_ : [ `Keep | `Change_to of Email_address.t list ] 25 | ; cc : [ `Keep | `Change_to of Email_address.t list ] 26 | ; subject : [ `Keep | `Prepend of string ] 27 | } 28 | [@@deriving sexp_of] 29 | 30 | let create ?(from = `Keep) ?(to_ = `Keep) ?(cc = `Keep) ?(subject = `Keep) header = 31 | { header; from; to_; cc; subject } 32 | ;; 33 | 34 | let create_from_email email = 35 | let get_header x = Headers.last ~normalize:`None (Email.headers email) x in 36 | let from = 37 | match get_header "From" with 38 | | None | Some "" -> `Keep 39 | | Some from -> `Change_to (Email_address.of_string_exn from) 40 | in 41 | let to_ = 42 | match get_header "To" with 43 | | None | Some "" -> `Keep 44 | | Some to_ -> `Change_to (Email_address.list_of_string_exn to_) 45 | in 46 | let cc = 47 | match get_header "Cc" with 48 | | None | Some "" -> `Keep 49 | | Some cc -> `Change_to (Email_address.list_of_string_exn cc) 50 | in 51 | let subject = 52 | match get_header "Subject" with 53 | | None | Some "" -> `Keep 54 | | Some subject -> `Prepend subject 55 | in 56 | let email = 57 | Email.modify_headers 58 | email 59 | ~f: 60 | (Headers.filter ~f:(fun ~name ~value:_ -> 61 | let open String.Caseless.Replace_polymorphic_compare in 62 | name <> "From" && name <> "To" && name <> "Cc" && name <> "Subject")) 63 | in 64 | create ~from ~to_ ~cc ~subject (Email_simple.Content.of_email email) 65 | ;; 66 | 67 | let content_of_email email = 68 | Email_simple.Content.create_custom 69 | ~content_type:(Email_simple.Mimetype.of_string "message/rfc822") 70 | (Email.to_string email) 71 | ;; 72 | 73 | (* We must be very careful with the email headers that we use in the new email. We use the 74 | following policies: 75 | 76 | (1) Add "From", "To", "Cc", "Subject" according to the supplied arguments to [create] 77 | (2) Copy over all other headers except: 78 | (i) DKIM-Signature - We break the signing by altering the email content 79 | (ii) Return-Path - We don't want the altered email to ever bounce back to the 80 | original sender 81 | (iii) Content-Transfer-Encoding, Content-Type, Content-Disposition - We structure 82 | the email differently. These wouldn't make sense anymore 83 | *) 84 | let add { header; from; to_; cc; subject } email = 85 | let content = Email_simple.Content.mixed [ header; content_of_email email ] in 86 | let headers = Email.headers email in 87 | let get_headers x = Headers.find_all ~normalize:`None headers x in 88 | let get_header x = Headers.last ~normalize:`None headers x in 89 | let from = 90 | match from with 91 | | `Keep -> get_header "From" |> Option.value ~default:"" 92 | | `Change_to addr -> Email_address.to_string addr 93 | in 94 | let to_ = 95 | match to_ with 96 | | `Keep -> get_headers "To" 97 | | `Change_to addrs -> List.map addrs ~f:Email_address.to_string 98 | in 99 | let cc = 100 | match cc with 101 | | `Keep -> get_headers "Cc" 102 | | `Change_to addrs -> List.map addrs ~f:Email_address.to_string 103 | in 104 | let subject = 105 | let subj = get_header "Subject" |> Option.value ~default:"" in 106 | match subject with 107 | | `Keep -> subj 108 | | `Prepend str -> sprintf "%s %s" str subj 109 | in 110 | let extra_headers = 111 | Headers.filter headers ~f:(fun ~name ~value:_ -> 112 | match name with 113 | | "From" 114 | | "To" 115 | | "Cc" 116 | | "Subject" 117 | | "Message-Id" 118 | | "Date" 119 | | "DKIM-Signature" 120 | | "Return-Path" 121 | | "Content-Transfer-Encoding" 122 | | "Content-Type" 123 | | "Content-Disposition" -> false 124 | | _ -> true) 125 | |> Headers.to_list 126 | in 127 | let id = get_header "Message-Id" in 128 | let date = get_header "Date" in 129 | Email_simple.Expert.create_raw ?id ?date ~from ~to_ ~cc ~subject ~extra_headers content 130 | ;; 131 | -------------------------------------------------------------------------------- /src/wrapper.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** {v 4 | This module exposes a function to add a custom wrapper around an existing email. Gmail 5 | will result in the following formatting: 6 | 7 | CUSTOM WRAPPER TEXT 8 | 9 | ---------- Forwarded message ---------- 10 | From: (1) 11 | To: (2) 12 | Cc: (3) 13 | Date: (4) 14 | Subject: (5) 15 | (6) 16 | 17 | where (1)-(6) are taken from the original email. 18 | v} *) 19 | 20 | type t [@@deriving sexp_of] 21 | 22 | (** Create a [Wrapper.t] that can be used to wrap emails *) 23 | 24 | val create 25 | : ?from:[ `Keep | `Change_to of Email_address.t ] (** default: `Keep *) 26 | -> ?to_:[ `Keep | `Change_to of Email_address.t list ] (** default: `Keep *) 27 | -> ?cc:[ `Keep | `Change_to of Email_address.t list ] (** default: `Keep *) 28 | -> ?subject:[ `Keep | `Prepend of string ] (** default: `Keep *) 29 | -> Email_simple.Content.t 30 | -> t 31 | 32 | (** Like [create], but extract [?from], [?to_], [?cc] and [?subject] from the headers 33 | "From:", "To:", "Cc:" and "Subject:" respectively. *) 34 | val create_from_email : Email.t -> t 35 | 36 | (** Transform an email by wrapping it according to the [Wrapper.t] *) 37 | val add : t -> Email.t -> Email.t 38 | 39 | module Stable : sig 40 | module V1 : sig 41 | type nonrec t = t [@@deriving sexp, bin_io] 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name email_message_test) 3 | (libraries async core email_message expect_test_helpers_async 4 | expect_test_helpers_core core_unix.time_float_unix) 5 | (preprocess 6 | (pps ppx_jane))) 7 | -------------------------------------------------------------------------------- /test/email_message_test.ml: -------------------------------------------------------------------------------- 1 | module Test_boundary = Test_boundary 2 | module Test_email = Test_email 3 | module Test_email_content = Test_email_content 4 | module Test_email_date = Test_email_date 5 | module Test_email_save = Test_email_save 6 | module Test_email_simple = Test_email_simple 7 | module Test_encoded_word = Test_encoded_word 8 | module Test_headers = Test_headers 9 | module Test_media_type = Test_media_type 10 | module Test_octet_stream = Test_octet_stream 11 | module Test_related_parts = Test_related_parts 12 | module Test_rfc = Test_rfc 13 | -------------------------------------------------------------------------------- /test/test_boundary.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message 4 | open Email_message.Private 5 | open Expect_test_helpers_core 6 | open Boundary 7 | 8 | let boundary = of_string "BOUNDARY" 9 | 10 | let split str = 11 | let bs = Bigstring_shared.to_string in 12 | let prologue, parts, epilogue = split boundary (Bigstring_shared.of_string str) in 13 | let prologue = Option.map prologue ~f:bs in 14 | let parts = List.map parts ~f:bs in 15 | let epilogue = Option.map epilogue ~f:bs in 16 | print_s 17 | [%message 18 | "" (prologue : string option) (parts : string list) (epilogue : string option)] 19 | ;; 20 | 21 | let join (prologue, parts, epilogue) = 22 | let prologue = Option.map prologue ~f:Bigstring_shared.of_string in 23 | let epilogue = Option.map epilogue ~f:Bigstring_shared.of_string in 24 | let parts = List.map parts ~f:String_monoid.of_string in 25 | let joined = join_without_checking_for_conflicts ?prologue ~parts ?epilogue boundary in 26 | (* Expect tests ignore leading and trailing whitespace (and common indentation) 27 | on both the 'expected' and 'actual' outputs. Wrapping in '######'s ensures 28 | that no whitespace is stripped. *) 29 | printf "######\n%s\n######" (String_monoid.to_string joined) 30 | ;; 31 | 32 | let%expect_test "split" = 33 | (* Simple tests with no prologue or epilogue *) 34 | split "--BOUNDARY\n\n--BOUNDARY--"; 35 | [%expect {| ((prologue ()) (parts ("")) (epilogue ())) |}]; 36 | split "--BOUNDARY\nP\n--BOUNDARY--"; 37 | [%expect {| ((prologue ()) (parts (P)) (epilogue ())) |}]; 38 | split "--BOUNDARY\r\nP\r\n--BOUNDARY--"; 39 | [%expect {| ((prologue ()) (parts (P)) (epilogue ())) |}]; 40 | split "--BOUNDARY\nP\n--BOUNDARY\nQ\n--BOUNDARY--"; 41 | [%expect {| ((prologue ()) (parts (P Q)) (epilogue ())) |}]; 42 | (* Prologue and epilogue *) 43 | split "A\n--BOUNDARY\nP\n--BOUNDARY--"; 44 | [%expect 45 | {| 46 | ((prologue (A)) 47 | (parts (P)) 48 | (epilogue ())) 49 | |}]; 50 | split "--BOUNDARY\nP\n--BOUNDARY--\nB"; 51 | [%expect 52 | {| 53 | ((prologue ()) 54 | (parts (P)) 55 | (epilogue ("\nB"))) 56 | |}]; 57 | split "A\n--BOUNDARY\nP\n--BOUNDARY--\nB"; 58 | [%expect 59 | {| 60 | ((prologue (A)) 61 | (parts (P)) 62 | (epilogue ("\nB"))) 63 | |}]; 64 | split "A\r\n--BOUNDARY\r\nP\r\n--BOUNDARY--\r\nB"; 65 | [%expect 66 | {| 67 | ((prologue (A)) 68 | (parts (P)) 69 | (epilogue ("\r\nB"))) 70 | |}]; 71 | (* Preserve extra whitespace *) 72 | split "\nA\n\n--BOUNDARY\n\nP\n\n--BOUNDARY--\nB\n"; 73 | [%expect 74 | {| 75 | ((prologue ("\nA\n")) 76 | (parts ("\nP\n")) 77 | (epilogue ("\nB\n"))) 78 | |}]; 79 | (* Whitespace padding on boundary line *) 80 | split "--BOUNDARY \nB\n--BOUNDARY--"; 81 | [%expect 82 | {| 83 | ((prologue ("--BOUNDARY \nB")) 84 | (parts ()) 85 | (epilogue ())) 86 | |}]; 87 | (* Content with something that looks like a boundary *) 88 | split "--BOUNDARY\nnot a --BOUNDARY\nnot a =\n--BOUNDARY either\n--BOUNDARY--"; 89 | [%expect 90 | {| 91 | ((prologue ()) 92 | (parts ("not a --BOUNDARY\nnot a =\n--BOUNDARY either")) 93 | (epilogue ())) 94 | |}]; 95 | return () 96 | ;; 97 | 98 | let%expect_test "join" = 99 | (* Simple tests with no prologue or epilogue *) 100 | join (None, [ "" ], None); 101 | [%expect 102 | {| 103 | ###### 104 | --BOUNDARY 105 | 106 | --BOUNDARY-- 107 | ###### 108 | |}]; 109 | let%bind () = 110 | join (None, [ "P" ], None); 111 | [%expect 112 | {| 113 | ###### 114 | --BOUNDARY 115 | P 116 | --BOUNDARY-- 117 | ###### 118 | |}]; 119 | return () 120 | in 121 | let%bind () = 122 | join (None, [ "P"; "Q" ], None); 123 | [%expect 124 | {| 125 | ###### 126 | --BOUNDARY 127 | P 128 | --BOUNDARY 129 | Q 130 | --BOUNDARY-- 131 | ###### 132 | |}]; 133 | return () 134 | in 135 | (* Prologue and epilogue *) 136 | join (Some "A", [ "P" ], None); 137 | [%expect 138 | {| 139 | ###### 140 | A 141 | --BOUNDARY 142 | P 143 | --BOUNDARY-- 144 | ###### 145 | |}]; 146 | join (None, [ "P" ], Some "\nB"); 147 | [%expect 148 | {| 149 | ###### 150 | --BOUNDARY 151 | P 152 | --BOUNDARY-- 153 | B 154 | ###### 155 | |}]; 156 | join (Some "A", [ "P" ], Some "\nB"); 157 | [%expect 158 | {| 159 | ###### 160 | A 161 | --BOUNDARY 162 | P 163 | --BOUNDARY-- 164 | B 165 | ###### 166 | |}]; 167 | (* Preserve extra whitespace *) 168 | join (Some "\nA\n", [ "\nP\n" ], Some "\nB\n"); 169 | [%expect 170 | {| 171 | ###### 172 | 173 | A 174 | 175 | --BOUNDARY 176 | 177 | P 178 | 179 | --BOUNDARY-- 180 | B 181 | 182 | ###### 183 | |}]; 184 | return () 185 | ;; 186 | 187 | module%test [@name "non-compliant"] _ = struct 188 | (* The following tests document undefined behavior. *) 189 | 190 | let%expect_test "non-compliant [split]" = 191 | (* Parsing of weird and malformed data into a sensible form *) 192 | split "--BOUNDARY\n--BOUNDARY--"; 193 | [%expect {| ((prologue ()) (parts ("")) (epilogue ())) |}]; 194 | split ""; 195 | [%expect 196 | {| 197 | ((prologue ("")) 198 | (parts ()) 199 | (epilogue ())) 200 | |}]; 201 | split "\n"; 202 | [%expect 203 | {| 204 | ((prologue ("\n")) 205 | (parts ()) 206 | (epilogue ())) 207 | |}]; 208 | (* Missing boundary markers *) 209 | split "A\n--BOUNDARY--\nB"; 210 | [%expect {| ((prologue (A)) (parts ()) (epilogue ("\nB"))) |}]; 211 | split "--BOUNDARY--\nB"; 212 | [%expect {| ((prologue ("")) (parts ()) (epilogue ("\nB"))) |}]; 213 | split "--BOUNDARY--\n"; 214 | [%expect {| ((prologue ("")) (parts ()) (epilogue ("\n"))) |}]; 215 | split "--BOUNDARY--"; 216 | [%expect 217 | {| 218 | ((prologue ("")) 219 | (parts ()) 220 | (epilogue ())) 221 | |}]; 222 | split "A\n--BOUNDARY--"; 223 | [%expect 224 | {| 225 | ((prologue (A)) 226 | (parts ()) 227 | (epilogue ())) 228 | |}]; 229 | return () 230 | ;; 231 | 232 | let%expect_test "non-compliant [join]" = 233 | join (Some "A", [], Some "\nB"); 234 | [%expect 235 | {| 236 | ###### 237 | A 238 | B 239 | ###### 240 | |}]; 241 | join (None, [], Some "\nB"); 242 | [%expect 243 | {| 244 | ###### 245 | 246 | B 247 | ###### 248 | |}]; 249 | join (None, [], None); 250 | [%expect 251 | {| 252 | ###### 253 | 254 | 255 | ###### 256 | |}]; 257 | join (Some "A", [], None); 258 | [%expect 259 | {| 260 | ###### 261 | A 262 | ###### 263 | |}]; 264 | return () 265 | ;; 266 | end 267 | -------------------------------------------------------------------------------- /test/test_boundary.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_email.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message 4 | open Email 5 | 6 | let parse s = 7 | let parsed = of_string s in 8 | printf !"%{sexp:t}\n" parsed; 9 | let roundtripped = to_string parsed in 10 | printf !"Successfully roundtripped: %{sexp:bool}" (String.equal s roundtripped) 11 | ;; 12 | 13 | let%expect_test "simple" = 14 | parse "From: foo@bar.com\nTo: foo@bar.com\n\nhello world"; 15 | [%expect 16 | {| 17 | ((headers ((From " foo@bar.com") (To " foo@bar.com"))) 18 | (raw_content ("hello world"))) 19 | Successfully roundtripped: true 20 | |}]; 21 | return () 22 | ;; 23 | 24 | let%expect_test "no newlines" = 25 | parse ""; 26 | [%expect 27 | {| 28 | ((headers ()) (raw_content ())) 29 | Successfully roundtripped: true 30 | |}]; 31 | (* Header lines should be terminated with "\n". We add the missing "\n" when we 32 | [to_string]. *) 33 | parse "Header: hello world"; 34 | [%expect 35 | {| 36 | ((headers ((Header " hello world"))) (raw_content ())) 37 | Successfully roundtripped: false 38 | |}]; 39 | return () 40 | ;; 41 | 42 | let%expect_test "1 newline" = 43 | (* This is malformed. I could imagine having [None] for [raw_content] as well. *) 44 | parse "\n"; 45 | [%expect 46 | {| 47 | ((headers ()) (raw_content (""))) 48 | Successfully roundtripped: true 49 | |}]; 50 | (* This is malformed. I could imagine having [Some ""] for [raw_content] as well. *) 51 | parse "Header: hello world\n"; 52 | [%expect 53 | {| 54 | ((headers ((Header " hello world"))) (raw_content ())) 55 | Successfully roundtripped: true 56 | |}]; 57 | (* This case is weird, see below for an explanation *) 58 | parse "Header: hello world\nBody"; 59 | [%expect 60 | {| 61 | ((headers ((Header " hello world"))) (raw_content (Body))) 62 | Successfully roundtripped: false 63 | |}]; 64 | return () 65 | ;; 66 | 67 | let%expect_test "2 newlines" = 68 | parse "\n\n"; 69 | [%expect 70 | {| 71 | ((headers ()) (raw_content ("\n"))) 72 | Successfully roundtripped: true 73 | |}]; 74 | parse "Header: hello world\n\nBody"; 75 | [%expect 76 | {| 77 | ((headers ((Header " hello world"))) (raw_content (Body))) 78 | Successfully roundtripped: true 79 | |}]; 80 | return () 81 | ;; 82 | 83 | let%expect_test "weird headers" = 84 | (* Google and Exim both change to "body mode" (and adds a blank line) on the first 85 | line that "doesn't look like a header" for some slightly different 86 | interpretation of that phrase: 87 | 88 | - Google allows ASCII 32-57,59-126 (printable characters minus colon) in a 89 | header name 90 | 91 | - Exim allows ASCII 33-57,59-126 (printable characters minus colon minus space) 92 | in a header name 93 | 94 | RFC 5322 does not specify how to handle an invalid header line. The following 95 | scenarios are possible: 96 | 97 | 1) An invalid header (according to the above rules) is written somewhere in the 98 | header block. Any following headers are incorrectly treated as the start of 99 | the message body. 100 | 101 | 2) An MUA forgets to put a blank line between the header block and the body. 102 | As long as the first body line does not look like a header (according to the 103 | above rules), the "right thing" happens. *) 104 | (* Make sure we can handle the obsolete syntax of headers with whitespace before the 105 | colon. This doesn't roundtrip because we remove the whitespace before the ":"*) 106 | parse "From: foo@bar.com\nObsolete-header : hello world\n"; 107 | [%expect 108 | {| 109 | ((headers ((From " foo@bar.com") (Obsolete-header " hello world"))) 110 | (raw_content ())) 111 | Successfully roundtripped: false 112 | |}]; 113 | (* Whitespace should not be a part of a header field. Google considers this a 114 | valid header. Exim treats this as the start of the body. *) 115 | parse "From: foo@bar.com\nMalformed header: hello world\n"; 116 | [%expect 117 | {| 118 | ((headers ((From " foo@bar.com"))) 119 | (raw_content ("Malformed header: hello world\n"))) 120 | Successfully roundtripped: false 121 | |}]; 122 | (* RFC 5322 says that field names must contain at least 1 character, however 123 | Google and Exim both don't have this requirement. In addition, we get some 124 | messages in the wild that have broken headers like this. *) 125 | parse "From: foo@bar.com\n: hello world\n"; 126 | [%expect 127 | {| 128 | ((headers ((From " foo@bar.com") ("" " hello world"))) (raw_content ())) 129 | Successfully roundtripped: true 130 | |}]; 131 | return () 132 | ;; 133 | 134 | (* RFC 2822 Section 2.2.3: 135 | 136 | {v 137 | 138 | The general rule is that wherever this standard allows for folding white space (not 139 | simply WSP characters), a CRLF may be inserted before any WSP. For example, the header 140 | field: 141 | 142 | Subject: This is a test 143 | 144 | can be represented as: 145 | 146 | Subject: This 147 | is a test 148 | 149 | v} 150 | *) 151 | let%expect_test "Folding whitespace" = 152 | parse 153 | ([ "Subject: This is a multiline"; " subject"; ""; "body" ] |> String.concat ~sep:"\n"); 154 | [%expect 155 | {| 156 | ((headers ((Subject " This is a multiline\ 157 | \n subject"))) 158 | (raw_content (body))) 159 | Successfully roundtripped: true 160 | |}]; 161 | return () 162 | ;; 163 | -------------------------------------------------------------------------------- /test/test_email.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_email_content.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message 4 | open Expect_test_helpers_core 5 | open Email.Content 6 | 7 | let parse s = 8 | let email = Email.of_string s in 9 | let unparsed = Email.raw_content email |> Email.Raw_content.to_bigstring_shared in 10 | let parsed = parse email |> ok_exn in 11 | print_s [%message "" ~_:(parsed : t)]; 12 | let round_tripped = to_bigstring_shared parsed in 13 | assert ([%compare.equal: Bigstring_shared.t] unparsed round_tripped) 14 | ;; 15 | 16 | let%expect_test "simple content" = 17 | parse "From: foo@bar.com\n\nhello world"; 18 | [%expect 19 | {| 20 | (Data ( 21 | (encoding Bit7) 22 | (content "hello world"))) 23 | |}]; 24 | return () 25 | ;; 26 | 27 | let%expect_test "simple multipart" = 28 | parse 29 | "Content-Type: multipart/alternative; boundary=BOUNDARY\n\n\ 30 | --BOUNDARY\n\ 31 | Content-Type: text/plain; charset=UTF-8\n\n\ 32 | Simple body\n\n\ 33 | --BOUNDARY\n\ 34 | Content-Type: text/html; charset=UTF-8\n\ 35 | Content-Transfer-Encoding: quoted-printable\n\n\ 36 |
Simple body
\n\n\ 37 | --BOUNDARY--"; 38 | [%expect 39 | {| 40 | (Multipart ( 41 | (boundary BOUNDARY) 42 | (prologue ()) 43 | (epilogue ()) 44 | (parts ( 45 | ((headers ((Content-Type " text/plain; charset=UTF-8"))) 46 | (raw_content ("Simple body\n"))) 47 | ((headers ( 48 | (Content-Type " text/html; charset=UTF-8") 49 | (Content-Transfer-Encoding " quoted-printable"))) 50 | (raw_content ("
Simple body
\n"))))) 51 | (container_headers (( 52 | Content-Type " multipart/alternative; boundary=BOUNDARY"))))) 53 | |}]; 54 | return () 55 | ;; 56 | 57 | let%expect_test "nested multipart" = 58 | parse 59 | "Content-Type: multipart/mixed; boundary=BOUNDARY1\n\n\ 60 | --BOUNDARY1\n\ 61 | Content-Type: multipart/alternative; boundary=BOUNDARY2\n\n\ 62 | --BOUNDARY2\n\ 63 | Content-Type: text/plain; charset=UTF-8\n\n\ 64 | Simple body\n\n\ 65 | --BOUNDARY2\n\ 66 | Content-Type: text/html; charset=UTF-8\n\n\ 67 |
Simple body
\n\n\ 68 | --BOUNDARY2--\n\ 69 | --BOUNDARY1\n\ 70 | Content-Type: text/plain; charset=US-ASCII; name=\"attachment.txt\"\n\ 71 | Content-Disposition: attachment; filename=\"attachment.txt\"\n\ 72 | Content-Transfer-Encoding: base64\n\n\ 73 | Zm9v\n\ 74 | --BOUNDARY1--"; 75 | [%expect 76 | {| 77 | (Multipart ( 78 | (boundary BOUNDARY1) 79 | (prologue ()) 80 | (epilogue ()) 81 | (parts ( 82 | ((headers ((Content-Type " multipart/alternative; boundary=BOUNDARY2"))) 83 | (raw_content ( 84 | "--BOUNDARY2\nContent-Type: text/plain; charset=UTF-8\n\nSimple body\n\n--BOUNDARY2\nContent-Type: text/html; charset=UTF-8\n\n
Simple body
\n\n--BOUNDARY2--"))) 85 | ((headers ( 86 | (Content-Type " text/plain; charset=US-ASCII; name=\"attachment.txt\"") 87 | (Content-Disposition " attachment; filename=\"attachment.txt\"") 88 | (Content-Transfer-Encoding " base64"))) 89 | (raw_content (Zm9v))))) 90 | (container_headers ((Content-Type " multipart/mixed; boundary=BOUNDARY1"))))) 91 | |}]; 92 | return () 93 | ;; 94 | 95 | let%expect_test "message/rfc822" = 96 | parse "Content-Type: message/rfc822\n\nFrom: foo@bar.com\n\nSample body"; 97 | [%expect 98 | {| (Message ((headers ((From " foo@bar.com"))) (raw_content ("Sample body")))) |}]; 99 | return () 100 | ;; 101 | -------------------------------------------------------------------------------- /test/test_email_content.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_email_date.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Email_message 3 | open Expect_test_helpers_core 4 | open Email_date 5 | module Time = Time_float_unix 6 | 7 | let parse_and_print time = 8 | print_string (Time.to_string_iso8601_basic (of_string_exn time) ~zone:Time.Zone.utc) 9 | ;; 10 | 11 | let parse_and_print_with_time_zone time = 12 | let time, time_zone = of_string_exn_with_time_zone time in 13 | let time = Time.to_string_iso8601_basic time ~zone:Time.Zone.utc in 14 | print_string [%string "%{time}, %{Time.Zone.to_string time_zone}"] 15 | ;; 16 | 17 | let%expect_test "of_rfc822_date neg offset" = 18 | parse_and_print "Fri, 03 Dec 2010 16:02:30 -0600 (CST)"; 19 | [%expect {| 2010-12-03T22:02:30.000000Z |}]; 20 | parse_and_print_with_time_zone "Fri, 03 Dec 2010 16:02:30 -0600 (CST)"; 21 | [%expect {| 2010-12-03T22:02:30.000000Z, UTC-6 |}] 22 | ;; 23 | 24 | let%expect_test "of_rfc822_date offset with non zero minutes" = 25 | parse_and_print "Fri, 3 Dec 2010 16:02:30 +0530 (IST)"; 26 | [%expect {| 2010-12-03T10:32:30.000000Z |}]; 27 | parse_and_print_with_time_zone "Fri, 3 Dec 2010 16:02:30 +0530 (IST)"; 28 | [%expect {| 2010-12-03T10:32:30.000000Z, UTC+5:30 |}]; 29 | parse_and_print "Fri, 03 Dec 2010 16:02:30 -0330 (NT)"; 30 | [%expect {| 2010-12-03T19:32:30.000000Z |}]; 31 | parse_and_print_with_time_zone "Fri, 03 Dec 2010 22:02:30 -0330 (NT)"; 32 | [%expect {| 2010-12-04T01:32:30.000000Z, UTC-3:30 |}] 33 | ;; 34 | 35 | let%expect_test "of_rfc822_date GMT" = 36 | parse_and_print "Fri, 3 Dec 2010 16:02:30 +0000 (GMT)"; 37 | [%expect {| 2010-12-03T16:02:30.000000Z |}]; 38 | parse_and_print_with_time_zone "Fri, 3 Dec 2010 16:02:30 +0000 (GMT)"; 39 | [%expect {| 2010-12-03T16:02:30.000000Z, UTC |}] 40 | ;; 41 | 42 | let%expect_test "of_rfc822_date pos offset" = 43 | parse_and_print "Fri, 03 Dec 2010 16:02:30 +0600 (BST)"; 44 | [%expect {| 2010-12-03T10:02:30.000000Z |}]; 45 | parse_and_print_with_time_zone "Fri, 03 Dec 2010 16:02:30 +0600 (BST)"; 46 | [%expect {| 2010-12-03T10:02:30.000000Z, UTC+6 |}] 47 | ;; 48 | 49 | let%expect_test "of_rfc822_date seconds are optional" = 50 | parse_and_print "Fri, 03 Dec 2010 16:02 +0600 (BST)"; 51 | [%expect {| 2010-12-03T10:02:00.000000Z |}] 52 | ;; 53 | 54 | let%expect_test "of_rfc822_date 2 digit year meaning 20th century (obsolete format)" = 55 | parse_and_print "01 Jan 98 11:00 +0000"; 56 | [%expect {| 1998-01-01T11:00:00.000000Z |}] 57 | ;; 58 | 59 | let%expect_test "of_rfc822_date 2 digit year meaning 21st century (obsolete format)" = 60 | parse_and_print "01 Jan 20 11:00 +0000"; 61 | [%expect {| 2020-01-01T11:00:00.000000Z |}] 62 | ;; 63 | 64 | let%expect_test "of_rfc822_date embedded comments and extra whitespace" = 65 | parse_and_print " Fri, (hello) 03 Dec 2010 16:02:30 (BST)\n+0600"; 66 | [%expect {| 2010-12-03T10:02:30.000000Z |}] 67 | ;; 68 | 69 | let%expect_test "of_rfc822_date no day-of-week" = 70 | parse_and_print "03 Dec 2010 16:02:30 +0600 (BST)"; 71 | [%expect {| 2010-12-03T10:02:30.000000Z |}] 72 | ;; 73 | 74 | let%expect_test "of_rfc822_date obsolete timezones" = 75 | List.iter 76 | [ "UT"; "GMT"; "EST"; "EDT"; "CST"; "CDT"; "MST"; "MDT"; "PST"; "PDT" ] 77 | ~f:(fun tz -> 78 | parse_and_print ("03 Dec 2010 16:02:30 " ^ tz); 79 | Out_channel.newline stdout); 80 | [%expect 81 | {| 82 | 2010-12-03T16:02:30.000000Z 83 | 2010-12-03T16:02:30.000000Z 84 | 2010-12-03T21:02:30.000000Z 85 | 2010-12-03T20:02:30.000000Z 86 | 2010-12-03T22:02:30.000000Z 87 | 2010-12-03T21:02:30.000000Z 88 | 2010-12-03T23:02:30.000000Z 89 | 2010-12-03T22:02:30.000000Z 90 | 2010-12-04T00:02:30.000000Z 91 | 2010-12-03T23:02:30.000000Z 92 | |}] 93 | ;; 94 | 95 | (* As per https://tools.ietf.org/html/rfc5322#section-4.3 all these military timezones are 96 | unpredictable and hence should be parsed as '-0000' *) 97 | let%expect_test "of_rfc822_date military timezones" = 98 | let single_letter_tzs = 99 | List.init 26 ~f:(fun i -> Char.of_int_exn (Char.to_int 'A' + i) |> Char.to_string) 100 | |> List.filter ~f:(String.( <> ) "J") 101 | in 102 | let parsed = 103 | List.map single_letter_tzs ~f:(fun tz -> 104 | of_string_exn ("Fri, 03 Dec 2010 16:02:30 " ^ tz)) 105 | in 106 | print_string (Time.to_string_iso8601_basic (List.hd_exn parsed) ~zone:Time.Zone.utc); 107 | List.iter parsed ~f:(fun time -> require_equal (module Time) (List.hd_exn parsed) time); 108 | [%expect {| 2010-12-03T16:02:30.000000Z |}] 109 | ;; 110 | 111 | let%expect_test "of_rfc822_date fail semantically incorrect date" = 112 | show_raise (fun () -> of_string_exn "Fri, 39 Dec 2010 16:02:30 +0000 (GMT)"); 113 | [%expect 114 | {| 115 | (raised ( 116 | Invalid_argument 117 | "Date.create_exn ~y:2010 ~m:Dec ~d:39 error: 31 day month violation")) 118 | |}] 119 | ;; 120 | 121 | let%expect_test "of_rfc822_date fail bad format" = 122 | show_raise (fun () -> of_string_exn "bad format"); 123 | [%expect {| (raised (Failure "Failed to parse RFC822 date > day: skip")) |}] 124 | ;; 125 | 126 | let%expect_test "of_rfc822_date fail no TZinfo" = 127 | show_raise (fun () -> of_string_exn "Fri, 03 Dec 2010 16:02:30 (BST)"); 128 | [%expect {| (raised (Failure "Failed to parse RFC822 time zone: no more choices")) |}] 129 | ;; 130 | 131 | let%expect_test "of_rfc822_date semantically invalid UTC offset" = 132 | show_raise (fun () -> of_string_exn "Fri, 03 Dec 2010 16:02:30 -3600 (CST)"); 133 | [%expect {| (raised "The supplied UTC offset is semantically invalid.") |}] 134 | ;; 135 | 136 | let%expect_test "rfc822_date" = 137 | print_string (Email_date.rfc822_date Time.epoch); 138 | [%expect {| Wed, 31 Dec 1969 19:00:00 -0500 |}] 139 | ;; 140 | 141 | let%test_unit ("rfc822_date round-trip" [@tags "64-bits-only"]) = 142 | let open! Quickcheck.Let_syntax in 143 | Quickcheck.test 144 | (* Unfortunately we cannot use Time.quickcheck_generator since that generates times 145 | with sub-second precision and a RFC822 date is only precise to the second. 146 | 147 | unix time 3_000_000_000 is in the year 2065. 148 | *) 149 | (let%map seconds_since_epoch = Int64.gen_incl (-100L) 3_000_000_000L in 150 | Float.of_int64 seconds_since_epoch |> Time.Span.of_sec |> Time.of_span_since_epoch) 151 | ~sexp_of:[%sexp_of: Time.t] 152 | ~f:(fun time -> 153 | [%test_eq: Time.t] time (Email_date.rfc822_date time |> of_string_exn)) 154 | ;; 155 | -------------------------------------------------------------------------------- /test/test_email_date.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_email_save.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message 4 | open Expect_test_helpers_async 5 | 6 | let save ?eol_except_raw_content str = 7 | let email = Email.of_string str in 8 | with_temp_dir (fun tmpdir -> 9 | let path = tmpdir ^/ "email" in 10 | let%bind () = Email.save ?eol_except_raw_content email path in 11 | let%bind on_disk = Reader.file_contents path in 12 | String.concat_map on_disk ~f:(fun c -> 13 | if Char.equal c '\r' then "\\r" else String.of_char c) 14 | |> printf "%s"; 15 | Deferred.unit) 16 | ;; 17 | 18 | let%expect_test "save" = 19 | let email_str = "A: B\nC: D\n\nLine 1\nLine 2" in 20 | let%bind () = save email_str in 21 | [%expect 22 | {| 23 | A: B 24 | C: D 25 | 26 | Line 1 27 | Line 2 28 | |}]; 29 | let%bind () = save ~eol_except_raw_content:`CRLF email_str in 30 | [%expect 31 | {| 32 | A: B\r 33 | C: D\r 34 | \r 35 | Line 1 36 | Line 2 37 | |}]; 38 | Deferred.unit 39 | ;; 40 | -------------------------------------------------------------------------------- /test/test_email_save.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_email_simple.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_encoded_word.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Email_message.Email_headers.Encoded_word 3 | 4 | let%expect_test "decode" = 5 | let test str = printf "%S\n" (decode str |> Or_error.ok_exn) in 6 | test "hello = there?!?"; 7 | [%expect {| "hello = there?!?" |}]; 8 | test "hello\n there\n\tagain\n my\nfriend"; 9 | [%expect {| "hello there\tagain my\nfriend" |}]; 10 | (* Some test vectors from: https://tools.ietf.org/html/rfc2047 *) 11 | test "=?ISO-8859-1?Q?a?="; 12 | [%expect {| "a" |}]; 13 | test "=?ISO-8859-1?Q?a?= b"; 14 | [%expect {| "a b" |}]; 15 | test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="; 16 | [%expect {| "ab" |}]; 17 | test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="; 18 | [%expect {| "ab" |}]; 19 | test "=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?="; 20 | [%expect {| "ab" |}]; 21 | test "=?ISO-8859-1?Q?a_b?="; 22 | [%expect {| "a b" |}]; 23 | test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?_b?="; 24 | [%expect {| "a b" |}]; 25 | test " =?US-ASCII?Q?Keith_Moore?= "; 26 | [%expect {| " Keith Moore " |}]; 27 | test " =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= "; 28 | [%expect {| " Keld J\248rn Simonsen " |}]; 29 | test " =?ISO-8859-1?Q?Andr=E9?= Pirard "; 30 | [%expect {| " Andr\233 Pirard " |}]; 31 | test 32 | "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\ 33 | =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="; 34 | [%expect {| "If you can read this you understand the example." |}]; 35 | (* Big5 example *) 36 | test "=?Big5?B?pl7C0DogW0V4dGVybmFsXSBSZTogPDxKYW5lc3RyZWV0Pj4gVW5tYXRjaGVk?="; 37 | [%expect {| "\166^\194\208: [External] Re: <> Unmatched" |}]; 38 | (* a UTF8 Example *) 39 | test "=?UTF-8?B?SGkgVGhlcmVcIQo=?="; 40 | [%expect {| "Hi There\\!\n" |}]; 41 | (* mixed encodings *) 42 | test "Hello =?US-ASCII?Q?ascii?= =?ISO-8859-1?B?YmluYXJ5?= =?UTF-8?Q?world?="; 43 | [%expect {| "Hello asciibinaryworld" |}]; 44 | test "Ocaml =?UTF-8?B?8J+Qqg==?= Code"; 45 | [%expect {| "Ocaml \240\159\144\170 Code" |}] 46 | ;; 47 | 48 | let%expect_test "decode-with-charset" = 49 | let test str = 50 | let decoded = decode_with_charset str |> Or_error.ok_exn in 51 | print_s 52 | [%sexp 53 | (decoded 54 | : [ `Encoded of 55 | [ `Ascii 56 | | `Big5 57 | | `GB2312 58 | | `Latin1 59 | | `Latin2 60 | | `Utf8 61 | | `Windows1252 62 | | `KS_C_5601_1987 63 | ] 64 | * string 65 | | `Plain of string 66 | ] 67 | list)] 68 | in 69 | test "hello = there?!?"; 70 | [%expect {| ((Plain hello) (Plain " ") (Plain =) (Plain " ") (Plain there?!?)) |}]; 71 | test "hello\n there\n\tagain\n my\nfriend"; 72 | [%expect 73 | {| 74 | ((Plain hello) (Plain " ") (Plain there) (Plain "\t") (Plain again) 75 | (Plain " ") (Plain my) (Plain "\n") (Plain friend)) 76 | |}]; 77 | (* Some test vectors from: https://tools.ietf.org/html/rfc2047 *) 78 | test "=?ISO-8859-1?Q?a?="; 79 | [%expect {| ((Encoded (Latin1 a))) |}]; 80 | test "=?ISO-8859-1?Q?a?= b"; 81 | [%expect {| ((Encoded (Latin1 a)) (Plain " ") (Plain b)) |}]; 82 | test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="; 83 | [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 b))) |}]; 84 | test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="; 85 | [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 b))) |}]; 86 | test "=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?="; 87 | [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 b))) |}]; 88 | test "=?ISO-8859-1?Q?a_b?="; 89 | [%expect {| ((Encoded (Latin1 "a b"))) |}]; 90 | test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?_b?="; 91 | [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 " b"))) |}]; 92 | test " =?US-ASCII?Q?Keith_Moore?= "; 93 | [%expect 94 | {| 95 | ((Plain " ") (Encoded (Ascii "Keith Moore")) (Plain " ") 96 | (Plain )) 97 | |}]; 98 | test " =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= "; 99 | [%expect 100 | {| 101 | ((Plain " ") (Encoded (Latin1 "Keld J\248rn Simonsen")) (Plain " ") 102 | (Plain )) 103 | |}]; 104 | test " =?ISO-8859-1?Q?Andr=E9?= Pirard "; 105 | [%expect 106 | {| 107 | ((Plain " ") (Encoded (Latin1 "Andr\233")) (Plain " ") (Plain Pirard) 108 | (Plain " ") (Plain )) 109 | |}]; 110 | test 111 | "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\ 112 | =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="; 113 | [%expect 114 | {| 115 | ((Encoded (Latin1 "If you can read this yo")) 116 | (Encoded (Latin2 "u understand the example."))) 117 | |}]; 118 | (* Big5 example *) 119 | test "=?Big5?B?pl7C0DogW0V4dGVybmFsXSBSZTogPDxKYW5lc3RyZWV0Pj4gVW5tYXRjaGVk?="; 120 | [%expect 121 | {| ((Encoded (Big5 "\166^\194\208: [External] Re: <> Unmatched"))) |}]; 122 | (* a UTF8 Example *) 123 | test "=?UTF-8?B?SGkgVGhlcmVcIQo=?="; 124 | [%expect {| ((Encoded (Utf8 "Hi There\\!\n"))) |}]; 125 | (* mixed encodings *) 126 | test "Hello =?US-ASCII?Q?ascii?= =?ISO-8859-1?B?YmluYXJ5?= =?UTF-8?Q?world?="; 127 | [%expect 128 | {| 129 | ((Plain Hello) (Plain " ") (Encoded (Ascii ascii)) (Encoded (Latin1 binary)) 130 | (Encoded (Utf8 world))) 131 | |}]; 132 | test "Ocaml =?UTF-8?B?8J+Qqg==?= Code"; 133 | [%expect 134 | {| 135 | ((Plain Ocaml) (Plain " ") (Encoded (Utf8 "\240\159\144\170")) (Plain " ") 136 | (Plain Code)) 137 | |}] 138 | ;; 139 | 140 | let%expect_test _ = 141 | let test str = printf "%S\n" (decode str |> Or_error.ok_exn) in 142 | (* The RFC is pretty clear in saying that spaces inside the encode word are not supported. 143 | Yet we've seen some incorrectly encoded headers in the wild. 144 | 145 | Adding this test to document that the current behaviour is correct. 146 | *) 147 | test 148 | "=?UTF-8?Q?encoded word with spaces ?= =?UTF-8?Q?and new\n\ 149 | lines?= =?UTF-8?Q?should=20not=20be=20decoded?="; 150 | [%expect 151 | {| "=?UTF-8?Q?encoded word with spaces ?= =?UTF-8?Q?and new\nlines?= should not be decoded" |}] 152 | ;; 153 | -------------------------------------------------------------------------------- /test/test_encoded_word.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_headers.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Email_message 3 | module Expect_test_config = Core.Expect_test_config 4 | 5 | let%expect_test "whitespace" = 6 | let headers = 7 | Email_headers.of_list 8 | ~normalize:`None 9 | [ "header1", "1" 10 | ; "header2", " 2" 11 | ; "header3", "=?ISO-8859-1?Q?a?=" 12 | ; "header4", " =?ISO-8859-1?Q?a?=" 13 | ; "header5", " =?ISO-8859-1?Q?a?= \n =?ISO-8859-1?Q?b?= \n c" 14 | ] 15 | in 16 | let print_headers ~normalize = 17 | Email_headers.to_list ~normalize headers 18 | |> List.iter ~f:(fun (name, value) -> printf "|%s|%s|\n" name value) 19 | in 20 | print_headers ~normalize:`None; 21 | [%expect 22 | {| 23 | |header1|1| 24 | |header2| 2| 25 | |header3|=?ISO-8859-1?Q?a?=| 26 | |header4| =?ISO-8859-1?Q?a?=| 27 | |header5| =?ISO-8859-1?Q?a?= 28 | =?ISO-8859-1?Q?b?= 29 | c| 30 | |}]; 31 | print_headers ~normalize:`Whitespace; 32 | [%expect 33 | {| 34 | |header1|1| 35 | |header2|2| 36 | |header3|=?ISO-8859-1?Q?a?=| 37 | |header4|=?ISO-8859-1?Q?a?=| 38 | |header5|=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?= c| 39 | |}]; 40 | print_headers 41 | ~normalize:(`Whitespace_and_encoding (`Any_charset, `Pretend_all_charsets_are_same)); 42 | [%expect 43 | {| 44 | |header1|1| 45 | |header2|2| 46 | |header3|a| 47 | |header4|a| 48 | |header5|ab c| 49 | |}] 50 | ;; 51 | -------------------------------------------------------------------------------- /test/test_headers.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_media_type.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message 4 | open Email_message.Private 5 | open Expect_test_helpers_core 6 | open Media_type 7 | 8 | let from_headers headers = 9 | let result = Option.value_exn (from_headers headers) in 10 | print_s [%message "" ~_:(result : t)] 11 | ;; 12 | 13 | let%expect_test "[Media_type.last]" = 14 | let headers = 15 | [ "Content-Type", "multipart/mixed;\nboundary=\"BOUNDARY\"" ] 16 | |> Email_headers.of_list ~normalize:`Whitespace 17 | in 18 | from_headers headers; 19 | [%expect 20 | {| 21 | ((mime_type multipart) 22 | (mime_subtype mixed) 23 | (params ((boundary BOUNDARY)))) 24 | |}]; 25 | return () 26 | ;; 27 | -------------------------------------------------------------------------------- /test/test_media_type.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_octet_stream.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message 4 | open Octet_stream 5 | 6 | let test ~encoding plaintext = 7 | let coded = encode ~encoding (Bigstring_shared.of_string plaintext) in 8 | printf "%s" (Octet_stream.encoded_contents_string coded); 9 | let decoded = decode coded |> Option.value_exn |> Bigstring_shared.to_string in 10 | [%test_result: string] decoded ~expect:plaintext 11 | ;; 12 | 13 | let%expect_test "base64" = 14 | let test = test ~encoding:`Base64 in 15 | test "any carnal pleasure."; 16 | [%expect {| YW55IGNhcm5hbCBwbGVhc3VyZS4= |}]; 17 | test "any carnal pleasure"; 18 | [%expect {| YW55IGNhcm5hbCBwbGVhc3VyZQ== |}]; 19 | test "any carnal pleasur"; 20 | [%expect {| YW55IGNhcm5hbCBwbGVhc3Vy |}]; 21 | test "any carnal pleasu"; 22 | [%expect {| YW55IGNhcm5hbCBwbGVhc3U= |}]; 23 | test "any carnal pleas"; 24 | [%expect {| YW55IGNhcm5hbCBwbGVhcw== |}]; 25 | test 26 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor \ 27 | incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud \ 28 | exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute \ 29 | irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \ 30 | pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia \ 31 | deserunt mollit anim id est laborum."; 32 | [%expect 33 | {| 34 | TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg 35 | c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu 36 | YSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0 37 | aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1 38 | YXQuIER1aXMgYXV0ZSBpcnVyZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2 39 | ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRl 40 | dXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1 41 | aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg== 42 | |}]; 43 | return () 44 | ;; 45 | 46 | let%expect_test "quoted printable" = 47 | let test = test ~encoding:`Quoted_printable in 48 | test 49 | "If you believe that truth=beauty, then surely mathematics is the most beautiful \ 50 | branch of philosophy."; 51 | [%expect 52 | {| 53 | If you believe that truth=3Dbeauty, then surely mathematics is the most = 54 | beautiful branch of philosophy. 55 | |}]; 56 | test "\000\001\002a\003 \n"; 57 | [%expect {| =00=01=02a=03 =20 |}]; 58 | test "\000\001\002a\003 \n"; 59 | [%expect {| =00=01=02a=03 =20 |}]; 60 | test "\000\001\002a\003 \n"; 61 | [%expect {| =00=01=02a=03 =20 |}]; 62 | test "\000\001\002a\003 \n"; 63 | [%expect {| =00=01=02a=03 =20 |}]; 64 | test "This text is fairly long and should be wrapped by a conforming implementation."; 65 | [%expect 66 | {| 67 | This text is fairly long and should be wrapped by a conforming = 68 | implementation. 69 | |}]; 70 | test 71 | "123456789A123456789B123456789C123456789D123456789E123456789F123456789G123456789H123456789I"; 72 | [%expect 73 | {| 74 | 123456789A123456789B123456789C123456789D123456789E123456789F123456789G12345= 75 | 6789H123456789I 76 | |}]; 77 | test 78 | "123456789A123456789B123456789C123456789D123456789E123456789F123456789G=23456789H123456789I"; 79 | [%expect 80 | {| 81 | 123456789A123456789B123456789C123456789D123456789E123456789F123456789G=3D23= 82 | 456789H123456789I 83 | |}]; 84 | return () 85 | ;; 86 | -------------------------------------------------------------------------------- /test/test_octet_stream.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_related_parts.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message 4 | 5 | let test_email = 6 | (* This is the test email from https://tools.ietf.org/html/rfc2392 except I had to 7 | remove the "type=Text/HTML" from the top-most "Content-Type" header. *) 8 | String.strip 9 | {| 10 | From: foo1@bar.net 11 | To: foo2@bar.net 12 | Subject: A simple example 13 | Mime-Version: 1.0 14 | Content-Type: multipart/related; boundary="boundary-example-1" 15 | --boundary-example 1 16 | Content-Type: Text/HTML; charset=US-ASCII 17 | 18 | to the other body part, for example through a statement such as: 19 | IETF logo 20 | 21 | --boundary-example-1 22 | Content-ID: 23 | Content-Type: IMAGE/GIF 24 | Content-Transfer-Encoding: BASE64 25 | 26 | R0lGODlhGAGgAPEAAP/////ZRaCgoAAAACH+PUNvcHlyaWdodCAoQykgMTk5 27 | NSBJRVRGLiBVbmF1dGhvcml6ZWQgZHVwbGljYXRpb24gcHJvaGliaXRlZC4A 28 | etc... 29 | 30 | --boundary-example-1-- 31 | |} 32 | ;; 33 | 34 | let%expect_test _ = 35 | let email = Email.of_string test_email in 36 | Email.Simple.all_related_parts email 37 | |> [%sexp_of: (string * Email.Simple.Content.t) list] 38 | |> print_s; 39 | [%expect 40 | {| 41 | ((foo4*foo1@bar.net 42 | ((headers 43 | ((Content-ID " ") (Content-Type " IMAGE/GIF") 44 | (Content-Transfer-Encoding " BASE64"))) 45 | (raw_content 46 | ( "R0lGODlhGAGgAPEAAP/////ZRaCgoAAAACH+PUNvcHlyaWdodCAoQykgMTk5\ 47 | \nNSBJRVRGLiBVbmF1dGhvcml6ZWQgZHVwbGljYXRpb24gcHJvaGliaXRlZC4A\ 48 | \netc...\ 49 | \n"))))) 50 | |}]; 51 | return () 52 | ;; 53 | -------------------------------------------------------------------------------- /test/test_related_parts.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/test_rfc.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | open Email_message.Private 4 | open Expect_test_helpers_core 5 | open Rfc.RFC2045.Token 6 | 7 | let%expect_test "RFC2045.Token" = 8 | let is_valid_or_quote strs = 9 | let results = List.map strs ~f:(fun str -> str, is_valid_or_quote str) in 10 | print_s [%message "" ~_:(results : (string * string) list)] 11 | in 12 | is_valid_or_quote [ "abcdefghijkl"; "abc=dka"; ""; "\"" ]; 13 | [%expect 14 | {| 15 | ((abcdefghijkl abcdefghijkl) 16 | (abc=dka "\"abc=dka\"") 17 | ("" "\"\"") 18 | ("\"" "\"\\\"\"")) 19 | |}]; 20 | let is_valid strs = 21 | let results = List.map strs ~f:(fun str -> str, `Is_valid (is_valid str)) in 22 | print_s [%message "" ~_:(results : (string * [ `Is_valid of bool ]) list)] 23 | in 24 | is_valid 25 | [ "abcdefghijkl" 26 | ; "LoremIpsum" 27 | ; "3.141" 28 | ; "---" 29 | ; "a" 30 | ; "abc=dka" 31 | ; "" 32 | ; "\"" 33 | ; "@" 34 | ; "," 35 | ; ":" 36 | ; ";" 37 | ; "\x00" 38 | ; "\x7F" 39 | ; "\x80" 40 | ]; 41 | [%expect 42 | {| 43 | ((abcdefghijkl (Is_valid true)) 44 | (LoremIpsum (Is_valid true)) 45 | (3.141 (Is_valid true)) 46 | (--- (Is_valid true)) 47 | (a (Is_valid true)) 48 | (abc=dka (Is_valid false)) 49 | ("" (Is_valid false)) 50 | ("\"" (Is_valid false)) 51 | (@ (Is_valid false)) 52 | (, (Is_valid false)) 53 | (: (Is_valid false)) 54 | (";" (Is_valid false)) 55 | ("\000" (Is_valid false)) 56 | ("\127" (Is_valid false)) 57 | ("\128" (Is_valid false))) 58 | |}]; 59 | return () 60 | ;; 61 | -------------------------------------------------------------------------------- /test/test_rfc.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | --------------------------------------------------------------------------------