├── .gitignore
├── .ocamlformat
├── CHANGES.md
├── CONTRIBUTING.md
├── LICENSE.md
├── Makefile
├── README.md
├── dune-project
├── email_address
    └── src
    │   ├── dune
    │   ├── email_address.ml
    │   ├── email_address.mli
    │   ├── email_address_parser_stable_v1.ml
    │   └── email_address_parser_stable_v1.mli
├── email_message.opam
├── kernel
    └── src
    │   ├── bigstring_shared.ml
    │   ├── bigstring_shared.mli
    │   ├── boundary.ml
    │   ├── boundary.mli
    │   ├── dune
    │   ├── email.ml
    │   ├── email.mli
    │   ├── email_content.ml
    │   ├── email_content.mli
    │   ├── email_grammar.mly
    │   ├── email_grammar_types.ml
    │   ├── email_grammar_types.mli
    │   ├── email_intf.ml
    │   ├── email_lexer.mli
    │   ├── email_lexer.mll
    │   ├── email_lexer_state.ml
    │   ├── email_lexer_state.mli
    │   ├── email_message_kernel.ml
    │   ├── email_raw_content.ml
    │   ├── email_raw_content.mli
    │   ├── email_simple.ml
    │   ├── email_simple.mli
    │   ├── email_simple_intf.ml
    │   ├── encoded_word.ml
    │   ├── encoded_word.mli
    │   ├── headers.ml
    │   ├── headers.mli
    │   ├── lf_or_crlf.ml
    │   ├── lf_or_crlf.mli
    │   ├── magic_mime_external.ml
    │   ├── media_type.ml
    │   ├── media_type.mli
    │   ├── media_type_grammar.mly
    │   ├── media_type_grammar_types.ml
    │   ├── media_type_grammar_types.mli
    │   ├── media_type_lexer.mli
    │   ├── media_type_lexer.mll
    │   ├── mimestring.ml
    │   ├── mimestring.mli
    │   ├── octet_stream.ml
    │   ├── octet_stream.mli
    │   ├── quoted_printable_lexer.mli
    │   ├── quoted_printable_lexer.mll
    │   ├── rfc.ml
    │   ├── rfc.mli
    │   ├── string_monoid.ml
    │   ├── string_monoid.mli
    │   ├── string_monoid_intf.ml
    │   ├── string_monoidable.ml
    │   └── string_monoidable.mli
├── src
    ├── crypto.ml
    ├── dune
    ├── email.ml
    ├── email.mli
    ├── email_date.ml
    ├── email_date.mli
    ├── email_message.ml
    ├── email_simple.ml
    ├── email_simple.mli
    ├── import.ml
    ├── selector.ml
    ├── selector.mli
    ├── string_monoid.ml
    ├── string_monoid.mli
    ├── wrapper.ml
    └── wrapper.mli
└── test
    ├── dune
    ├── email_message_test.ml
    ├── test_boundary.ml
    ├── test_boundary.mli
    ├── test_email.ml
    ├── test_email.mli
    ├── test_email_content.ml
    ├── test_email_content.mli
    ├── test_email_date.ml
    ├── test_email_date.mli
    ├── test_email_save.ml
    ├── test_email_save.mli
    ├── test_email_simple.ml
    ├── test_email_simple.mli
    ├── test_encoded_word.ml
    ├── test_encoded_word.mli
    ├── test_headers.ml
    ├── test_headers.mli
    ├── test_media_type.ml
    ├── test_media_type.mli
    ├── test_octet_stream.ml
    ├── test_octet_stream.mli
    ├── test_related_parts.ml
    ├── test_related_parts.mli
    ├── test_rfc.ml
    └── test_rfc.mli


/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | *.install
3 | *.merlin
4 | _opam
5 | 
6 | 


--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
1 | profile=janestreet
2 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
 1 | ## Release v0.17.0
 2 | 
 3 | - `Email_address` module now has a `to_string_utf8` function to generate a UTF-8 encoded
 4 |   representation of an email address
 5 | 
 6 | - Split [Email_message] into [Email_message] and [Email_message_kernel]. The
 7 |   [Email_message_kernel] library is compatible with JS of OCaml.
 8 |   
 9 | - Tweak the normalization of encoded header values. We previously decoded before
10 |   normalizing, now we normalize before decoding.
11 |   
12 | - Improve parsing of email addresses to catch more invalid addresses as errors
13 | 
14 | ## Release v0.16.0
15 | 
16 | - `Email_simple.all_attachments` and `Email_simple.map_attachments`: Added optional
17 |   `include_inline_parts` parameter to control whether to attempt to interpret inline
18 |    parts as attachments
19 | 
20 | - `Email_date.rfc822_date` now accepts an optional `zone` parameter which determines what
21 |    timezone to use in the output string
22 | 
23 | ## Old pre-v0.15 changelogs (very likely stale and incomplete)
24 | 
25 | ## 113.24.00
26 | 
27 | - Bugfixes and minor API improvements.
28 | 
29 | ## 113.00.00
30 | 
31 | - Extended and improved Email_message API.
32 | 
33 | ## 112.17.00
34 | 
35 | Moved from janestreet-alpha
36 | 
37 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | This repository contains open source software that is developed and
 2 | maintained by [Jane Street][js].
 3 | 
 4 | Contributions to this project are welcome and should be submitted via
 5 | GitHub pull requests.
 6 | 
 7 | Signing contributions
 8 | ---------------------
 9 | 
10 | We require that you sign your contributions. Your signature certifies
11 | that you wrote the patch or otherwise have the right to pass it on as
12 | an open-source patch. The rules are pretty simple: if you can certify
13 | the below (from [developercertificate.org][dco]):
14 | 
15 | ```
16 | Developer Certificate of Origin
17 | Version 1.1
18 | 
19 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
20 | 1 Letterman Drive
21 | Suite D4700
22 | San Francisco, CA, 94129
23 | 
24 | Everyone is permitted to copy and distribute verbatim copies of this
25 | license document, but changing it is not allowed.
26 | 
27 | 
28 | Developer's Certificate of Origin 1.1
29 | 
30 | By making a contribution to this project, I certify that:
31 | 
32 | (a) The contribution was created in whole or in part by me and I
33 |     have the right to submit it under the open source license
34 |     indicated in the file; or
35 | 
36 | (b) The contribution is based upon previous work that, to the best
37 |     of my knowledge, is covered under an appropriate open source
38 |     license and I have the right under that license to submit that
39 |     work with modifications, whether created in whole or in part
40 |     by me, under the same open source license (unless I am
41 |     permitted to submit under a different license), as indicated
42 |     in the file; or
43 | 
44 | (c) The contribution was provided directly to me by some other
45 |     person who certified (a), (b) or (c) and I have not modified
46 |     it.
47 | 
48 | (d) I understand and agree that this project and the contribution
49 |     are public and that a record of the contribution (including all
50 |     personal information I submit with it, including my sign-off) is
51 |     maintained indefinitely and may be redistributed consistent with
52 |     this project or the open source license(s) involved.
53 | ```
54 | 
55 | Then you just add a line to every git commit message:
56 | 
57 | ```
58 | Signed-off-by: Joe Smith <joe.smith@email.com>
59 | ```
60 | 
61 | Use your real name (sorry, no pseudonyms or anonymous contributions.)
62 | 
63 | If you set your `user.name` and `user.email` git configs, you can sign
64 | your commit automatically with git commit -s.
65 | 
66 | [dco]: http://developercertificate.org/
67 | [js]: https://opensource.janestreet.com/
68 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2005--2025 Jane Street Group, LLC <opensource-contacts@janestreet.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | INSTALL_ARGS := $(if $(PREFIX),--prefix $(PREFIX),)
 2 | 
 3 | default:
 4 | 	dune build
 5 | 
 6 | install:
 7 | 	dune install $(INSTALL_ARGS)
 8 | 
 9 | uninstall:
10 | 	dune uninstall $(INSTALL_ARGS)
11 | 
12 | reinstall: uninstall install
13 | 
14 | clean:
15 | 	dune clean
16 | 
17 | .PHONY: default install uninstall reinstall clean
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | "Email Message"
 2 | ===============
 3 | 
 4 | `email_message` is a library that defines types that represent an RFC2822 email.
 5 | 
 6 | # Parsing an email
 7 | You can use `Email.of_string` to parse an email (e.g. from a ".eml" file).
 8 | 
 9 | Once you have your hands on an `Email.t`, you can use various functions in the
10 | `Email.Simple` and `Email` modules to inspect parts of the email. For example:
11 | 
12 | - `Email.Simple.subject` : the subject of the email
13 | - `Email.Simple.from` : the From header sender of the email
14 | - `Email.headers` : all the email headers
15 | - `Email.Simple.all_attachments` the email attachments
16 | 
17 | # Constructing an email
18 | The `Email.Simple` module exposes various functions for constructing an email. For example:
19 | 
20 | ```ocaml
21 | Email.Simple.create
22 |   ~from:(Email_address.of_string "sender@example.com")
23 |   ~to_:[ (Email_address.of_string "recipient@example.com") ]
24 |   ~subject:"Example email"
25 |   (Email.Simple.Content.text_utf8 "This is an example email.")
26 | ```
27 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
1 | (lang dune 3.17)
2 | 


--------------------------------------------------------------------------------
/email_address/src/dune:
--------------------------------------------------------------------------------
1 | (library
2 |  (name email_address)
3 |  (public_name email_message.email_address)
4 |  (libraries angstrom core base64)
5 |  (preprocess
6 |   (pps ppx_jane)))
7 | 


--------------------------------------------------------------------------------
/email_address/src/email_address.ml:
--------------------------------------------------------------------------------
  1 | open! Core.Core_stable
  2 | open! Core.Int.Replace_polymorphic_compare
  3 | 
  4 | module Stable_caseless_string = struct
  5 |   module U = Core.String.Caseless
  6 | 
  7 |   module V1 = struct
  8 |     module T = struct
  9 |       type t = String.V1.t [@@deriving sexp, bin_io, stable_witness, equal]
 10 | 
 11 |       let compare = U.compare
 12 |       let hash_fold_t = U.hash_fold_t
 13 | 
 14 |       type comparator_witness = U.comparator_witness
 15 | 
 16 |       let comparator = U.comparator
 17 |     end
 18 | 
 19 |     include T
 20 |     include Comparable.V1.With_stable_witness.Make (T)
 21 |   end
 22 | end
 23 | 
 24 | module Stable = struct
 25 |   module Domain = struct
 26 |     module V1 = Stable_caseless_string.V1
 27 |   end
 28 | 
 29 |   module V1 = struct
 30 |     module T = struct
 31 |       type t = Email_address_parser_stable_v1.t =
 32 |         { (* [prefix = None] means no brackets. *)
 33 |           prefix : String.V1.t option [@compare.ignore] [@hash.ignore]
 34 |         ; local_part : String.V1.t
 35 |         ; domain : Domain.V1.t option
 36 |         }
 37 |       [@@deriving fields ~getters, compare, hash, stable_witness]
 38 | 
 39 |       let equal = [%compare.equal: t]
 40 |       let create ?prefix ?domain local_part = { prefix; local_part; domain }
 41 | 
 42 |       let with_default_domain email ~default_domain =
 43 |         { email with domain = Core.Option.first_some email.domain default_domain }
 44 |       ;;
 45 | 
 46 |       let of_string ?default_domain input_str =
 47 |         let open Core in
 48 |         let open! Int.Replace_polymorphic_compare in
 49 |         match
 50 |           Angstrom.parse_string
 51 |             ~consume:Prefix
 52 |             Email_address_parser_stable_v1.email_only
 53 |             input_str
 54 |         with
 55 |         | Error error ->
 56 |           Or_error.error_s
 57 |             [%message
 58 |               "Failed to parse email address" (error : string) (input_str : string)]
 59 |         | Ok email -> Or_error.return (with_default_domain email ~default_domain)
 60 |       ;;
 61 | 
 62 |       let of_string_exn ?default_domain input_str =
 63 |         of_string ?default_domain input_str |> Core.Or_error.ok_exn
 64 |       ;;
 65 | 
 66 |       let compose ~prefix ~address_part =
 67 |         match prefix with
 68 |         | None -> address_part
 69 |         | Some prefix -> Core.sprintf "%s<%s>" prefix address_part
 70 |       ;;
 71 | 
 72 |       let compose_utf8 ~prefix ~address_part =
 73 |         match prefix with
 74 |         | None -> address_part
 75 |         | Some prefix ->
 76 |           let encoded_prefix = Base64.encode_string prefix in
 77 |           let prefix_utf8 = Core.sprintf "=?%s?B?%s?=" "UTF-8" encoded_prefix in
 78 |           Core.sprintf "%s<%s>" prefix_utf8 address_part
 79 |       ;;
 80 | 
 81 |       let to_string t =
 82 |         let address_part =
 83 |           match t.domain with
 84 |           | None -> t.local_part
 85 |           | Some domain -> Core.sprintf "%s@%s" t.local_part domain
 86 |         in
 87 |         compose ~prefix:t.prefix ~address_part
 88 |       ;;
 89 | 
 90 |       let to_string_utf8 t =
 91 |         let address_part =
 92 |           match t.domain with
 93 |           | None -> t.local_part
 94 |           | Some domain -> Core.sprintf "%s@%s" t.local_part domain
 95 |         in
 96 |         compose_utf8 ~prefix:t.prefix ~address_part
 97 |       ;;
 98 | 
 99 |       include Sexpable.Of_stringable.V1 (struct
100 |           type nonrec t = t
101 | 
102 |           let to_string = to_string
103 |           let of_string s = of_string_exn s
104 |         end)
105 | 
106 |       let t_sexp_grammar = Sexplib0.Sexp_grammar.coerce [%sexp_grammar: string]
107 | 
108 |       include Binable.Of_stringable.V1 [@alert "-legacy"] (struct
109 |           type nonrec t = t
110 | 
111 |           let to_string = to_string
112 |           let of_string s = of_string_exn s
113 |         end)
114 |     end
115 | 
116 |     module With_comparator = struct
117 |       include T
118 |       include Comparator.V1.Make (T)
119 |     end
120 | 
121 |     include With_comparator
122 |     include Comparable.V1.With_stable_witness.Make (With_comparator)
123 |   end
124 | end
125 | 
126 | open Core
127 | open! Int.Replace_polymorphic_compare
128 | 
129 | module Domain = struct
130 |   include String.Caseless
131 | 
132 |   let to_string = Fn.id
133 |   let of_string = Fn.id
134 | end
135 | 
136 | module T = Stable.V1.With_comparator
137 | include T
138 | 
139 | let list_of_string ?default_domain input_str =
140 |   match
141 |     Angstrom.parse_string
142 |       ~consume:Prefix
143 |       Email_address_parser_stable_v1.email_list_only
144 |       input_str
145 |   with
146 |   | Error error ->
147 |     Or_error.error_s
148 |       [%message "Failed to parse email address(es)" (error : string) (input_str : string)]
149 |   | Ok emails ->
150 |     Or_error.return (List.map ~f:(with_default_domain ~default_domain) emails)
151 | ;;
152 | 
153 | let list_of_string_exn ?default_domain input_str =
154 |   list_of_string ?default_domain input_str |> Core.Or_error.ok_exn
155 | ;;
156 | 
157 | let list_to_header_value ts = String.concat ~sep:",\n\t" (List.map ts ~f:to_string)
158 | 
159 | let address_part ?(brackets = false) ?(lowercase_domain = false) t =
160 |   let prefix = if brackets then Some "" else None in
161 |   let domain =
162 |     if not lowercase_domain then t.domain else Option.map t.domain ~f:String.lowercase
163 |   in
164 |   { t with prefix; domain }
165 | ;;
166 | 
167 | let address_part_string ?brackets ?lowercase_domain t =
168 |   to_string (address_part ?brackets ?lowercase_domain t)
169 | ;;
170 | 
171 | let set_address_part t address_part = of_string (compose ~prefix:t.prefix ~address_part)
172 | let set_local_part t local_part = { t with local_part }
173 | let set_domain t domain = { t with domain }
174 | let set_prefix t prefix = { t with prefix }
175 | let arg_type = Command.Arg_type.create of_string_exn
176 | 
177 | include Comparable.Make_plain_using_comparator (T)
178 | include Hashable.Make_plain (T)
179 | 
180 | module Expert = struct
181 |   module Parser = Email_address_parser_stable_v1
182 | end
183 | 
184 | module For_test = struct
185 |   type t = Email_address_parser_stable_v1.t [@@deriving sexp_of]
186 | end
187 | 
188 | module Caseless = struct
189 |   module T = struct
190 |     type nonrec t = t =
191 |       { prefix : String.Stable.V1.t option [@compare.ignore] [@hash.ignore]
192 |       ; local_part : Stable_caseless_string.V1.t
193 |       ; domain : Stable_caseless_string.V1.t option
194 |       }
195 |     [@@deriving compare, hash, sexp]
196 |   end
197 | 
198 |   include T
199 |   include Hashable.Make_plain (T)
200 |   include Comparable.Make_plain (T)
201 | end
202 | 


--------------------------------------------------------------------------------
/email_address/src/email_address.mli:
--------------------------------------------------------------------------------
  1 | open Core
  2 | 
  3 | module Domain : sig
  4 |   type t = String.Caseless.t [@@deriving sexp_of, compare]
  5 | 
  6 |   include Stringable with type t := t
  7 | 
  8 |   include
  9 |     Comparable.S_plain
 10 |     with type t := t
 11 |      and type comparator_witness = String.Caseless.comparator_witness
 12 | 
 13 |   include Hashable.S_plain with type t := t
 14 | end
 15 | 
 16 | type t [@@deriving sexp_of, sexp_grammar, compare, hash]
 17 | 
 18 | val create : ?prefix:string -> ?domain:Domain.t -> string -> t
 19 | val of_string : ?default_domain:string -> string -> t Or_error.t
 20 | val of_string_exn : ?default_domain:string -> string -> t
 21 | val list_of_string : ?default_domain:string -> string -> t list Or_error.t
 22 | val list_of_string_exn : ?default_domain:string -> string -> t list
 23 | val to_string : t -> string
 24 | val to_string_utf8 : t -> string
 25 | val list_to_header_value : t list -> string
 26 | val local_part : t -> string
 27 | val set_local_part : t -> string -> t
 28 | val domain : t -> Domain.t option
 29 | val set_domain : t -> Domain.t option -> t
 30 | 
 31 | val address_part
 32 |   :  ?brackets:bool (** default: [false] *)
 33 |   -> ?lowercase_domain:bool
 34 |   -> t
 35 |   -> t
 36 | 
 37 | val address_part_string
 38 |   :  ?brackets:bool (** default: [false] *)
 39 |   -> ?lowercase_domain:bool
 40 |   -> t
 41 |   -> string
 42 | 
 43 | (** [set_address_part] expects an email address without prefix or angle brackets e.g.
 44 |     USER@DOMAIN. *)
 45 | val set_address_part : t -> string -> t Or_error.t
 46 | 
 47 | val prefix : t -> string option
 48 | 
 49 | (** [set_prefix] will remove angle brackets if given [None], otherwise angle brackets are
 50 |     added before the given prefix. *)
 51 | val set_prefix : t -> string option -> t
 52 | 
 53 | val arg_type : t Command.Arg_type.t
 54 | 
 55 | (* Hash and comparisons are based on the address part (local_part + domain)
 56 |    only. *)
 57 | 
 58 | include Comparable.S_plain with type t := t
 59 | include Hashable.S_plain with type t := t
 60 | 
 61 | module Caseless : sig
 62 |   type nonrec t = t [@@deriving sexp_of, compare, hash]
 63 | 
 64 |   include Comparable.S_plain with type t := t
 65 |   include Hashable.S_plain with type t := t
 66 | end
 67 | 
 68 | module For_test : sig
 69 |   type nonrec t = t [@@deriving sexp_of]
 70 | end
 71 | 
 72 | module Expert : sig
 73 |   module Parser : sig
 74 |     val email : t Angstrom.t
 75 |     val skip_whitespace : unit Angstrom.t
 76 |     val prefix : string Angstrom.t
 77 |   end
 78 | end
 79 | 
 80 | module Stable : sig
 81 |   module V1 : sig
 82 |     type nonrec t = t [@@deriving bin_io, hash, sexp_grammar, stable_witness, equal]
 83 | 
 84 |     include
 85 |       Stable_comparable.With_stable_witness.V1
 86 |       with type t := t
 87 |       with type comparator_witness = comparator_witness
 88 | 
 89 |     val of_string_exn : ?default_domain:string -> string -> t
 90 |     val to_string : t -> string
 91 |   end
 92 | 
 93 |   module Domain : sig
 94 |     module V1 :
 95 |       Stable_comparable.With_stable_witness.V1
 96 |       with type t := Domain.t
 97 |       with type comparator_witness = Domain.comparator_witness
 98 |   end
 99 | end
100 | 


--------------------------------------------------------------------------------
/email_address/src/email_address_parser_stable_v1.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | open! Core.Int.Replace_polymorphic_compare
  3 | open Angstrom
  4 | open Angstrom.Let_syntax
  5 | 
  6 | type t =
  7 |   { prefix : string option
  8 |   ; local_part : string
  9 |   ; domain : string option
 10 |   }
 11 | [@@deriving sexp_of]
 12 | 
 13 | let is_whitespace_char = function
 14 |   | ' ' | '\r' | '\n' | '\t' -> true
 15 |   | _ -> false
 16 | ;;
 17 | 
 18 | let is_unquoted_prefix_char = function
 19 |   | '<' | '>' | '@' | ',' | '"' -> false
 20 |   | _ -> true
 21 | ;;
 22 | 
 23 | let skip_whitespace = skip_while is_whitespace_char
 24 | let skip_while1 f = satisfy f *> skip_while f
 25 | 
 26 | let skip_satisfied_or_escaped1 f =
 27 |   skip_many1 (char '\\' *> advance 1 <|> skip_while1 (fun c -> Char.( <> ) '\\' c && f c))
 28 | ;;
 29 | 
 30 | let quoted_prefix : unit Angstrom.t =
 31 |   let not_a_quote = Char.( <> ) '"' in
 32 |   char '"' *> option () (skip_satisfied_or_escaped1 not_a_quote) <* char '"'
 33 | ;;
 34 | 
 35 | let unquoted_prefix : unit Angstrom.t =
 36 |   satisfy (fun c -> is_unquoted_prefix_char c && not (is_whitespace_char c))
 37 |   *> skip_while is_unquoted_prefix_char
 38 | ;;
 39 | 
 40 | let prefix : string Angstrom.t =
 41 |   (* Includes the whitespace. *)
 42 |   consumed ((quoted_prefix <|> unquoted_prefix <?> "prefix") *> skip_whitespace)
 43 | ;;
 44 | 
 45 | let is_address_char = function
 46 |   | '<' | '>' | '@' | ',' -> false
 47 |   | c -> not (is_whitespace_char c)
 48 | ;;
 49 | 
 50 | let is_domain_char = function
 51 |   | '\'' | '"' -> false
 52 |   | c -> is_address_char c
 53 | ;;
 54 | 
 55 | let local_part = consumed (skip_satisfied_or_escaped1 is_address_char) <?> "local_part"
 56 | 
 57 | let maybe_at_domain =
 58 |   option
 59 |     None
 60 |     (let%map (_ : char) = char '@'
 61 |      and domain = take_while1 is_domain_char in
 62 |      Some domain)
 63 |   <?> "domain"
 64 | ;;
 65 | 
 66 | let email_without_prefix_no_quote =
 67 |   let%map local_part
 68 |   and domain = maybe_at_domain in
 69 |   local_part, domain
 70 | ;;
 71 | 
 72 | let email_without_prefix_quote q = char q *> email_without_prefix_no_quote <* char q
 73 | 
 74 | let email_without_prefix_maybe_quote =
 75 |   (* Order matters here.
 76 |      Because [email_without_prefix_no_quote] may contain quotes in the local part
 77 |      we need to match the quoted version first.
 78 |      Otherwise we will 'successfully' match the string and be left with an unconsumed trailing quote.
 79 |   *)
 80 |   email_without_prefix_quote '\''
 81 |   <|> email_without_prefix_quote '"'
 82 |   <|> email_without_prefix_no_quote
 83 | ;;
 84 | 
 85 | let email_without_prefix =
 86 |   let%map local_part, domain = email_without_prefix_maybe_quote in
 87 |   { prefix = None; local_part; domain }
 88 | ;;
 89 | 
 90 | let email_with_prefix =
 91 |   let%map prefix = option "" prefix
 92 |   and (_ : char) = char '<'
 93 |   and local_part, domain = email_without_prefix_maybe_quote
 94 |   and (_ : char) = char '>' in
 95 |   { prefix = Some prefix; local_part; domain }
 96 | ;;
 97 | 
 98 | let email =
 99 |   (* Order matters here.
100 |      `email_without_prefix` can (sometimes) parse the prefix of
101 |      `email_with_prefix`.
102 |      This means we must match `email_with_prefix` first, if this fails we
103 |      fall back to `email_without_prefix`.
104 |      (Otherwise [email_without_prefix] would match and then fail due to unconsumed input)
105 |   *)
106 |   email_with_prefix <|> email_without_prefix <?> "email"
107 | ;;
108 | 
109 | let email_list =
110 |   let delim = skip_whitespace *> char ',' <* skip_whitespace in
111 |   sep_by delim email <?> "email_list"
112 | ;;
113 | 
114 | let parse_only x = skip_whitespace *> x <* skip_whitespace <* end_of_input
115 | let email_only = parse_only email
116 | let email_list_only = parse_only email_list
117 | 


--------------------------------------------------------------------------------
/email_address/src/email_address_parser_stable_v1.mli:
--------------------------------------------------------------------------------
 1 | type t =
 2 |   { prefix : string option
 3 |   ; local_part : string
 4 |   ; domain : string option
 5 |   }
 6 | [@@deriving sexp_of]
 7 | 
 8 | val skip_whitespace : unit Angstrom.t
 9 | val prefix : string Angstrom.t
10 | val email : t Angstrom.t
11 | val email_only : t Angstrom.t
12 | val email_list_only : t list Angstrom.t
13 | 


--------------------------------------------------------------------------------
/email_message.opam:
--------------------------------------------------------------------------------
 1 | opam-version: "2.0"
 2 | maintainer: "Jane Street developers"
 3 | authors: ["Jane Street Group, LLC"]
 4 | homepage: "https://github.com/janestreet/email_message"
 5 | bug-reports: "https://github.com/janestreet/email_message/issues"
 6 | dev-repo: "git+https://github.com/janestreet/email_message.git"
 7 | doc: "https://ocaml.janestreet.com/ocaml-core/latest/doc/email_message/index.html"
 8 | license: "MIT"
 9 | build: [
10 |   ["dune" "build" "-p" name "-j" jobs]
11 | ]
12 | depends: [
13 |   "ocaml" {>= "5.1.0"}
14 |   "async"
15 |   "core"
16 |   "core_kernel"
17 |   "core_unix"
18 |   "ppx_jane"
19 |   "re2"
20 |   "angstrom"    {>= "0.15.0"}
21 |   "base64"      {>= "3.4.0"}
22 |   "cryptokit"   {>= "1.16" & < "1.17"}
23 |   "dune"        {>= "3.17.0"}
24 |   "magic-mime"
25 | ]
26 | available: arch != "arm32" & arch != "x86_32"
27 | synopsis: "E-mail message parser"
28 | description: "
29 | "
30 | 


--------------------------------------------------------------------------------
/kernel/src/bigstring_shared.ml:
--------------------------------------------------------------------------------
  1 | module Stable = struct
  2 |   open! Core.Core_stable
  3 | 
  4 |   module V1 = struct
  5 |     type t = Core.Bigstring.Stable.V1.t_frozen [@@deriving sexp, bin_io, compare, hash]
  6 | 
  7 |     let equal = [%compare.equal: t]
  8 |   end
  9 | end
 10 | 
 11 | type t = Stable.V1.t [@@deriving sexp_of, compare, hash, equal]
 12 | 
 13 | open Core
 14 | open Poly
 15 | open Bigstring
 16 | 
 17 | let to_bigstring t = t
 18 | let of_bigstring t = t
 19 | let to_string_monoid t = String_monoid.of_bigstring t
 20 | let of_string_monoid t = String_monoid.to_bigstring t
 21 | let to_string t = to_string t
 22 | let of_string s = of_string s
 23 | let empty = Bigstring.create 0
 24 | let length t = Bigstring.length t
 25 | 
 26 | let sub ?pos ?len t =
 27 |   let pos, len =
 28 |     match pos, len with
 29 |     | None, None -> 0, length t
 30 |     | None, Some len -> 0, len
 31 |     | Some pos, None -> pos, length t - pos
 32 |     | Some pos, Some len -> pos, len
 33 |   in
 34 |   Bigstring.sub_shared ~pos ~len t
 35 | ;;
 36 | 
 37 | let to_lexbuf t =
 38 |   let offset = ref 0 in
 39 |   let len = length t in
 40 |   Lexing.from_function (fun dst n ->
 41 |     let read = min n (len - !offset) in
 42 |     Bigstring.To_bytes.blit ~src:t ~src_pos:!offset ~len:read ~dst ~dst_pos:0;
 43 |     offset := !offset + read;
 44 |     read)
 45 | ;;
 46 | 
 47 | let foldi t ~init ~f =
 48 |   let len = length t in
 49 |   let rec loop init pos =
 50 |     if pos >= len then init else loop (f pos init t.{pos}) (pos + 1)
 51 |   in
 52 |   loop init 0
 53 | ;;
 54 | 
 55 | (* Copied from String.split_lines. *)
 56 | let iter_lines_rev t ~f =
 57 |   let back_up_at_newline ~t ~pos ~eol =
 58 |     pos := !pos - if !pos > 0 && get t (!pos - 1) = '\r' then 2 else 1;
 59 |     eol := !pos + 1
 60 |   in
 61 |   let n = length t in
 62 |   if n = 0
 63 |   then ()
 64 |   else (
 65 |     (* Invariant: [-1 <= pos < eol]. *)
 66 |     let pos = ref (n - 1) in
 67 |     let eol = ref n in
 68 |     (* We treat the end of the string specially, because if the string ends with a
 69 |        newline, we don't want an extra empty string at the end of the output. *)
 70 |     if get t !pos = '\n' then back_up_at_newline ~t ~pos ~eol;
 71 |     while !pos >= 0 do
 72 |       if get t !pos <> '\n'
 73 |       then decr pos
 74 |       else (
 75 |         (* Becuase [pos < eol], we know that [start <= eol]. *)
 76 |         let start = !pos + 1 in
 77 |         f (sub t ~pos:start ~len:(!eol - start));
 78 |         back_up_at_newline ~t ~pos ~eol)
 79 |     done;
 80 |     f (sub t ~pos:0 ~len:!eol))
 81 | ;;
 82 | 
 83 | let split_lines t =
 84 |   let acc = ref [] in
 85 |   iter_lines_rev t ~f:(fun line -> acc := line :: !acc);
 86 |   !acc
 87 | ;;
 88 | 
 89 | let lines_seq ?include_empty_last_line t =
 90 |   let open Sequence.Generator in
 91 |   let open Sequence.Generator.Let_syntax in
 92 |   let rec traverse ~sol ~pos =
 93 |     let prev_char_is_cr = pos <> 0 && get t (pos - 1) = '\r' in
 94 |     if pos = length t
 95 |     then
 96 |       if (* Safe because [length t > 0] *)
 97 |          Option.is_some include_empty_last_line || not (get t (pos - 1) = '\n')
 98 |       then (
 99 |         let len = pos - sol in
100 |         let%bind () = yield (sub t ~pos:sol ~len) in
101 |         return ())
102 |       else return ()
103 |     else if get t pos <> '\n'
104 |     then traverse ~sol ~pos:(pos + 1)
105 |     else (
106 |       let len = pos - sol - if prev_char_is_cr then 1 else 0 in
107 |       let%bind () = yield (sub t ~pos:sol ~len) in
108 |       let pos' = pos + 1 in
109 |       traverse ~sol:pos' ~pos:pos')
110 |   in
111 |   if length t = 0 then Sequence.empty else Sequence.Generator.run (traverse ~sol:0 ~pos:0)
112 | ;;
113 | 
114 | let iter_lines t ~f = Sequence.iter (lines_seq t) ~f
115 | 
116 | let%expect_test "split_lines and iter_lines" =
117 |   let split_lines t = split_lines (of_string t) |> List.map ~f:to_string in
118 |   let split_lines_via_iter_lines t =
119 |     let acc = ref [] in
120 |     iter_lines (of_string t) ~f:(fun line -> acc := line :: !acc);
121 |     List.rev_map !acc ~f:to_string
122 |   in
123 |   let impls =
124 |     [ "Bigstring.iter_lines_rev", split_lines
125 |     ; "Bigstring.iter_lines", split_lines_via_iter_lines
126 |     ; "String.split_lines", String.split_lines
127 |     ]
128 |   in
129 |   List.iter
130 |     ~f:(fun s ->
131 |       let results = List.map impls ~f:(fun (desc, f) -> desc, f s) in
132 |       let all_equal =
133 |         List.dedup_and_sort results ~compare:(fun (_, r1) (_, r2) ->
134 |           [%compare: string list] r1 r2)
135 |         |> List.length
136 |         |> Int.equal 1
137 |       in
138 |       if not all_equal
139 |       then
140 |         raise_s
141 |           [%message
142 |             "Mismatching implementations"
143 |               ~input:(s : string)
144 |               ~_:(results : (string * string list) list)])
145 |     [ ""
146 |     ; "\n"
147 |     ; "a"
148 |     ; "a\n"
149 |     ; "a\nb"
150 |     ; "a\nb\n"
151 |     ; "a\n\n"
152 |     ; "a\n\nb"
153 |     ; "a\r\n\nb"
154 |     ; "\ra\r\n\nb"
155 |     ; "\ra\r\n\nb\r"
156 |     ; "\ra\r\n\nb\r\n"
157 |     ]
158 | ;;
159 | 
160 | let of_bigbuffer_volatile buffer =
161 |   (* If this isn't done, the buffer might contain extra uninitialized characters *)
162 |   Bigstring.sub_shared
163 |     ~pos:0
164 |     ~len:(Bigbuffer.length buffer)
165 |     (Bigbuffer.volatile_contents buffer)
166 | ;;
167 | 
168 | let substr_index ?(pos = 0) t ~pattern =
169 |   if length pattern = 0
170 |   then Some pos
171 |   else (
172 |     let c = Bigstring.get pattern 0 in
173 |     let last_index = Bigstring.length t - Bigstring.length pattern in
174 |     let rec loop pos =
175 |       if pos > last_index
176 |       then None
177 |       else (
178 |         match Bigstring.find c t ~pos ~len:(last_index - pos + 1) with
179 |         | None -> None
180 |         | Some pos ->
181 |           assert (pos <= last_index);
182 |           let found_ = Bigstring.sub_shared t ~pos ~len:(Bigstring.length pattern) in
183 |           if Bigstring.equal pattern found_ then Some pos else loop (pos + 1))
184 |     in
185 |     loop pos)
186 | ;;
187 | 


--------------------------------------------------------------------------------
/kernel/src/bigstring_shared.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | (** Immutable sequences of bytes which can be windowed efficiently. *)
 4 | type t = private Bigstring.t [@@deriving sexp_of, compare, hash, equal]
 5 | 
 6 | val of_bigstring : Bigstring.t -> t
 7 | val to_bigstring : t -> Bigstring.t
 8 | 
 9 | include Stringable.S with type t := t
10 | include String_monoidable.S with type t := t
11 | 
12 | val to_lexbuf : t -> Lexing.lexbuf
13 | 
14 | (** Empty, immutable Bigstring *)
15 | val empty : t
16 | 
17 | val length : t -> int
18 | val sub : ?pos:int -> ?len:int -> t -> t
19 | val foldi : t -> init:'b -> f:(int -> 'b -> char -> 'b) -> 'b
20 | 
21 | (** [include_empty_last_line] determines whether a string that ends in "\n" has an empty
22 |     string as the last line.
23 | 
24 |     [iter_lines] and [split_lines] do not include an empty last line. *)
25 | val lines_seq : ?include_empty_last_line:unit -> t -> t Sequence.t
26 | 
27 | val iter_lines : t -> f:(t -> unit) -> unit
28 | val split_lines : t -> t list
29 | 
30 | (** Gets a bigstring from a bigbuffer with minimal memory overhead. *)
31 | val of_bigbuffer_volatile : Bigbuffer.t -> t
32 | 
33 | val of_string_monoid : String_monoid.t -> t
34 | val substr_index : ?pos:int -> t -> pattern:t -> int option
35 | 
36 | module Stable : sig
37 |   module V1 : sig
38 |     type nonrec t = t [@@deriving equal]
39 | 
40 |     include Stable_without_comparator with type t := t
41 |   end
42 | end
43 | 


--------------------------------------------------------------------------------
/kernel/src/boundary.ml:
--------------------------------------------------------------------------------
  1 | module Stable = struct
  2 |   open Core.Core_stable
  3 | 
  4 |   module V1 = struct
  5 |     type t = string [@@deriving sexp, bin_io, compare]
  6 |   end
  7 | end
  8 | 
  9 | open Core
 10 | 
 11 | type t = string [@@deriving sexp_of, compare, hash]
 12 | type boundary = t
 13 | 
 14 | let of_string = Fn.id
 15 | let to_string = Fn.id
 16 | 
 17 | module Open = struct
 18 |   let to_string_monoid t = String_monoid.concat_string [ "\n"; "--"; t; "\n" ]
 19 | end
 20 | 
 21 | module Close = struct
 22 |   let to_string_monoid t = String_monoid.concat_string [ "\n"; "--"; t; "--" ]
 23 | end
 24 | 
 25 | module Open_first = struct
 26 |   let to_string_monoid t = String_monoid.concat_string [ "--"; t; "\n" ]
 27 | end
 28 | 
 29 | let split t bstr =
 30 |   let lf = Bigstring_shared.of_string "\n" in
 31 |   let crlf = Bigstring_shared.of_string "\r\n" in
 32 |   let dashdash = Bigstring_shared.of_string "--" in
 33 |   let t = Bigstring_shared.of_string ("--" ^ t) in
 34 |   let match_after ~pos bstr ~pattern =
 35 |     let len = Bigstring_shared.length pattern in
 36 |     Option.some_if
 37 |       (pos >= 0
 38 |        && pos + len <= Bigstring_shared.length bstr
 39 |        && [%compare.equal: Bigstring_shared.t]
 40 |             pattern
 41 |             (Bigstring_shared.sub bstr ~pos ~len))
 42 |       (pos + len)
 43 |   in
 44 |   let match_before ~pos:end_ bstr ~pattern =
 45 |     let start = end_ - Bigstring_shared.length pattern in
 46 |     match_after ~pos:start bstr ~pattern
 47 |     |> Option.map ~f:(fun end_' ->
 48 |       assert (end_ = end_');
 49 |       start)
 50 |   in
 51 |   let match_crlf direction ~pos bstr =
 52 |     if pos = 0 || pos = Bigstring_shared.length bstr
 53 |     then Some pos
 54 |     else (
 55 |       let match_ =
 56 |         match direction with
 57 |         | `After -> fun pattern -> match_after ~pos bstr ~pattern
 58 |         | `Before -> fun pattern -> match_before ~pos bstr ~pattern
 59 |       in
 60 |       Option.first_some (match_ crlf) (match_ lf))
 61 |   in
 62 |   let rec find_boundary pos =
 63 |     match Bigstring_shared.substr_index bstr ~pos ~pattern:t with
 64 |     | None ->
 65 |       (* No more occurrences of [BOUNDARY] so definitely at EOF *)
 66 |       `Eof
 67 |     | Some pos ->
 68 |       let no_prologue = pos = 0 in
 69 |       (* Ensure we are at the start of a line (after [CR]LF, or beginning of bigstring) *)
 70 |       (match match_crlf `Before ~pos bstr with
 71 |        | None ->
 72 |          find_boundary (pos + 1) (* skip a character to avoid getting stuck in a loop *)
 73 |        | Some begin_ ->
 74 |          let pos = pos + Bigstring_shared.length t in
 75 |          let is_terminal, pos =
 76 |            match match_after ~pos bstr ~pattern:dashdash with
 77 |            | Some pos -> true, pos
 78 |            | None -> false, pos
 79 |          in
 80 |          (* Ensure we are at the end of a line (before [CR]LF, or end of bigstring) *)
 81 |          (match match_crlf `After ~pos bstr with
 82 |           | None -> find_boundary pos
 83 |           | Some end_ ->
 84 |             if is_terminal
 85 |             then `Close_boundary (begin_, pos)
 86 |             else if no_prologue
 87 |             then `Open_boundary_first end_
 88 |             else `Open_boundary (begin_, end_)))
 89 |   in
 90 |   let rec loop pos acc has_prologue =
 91 |     let sub ?stop () =
 92 |       let stop = Option.value stop ~default:(Bigstring_shared.length bstr) in
 93 |       let len = stop - pos in
 94 |       if len <= 0 then Bigstring_shared.empty else Bigstring_shared.sub ~pos ~len bstr
 95 |     in
 96 |     match find_boundary pos with
 97 |     | `Open_boundary_first pos -> loop pos acc false
 98 |     | `Open_boundary (stop, pos) ->
 99 |       let chunk = sub ~stop () in
100 |       loop pos (chunk :: acc) has_prologue
101 |     | `Close_boundary (stop, pos) ->
102 |       let chunk = sub ~stop () in
103 |       let epilogue =
104 |         if pos < Bigstring_shared.length bstr
105 |         then Some (Bigstring_shared.sub ~pos bstr)
106 |         else None
107 |       in
108 |       chunk :: acc, epilogue, has_prologue
109 |     | `Eof ->
110 |       let chunk = sub () in
111 |       chunk :: acc, None, has_prologue
112 |   in
113 |   (* RFC 2046: A multipart body may have a prologue and an epilogue *)
114 |   let parts, epilogue, has_prologue = loop 0 [] true in
115 |   match List.rev parts with
116 |   | [] -> Some bstr, [], epilogue
117 |   | prologue :: parts when has_prologue -> Some prologue, parts, epilogue
118 |   | parts -> None, parts, epilogue
119 | ;;
120 | 
121 | module Generator = struct
122 |   type nonrec t = t Sequence.t
123 | 
124 |   let sexp_of_t t =
125 |     [%sexp ((Sequence.take t 5 |> Sequence.to_list) @ [ "..." ] : string list)]
126 |   ;;
127 | 
128 |   (* This boundary pattern ensures that the boundary should not appear in
129 |      - Headers
130 |      - Quoted-printable text
131 |      - Base64 encoded content.
132 | 
133 |      The only posibility is that it might appear in plaintext, but
134 |      that would be incredibly rare.
135 | 
136 |      We avoid conflicts by generating IDs with different numbers as needed.
137 |   *)
138 |   let default =
139 |     Sequence.unfold ~init:0 ~f:(fun num ->
140 |       let str = sprintf "--==::BOUNDARY::%06d::==--" num in
141 |       Some (str, num + 1))
142 |   ;;
143 | 
144 |   let%expect_test _ =
145 |     Sequence.take default 5 |> Sequence.iter ~f:print_endline;
146 |     [%expect
147 |       {|
148 |       --==::BOUNDARY::000000::==--
149 |       --==::BOUNDARY::000001::==--
150 |       --==::BOUNDARY::000002::==--
151 |       --==::BOUNDARY::000003::==--
152 |       --==::BOUNDARY::000004::==--
153 |       |}]
154 |   ;;
155 | 
156 |   (* Increment the last numeric component to avoid number conflicts. *)
157 |   let from_existing_boundary str = Sequence.append (Sequence.singleton str) default
158 | 
159 |   let%expect_test _ =
160 |     Sequence.take (from_existing_boundary "BOUNDARY") 5 |> Sequence.iter ~f:print_endline;
161 |     [%expect
162 |       {|
163 |       BOUNDARY
164 |       --==::BOUNDARY::000000::==--
165 |       --==::BOUNDARY::000001::==--
166 |       --==::BOUNDARY::000002::==--
167 |       --==::BOUNDARY::000003::==--
168 |       |}]
169 |   ;;
170 | 
171 |   let find_nonflicting t parts =
172 |     Sequence.find_exn t ~f:(fun t ->
173 |       List.for_all parts ~f:(fun part ->
174 |         (* This will incorrectly report a conflict if [BOUNDARY] occurs in the text or is
175 |            a prefix/suffix of any existing boundary. This is very unlikely to happen in
176 |            practice, and would just result in an alternative boundary being used. *)
177 |         not (String_monoid.is_substring part ~substring:t)))
178 |   ;;
179 | 
180 |   let%expect_test _ =
181 |     find_nonflicting
182 |       (from_existing_boundary "BOUNDARY")
183 |       [ String_monoid.of_string "foobar" ]
184 |     |> print_endline;
185 |     [%expect {| BOUNDARY |}]
186 |   ;;
187 | 
188 |   let%expect_test _ =
189 |     find_nonflicting
190 |       (from_existing_boundary "BOUNDARY")
191 |       [ String_monoid.of_string "--BOUNDARY--" ]
192 |     |> print_endline;
193 |     [%expect {| --==::BOUNDARY::000000::==-- |}]
194 |   ;;
195 | 
196 |   let%expect_test _ =
197 |     find_nonflicting
198 |       (from_existing_boundary "BOUNDARY")
199 |       [ String_monoid.of_string "...BOUNDARY...--==::BOUNDARY::000000::==--..." ]
200 |     |> print_endline;
201 |     [%expect {| --==::BOUNDARY::000001::==-- |}]
202 |   ;;
203 | end
204 | 
205 | let generate_non_conflicting_boundary ?prologue ~parts ?epilogue t =
206 |   Generator.find_nonflicting
207 |     t
208 |     ((Option.to_list prologue |> List.map ~f:Bigstring_shared.to_string_monoid)
209 |      @ parts
210 |      @ (Option.to_list epilogue |> List.map ~f:Bigstring_shared.to_string_monoid))
211 | ;;
212 | 
213 | let join_without_checking_for_conflicts ?prologue ~parts ?epilogue t =
214 |   if List.is_empty parts
215 |   then (
216 |     match prologue, epilogue with
217 |     | Some prologue, Some epilogue ->
218 |       String_monoid.plus
219 |         (Bigstring_shared.to_string_monoid prologue)
220 |         (Bigstring_shared.to_string_monoid epilogue)
221 |     | Some content, None | None, Some content -> Bigstring_shared.to_string_monoid content
222 |     | None, None -> String_monoid.of_string "\n")
223 |   else (
224 |     (* Different types of boundaries that may appear in a message *)
225 |     let boundary_open_first = t |> Open_first.to_string_monoid in
226 |     let boundary_open = t |> Open.to_string_monoid in
227 |     let boundary_close = t |> Close.to_string_monoid in
228 |     let first_boundary =
229 |       if Option.is_some prologue then boundary_open else boundary_open_first
230 |     in
231 |     let prologue =
232 |       Option.value_map
233 |         prologue
234 |         ~f:Bigstring_shared.to_string_monoid
235 |         ~default:String_monoid.empty
236 |     in
237 |     let inner_boundary = boundary_open in
238 |     let last_boundary = boundary_close in
239 |     let epilogue =
240 |       Option.value_map
241 |         epilogue
242 |         ~f:Bigstring_shared.to_string_monoid
243 |         ~default:String_monoid.empty
244 |     in
245 |     String_monoid.concat
246 |       [ prologue
247 |       ; first_boundary
248 |       ; String_monoid.concat ~sep:inner_boundary parts
249 |       ; last_boundary
250 |       ; epilogue
251 |       ])
252 | ;;
253 | 


--------------------------------------------------------------------------------
/kernel/src/boundary.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | type t [@@deriving sexp_of]
 4 | type boundary = t
 5 | 
 6 | (** Creates a boundary from the value of the "boundary" parameter in a Content-type header
 7 |     (RFC2046, p.19) *)
 8 | 
 9 | include Stringable.S with type t := t
10 | 
11 | module Generator : sig
12 |   type t [@@deriving sexp_of]
13 | 
14 |   (** A boundary generator that should clash rarely. *)
15 |   val default : t
16 | 
17 |   (** A generator using an existing boundary as a template. *)
18 |   val from_existing_boundary : boundary -> t
19 | end
20 | 
21 | (** Use the generator to find a boundary that doesn't conflict. *)
22 | val generate_non_conflicting_boundary
23 |   :  ?prologue:Bigstring_shared.t
24 |   -> parts:String_monoid.t list
25 |   -> ?epilogue:Bigstring_shared.t
26 |   -> Generator.t
27 |   -> t
28 | 
29 | (** Combine parts using the given boundary. This assumes that the boundary doesn't
30 |     conflict. *)
31 | val join_without_checking_for_conflicts
32 |   :  ?prologue:Bigstring_shared.t
33 |   -> parts:String_monoid.t list
34 |   -> ?epilogue:Bigstring_shared.t
35 |   -> t
36 |   -> String_monoid.t
37 | 
38 | (** Splits a multipart body into a list of messages, and, if there are, an optional
39 |     prologue and epilogue. *)
40 | val split
41 |   :  t
42 |   -> Bigstring_shared.t
43 |   -> Bigstring_shared.t option * Bigstring_shared.t list * Bigstring_shared.t option
44 | 
45 | module Stable : sig
46 |   module V1 : sig
47 |     type nonrec t = t [@@deriving sexp, bin_io]
48 |   end
49 | end
50 | 


--------------------------------------------------------------------------------
/kernel/src/dune:
--------------------------------------------------------------------------------
 1 | (library
 2 |  (name email_message_kernel)
 3 |  (public_name email_message.kernel)
 4 |  (libraries angstrom base64 base64.rfc2045 core email_address magic-mime
 5 |    core_kernel.nonempty_list core_kernel.reversed_list)
 6 |  (preprocess
 7 |   (pps ppx_jane)))
 8 | 
 9 | (ocamllex email_lexer media_type_lexer quoted_printable_lexer)
10 | 
11 | (ocamlyacc email_grammar media_type_grammar)
12 | 


--------------------------------------------------------------------------------
/kernel/src/email.ml:
--------------------------------------------------------------------------------
  1 | module Stable_no_v1_bin_io = struct
  2 |   open! Core.Core_stable
  3 | 
  4 |   module V1 = struct
  5 |     type t =
  6 |       { headers : Headers.Stable.V1.t
  7 |       ; raw_content : Email_raw_content.Stable.V1.t
  8 |       }
  9 |     [@@deriving sexp, compare]
 10 |   end
 11 | end
 12 | 
 13 | open Core
 14 | open Or_error.Let_syntax
 15 | 
 16 | module T = struct
 17 |   type t = Stable_no_v1_bin_io.V1.t =
 18 |     { headers : Headers.t
 19 |     ; raw_content : Email_raw_content.t
 20 |     }
 21 |   [@@deriving sexp_of, fields ~getters ~iterators:create, compare, hash]
 22 | end
 23 | 
 24 | include T
 25 | include Comparable.Make_plain (T)
 26 | include Hashable.Make_plain (T)
 27 | 
 28 | (* The default type of a message depends on the type of its parent,
 29 |    so we need to pass it around. *)
 30 | let of_bigstring_shared ?(normalize_headers = `None) bstr =
 31 |   let lexbuf = Bigstring_shared.to_lexbuf bstr in
 32 |   let%map (`Message (headers, content_offset)) =
 33 |     try
 34 |       Ok
 35 |         (Email_grammar.message (Email_lexer.message (Email_lexer_state.create ())) lexbuf)
 36 |     with
 37 |     | _ ->
 38 |       (* Looks like lexer just throws Failure, not Parsing.Parse_error *)
 39 |       let pos = lexbuf.Lexing.lex_curr_p in
 40 |       Or_error.error_string
 41 |         (sprintf
 42 |            "Error parsing email at line %d, column %d"
 43 |            pos.Lexing.pos_lnum
 44 |            (pos.Lexing.pos_cnum - pos.Lexing.pos_bol))
 45 |   in
 46 |   let headers = Headers.of_list ~normalize:normalize_headers headers in
 47 |   let raw_content =
 48 |     match content_offset with
 49 |     | `Truncated -> None
 50 |     | `Bad_headers pos -> Some (Bigstring_shared.sub ~pos bstr)
 51 |     | `Content_offset pos -> Some (Bigstring_shared.sub ~pos bstr)
 52 |   in
 53 |   { headers
 54 |   ; raw_content = Email_raw_content.Expert.of_bigstring_shared_option raw_content
 55 |   }
 56 | ;;
 57 | 
 58 | let of_string ?normalize_headers str =
 59 |   of_bigstring_shared ?normalize_headers (Bigstring_shared.of_string str)
 60 |   |> Or_error.ok_exn
 61 | ;;
 62 | 
 63 | let of_bigstring bstr =
 64 |   of_bigstring_shared (Bigstring_shared.of_bigstring bstr) |> Or_error.ok_exn
 65 | ;;
 66 | 
 67 | let of_bigbuffer buffer = of_bigstring (Bigbuffer.big_contents buffer)
 68 | 
 69 | (* Message bodies are optional. I highly doubt anybody would handle [None] differently
 70 |    from [Some ""], so we don't expose this detail. It allows us to be smarter with
 71 |    [to_string] so we don't add a newline. *)
 72 | let to_string_monoid ?(eol_except_raw_content = `LF) t =
 73 |   let optional_body =
 74 |     match Email_raw_content.Expert.to_bigstring_shared_option t.raw_content with
 75 |     | None -> []
 76 |     | Some raw_content ->
 77 |       [ String_monoid.concat
 78 |           [ String_monoid.of_string (Lf_or_crlf.to_string eol_except_raw_content)
 79 |           ; String_monoid.of_bigstring (Bigstring_shared.to_bigstring raw_content)
 80 |           ]
 81 |       ]
 82 |   in
 83 |   String_monoid.concat
 84 |     (Headers.to_string_monoid ~eol:eol_except_raw_content t.headers :: optional_body)
 85 | ;;
 86 | 
 87 | let to_string ?eol_except_raw_content t =
 88 |   String_monoid.to_string (to_string_monoid ?eol_except_raw_content t)
 89 | ;;
 90 | 
 91 | let to_bigstring ?eol_except_raw_content t =
 92 |   String_monoid.to_bigstring (to_string_monoid ?eol_except_raw_content t)
 93 | ;;
 94 | 
 95 | let to_bigstring_shared ?eol_except_raw_content t =
 96 |   Bigstring_shared.of_string_monoid (to_string_monoid ?eol_except_raw_content t)
 97 | ;;
 98 | 
 99 | let create = Fields.create
100 | let set_headers t headers = { t with headers }
101 | let set_raw_content t raw_content = { t with raw_content }
102 | let modify_headers t ~f = set_headers t (f t.headers)
103 | let modify_raw_content t ~f = set_raw_content t (f (raw_content t))
104 | 
105 | module Stable = struct
106 |   module V1 = struct
107 |     include Stable_no_v1_bin_io.V1
108 | 
109 |     include
110 |       Binable.Of_binable_without_uuid [@alert "-legacy"]
111 |         (Bigstring.Stable.V1)
112 |         (struct
113 |           type nonrec t = t
114 | 
115 |           let to_binable t = to_bigstring t
116 |           let of_binable = of_bigstring
117 |         end)
118 |   end
119 | end
120 | 


--------------------------------------------------------------------------------
/kernel/src/email.mli:
--------------------------------------------------------------------------------
1 | include Email_intf.Email (** @inline *)
2 | 


--------------------------------------------------------------------------------
/kernel/src/email_content.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | 
  3 | module Multipart : sig
  4 |   type t = private
  5 |     { boundary : Boundary.t
  6 |     ; prologue : Bigstring_shared.t option
  7 |     ; epilogue : Bigstring_shared.t option
  8 |     ; parts : Email.t list
  9 |     ; container_headers : Headers.t
 10 |     }
 11 |   [@@deriving sexp_of]
 12 | 
 13 |   val create_unsafe
 14 |     :  boundary:Boundary.t
 15 |     -> ?prologue:Bigstring_shared.t
 16 |     -> ?epilogue:Bigstring_shared.t
 17 |     -> Email.t list
 18 |     -> container_headers:Headers.t
 19 |     -> t
 20 | 
 21 |   val create
 22 |     :  ?boundary:Boundary.t
 23 |     -> ?prologue:Bigstring_shared.t
 24 |     -> ?epilogue:Bigstring_shared.t
 25 |     -> ?container_headers:Headers.t
 26 |     -> Email.t list
 27 |     -> t
 28 | 
 29 |   val set
 30 |     :  t
 31 |     -> ?boundary:Boundary.t
 32 |     -> ?prologue:Bigstring_shared.t option
 33 |     -> ?epilogue:Bigstring_shared.t option
 34 |     -> ?parts:Email.t list
 35 |     -> ?container_headers:Headers.t
 36 |     -> unit
 37 |     -> t
 38 | 
 39 |   include String_monoidable.S with type t := t
 40 | end = struct
 41 |   type t =
 42 |     { boundary : Boundary.t
 43 |     ; prologue : Bigstring_shared.t option
 44 |     ; epilogue : Bigstring_shared.t option
 45 |     ; parts : Email.t list
 46 |     ; container_headers : Headers.t
 47 |     }
 48 |   [@@deriving sexp_of]
 49 | 
 50 |   let create_unsafe ~boundary ?prologue ?epilogue parts ~container_headers =
 51 |     { boundary; prologue; epilogue; parts; container_headers }
 52 |   ;;
 53 | 
 54 |   let create ?boundary ?prologue ?epilogue ?(container_headers = Headers.empty) parts =
 55 |     let boundary =
 56 |       Boundary.generate_non_conflicting_boundary
 57 |         ?prologue
 58 |         ~parts:(List.map parts ~f:Email.to_string_monoid)
 59 |         ?epilogue
 60 |         (Option.value_map
 61 |            boundary
 62 |            ~default:Boundary.Generator.default
 63 |            ~f:Boundary.Generator.from_existing_boundary)
 64 |     in
 65 |     create_unsafe ~boundary ?prologue ?epilogue parts ~container_headers
 66 |   ;;
 67 | 
 68 |   let set
 69 |     t
 70 |     ?(boundary = t.boundary)
 71 |     ?(prologue = t.prologue)
 72 |     ?(epilogue = t.epilogue)
 73 |     ?(parts = t.parts)
 74 |     ?(container_headers = t.container_headers)
 75 |     ()
 76 |     =
 77 |     create ~boundary ?prologue ?epilogue ~container_headers parts
 78 |   ;;
 79 | 
 80 |   let to_string_monoid t =
 81 |     Boundary.join_without_checking_for_conflicts
 82 |       ?prologue:t.prologue
 83 |       ~parts:(List.map t.parts ~f:Email.to_string_monoid)
 84 |       ?epilogue:t.epilogue
 85 |       t.boundary
 86 |   ;;
 87 | end
 88 | 
 89 | type t =
 90 |   | Multipart of Multipart.t
 91 |   | Message of Email.t
 92 |   | Data of Octet_stream.t
 93 | [@@deriving sexp_of]
 94 | 
 95 | let rec multipart_of_bigstring_shared ~boundary ~container_headers bstr =
 96 |   let open Or_error.Let_syntax in
 97 |   let prologue, parts, epilogue = Boundary.split boundary bstr in
 98 |   let%map parts =
 99 |     List.map parts ~f:(fun part ->
100 |       Or_error.tag
101 |         (Or_error.try_with (fun () ->
102 |            Email.of_bigstring (Bigstring_shared.to_bigstring part)))
103 |         ~tag:(sprintf "failed part:\n%s" (Bigstring_shared.to_string part)))
104 |     |> Or_error.all
105 |   in
106 |   Multipart.create_unsafe ~boundary ?prologue ?epilogue ~container_headers parts
107 | 
108 | and content_of_bigstring_shared ~headers ?container_headers bstr =
109 |   let open Or_error.Let_syntax in
110 |   let parent_media_type = Option.bind container_headers ~f:Media_type.from_headers in
111 |   let media_type =
112 |     Option.value
113 |       (Media_type.from_headers headers)
114 |       ~default:(Media_type.default ?parent:parent_media_type ())
115 |   in
116 |   let encoding = Octet_stream.Encoding.of_headers_or_default headers in
117 |   let octet_stream = Octet_stream.of_bigstring_shared ~encoding bstr in
118 |   let decode octet_stream =
119 |     match Octet_stream.decode octet_stream with
120 |     | None ->
121 |       Or_error.error "Unknown message encoding" encoding Octet_stream.Encoding.sexp_of_t
122 |     | Some decoded_bstr -> Ok decoded_bstr
123 |   in
124 |   match Media_type.multipart_boundary media_type with
125 |   | Some boundary ->
126 |     (* According to Wikipedia, the content-transfer-encoding of a multipart
127 |        type must always be "7bit", "8bit" or "binary" to avoid the
128 |        complications that would be posed by multiple levels of decoding. In
129 |        this case this decode call is free. *)
130 |     let%bind decoded_bstr = decode octet_stream in
131 |     let%bind multipart =
132 |       multipart_of_bigstring_shared ~boundary ~container_headers:headers decoded_bstr
133 |     in
134 |     Ok (Multipart multipart)
135 |   | None ->
136 |     if Media_type.is_message_rfc822 media_type
137 |     then (
138 |       let%bind decoded_bstr = decode octet_stream in
139 |       let%bind email =
140 |         Or_error.try_with (fun () ->
141 |           Email.of_bigstring (Bigstring_shared.to_bigstring decoded_bstr))
142 |       in
143 |       Ok (Message email))
144 |     else Ok (Data octet_stream)
145 | 
146 | and parse ?container_headers email =
147 |   content_of_bigstring_shared
148 |     ?container_headers
149 |     ~headers:(Email.headers email)
150 |     (Email.raw_content email |> Email_raw_content.to_bigstring_shared)
151 | ;;
152 | 
153 | let to_string_monoid = function
154 |   | Multipart multipart -> Multipart.to_string_monoid multipart
155 |   | Message message -> Email.to_string_monoid message
156 |   | Data octet_stream ->
157 |     Octet_stream.encoded_contents octet_stream |> Bigstring_shared.to_string_monoid
158 | ;;
159 | 
160 | let to_bigstring_shared t =
161 |   to_string_monoid t |> String_monoid.to_bigstring |> Bigstring_shared.of_bigstring
162 | ;;
163 | 
164 | let to_raw_content t = to_bigstring_shared t |> Email_raw_content.of_bigstring_shared
165 | 
166 | let rec multipart_map_data ~on_unparsable_content mp ~f =
167 |   Multipart.set
168 |     mp
169 |     ~parts:(List.map mp.Multipart.parts ~f:(map_data ~on_unparsable_content ~f))
170 |     ()
171 | 
172 | and content_map_data ~on_unparsable_content t ~f =
173 |   match t with
174 |   | Multipart t -> Multipart (multipart_map_data ~on_unparsable_content t ~f)
175 |   | Message message -> Message (map_data ~on_unparsable_content message ~f)
176 |   | Data data -> Data (f data)
177 | 
178 | and map_data ~on_unparsable_content email ~f =
179 |   match parse email with
180 |   | Ok content ->
181 |     let content = content_map_data content ~on_unparsable_content ~f in
182 |     Email.set_raw_content
183 |       email
184 |       (to_bigstring_shared content |> Email_raw_content.of_bigstring_shared)
185 |   | Error e ->
186 |     (match on_unparsable_content with
187 |      | `Skip -> email
188 |      | `Raise -> raise_s [%message "[map_data] has unparsable content" (e : Error.t)])
189 | ;;
190 | 
191 | let map_data ?(on_unparsable_content = `Skip) email ~f =
192 |   map_data ~on_unparsable_content email ~f
193 | ;;
194 | 
195 | let to_email ~headers t =
196 |   let headers =
197 |     let media_type =
198 |       match t with
199 |       | Multipart mp ->
200 |         (match Media_type.from_headers headers with
201 |          | None -> Some (Media_type.create_multipart "related" ~boundary:mp.boundary)
202 |          | Some media_type ->
203 |            Some (Media_type.set_multipart_boundary media_type mp.boundary))
204 |       | _ -> None
205 |     in
206 |     match media_type with
207 |     | None -> headers
208 |     | Some media_type -> Media_type.set_headers headers media_type
209 |   in
210 |   Email.create
211 |     ~headers
212 |     ~raw_content:(to_bigstring_shared t |> Email_raw_content.of_bigstring_shared)
213 | ;;
214 | 
215 | let set_content email t = to_email ~headers:(Email.headers email) t
216 | 


--------------------------------------------------------------------------------
/kernel/src/email_content.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | (** {v
 4 |  The cost depends on the encoding of the content and the main media type.
 5 | 
 6 |     N = Size of the message
 7 |     H = Size of the headers of the sub-message(s)
 8 | 
 9 |     Format: time complexity, memory complexity
10 | 
11 |     .         | 7bit, 8bit, binary | Base64, Quoted_printable
12 |     -------------------------------------------------------------
13 |     message   |    O(N), O(H)      | O(N), O(N)
14 |     multipart |    O(N), O(H)      | O(N), O(N)
15 |     other     |    O(1), O(1)      | O(N), O(N)
16 | 
17 |     Where other is any other main media type: text, image, application...
18 | 
19 |     Encoding and type can be obtained from the headers, using the modules
20 |     Headers.Content_type and Headers.Content_transfer_encoding, and the corresponding
21 |     default values.
22 |     v} *)
23 | 
24 | module Multipart : sig
25 |   type t = private
26 |     { boundary : Boundary.t
27 |     ; prologue : Bigstring_shared.t option
28 |     ; epilogue : Bigstring_shared.t option
29 |     ; parts : Email.t list
30 |     (** [container_headers] is informational only for use when further processing parts.
31 |         it is ignored by [to_email]. *)
32 |     ; container_headers : Headers.t
33 |     }
34 |   [@@deriving sexp_of]
35 | 
36 |   val create
37 |     :  ?boundary:Boundary.t
38 |     -> ?prologue:Bigstring_shared.t
39 |     -> ?epilogue:Bigstring_shared.t
40 |     -> ?container_headers:Headers.t
41 |     -> Email.t list
42 |     -> t
43 | 
44 |   val set
45 |     :  t
46 |     -> ?boundary:Boundary.t
47 |     -> ?prologue:Bigstring_shared.t option
48 |     -> ?epilogue:Bigstring_shared.t option
49 |     -> ?parts:Email.t list
50 |     -> ?container_headers:Headers.t
51 |     -> unit
52 |     -> t
53 | end
54 | 
55 | type t =
56 |   | Multipart of Multipart.t
57 |   | Message of Email.t
58 |   | Data of Octet_stream.t
59 | [@@deriving sexp_of]
60 | 
61 | (** [parse ?container_headers email] parses the content of [email]. The default content
62 |     type of a multipart body changes based on the container headers. This only comes into
63 |     play if the container had "Content-Type: multipart/digest". *)
64 | val parse : ?container_headers:Headers.t -> Email.t -> t Or_error.t
65 | 
66 | val to_email : headers:Headers.t -> t -> Email.t
67 | val set_content : Email.t -> t -> Email.t
68 | 
69 | (** Allow changing the message content to mask the actual data but retain the structure *)
70 | val map_data
71 |   :  ?on_unparsable_content:[ `Skip | `Raise ] (** default [`Skip] *)
72 |   -> Email.t
73 |   -> f:(Octet_stream.t -> Octet_stream.t)
74 |   -> Email.t
75 | 
76 | val to_raw_content : t -> Email_raw_content.t
77 | val to_bigstring_shared : t -> Bigstring_shared.t
78 | val to_string_monoid : t -> String_monoid.t
79 | 


--------------------------------------------------------------------------------
/kernel/src/email_grammar.mly:
--------------------------------------------------------------------------------
 1 | %{
 2 | open! Core
 3 | 
 4 | %}
 5 | 
 6 | /*(* Standard tokens *)*/
 7 | %token <string> ERROR
 8 | %token EOF
 9 | 
10 | /*(* Headers *)*/
11 | %token <string * string> FIELD
12 | %token HEADER_END
13 | /**/
14 | %token NO_HEADER_END
15 | 
16 | /*(* Body *)*/
17 | /*(* %token <string> OCTET_STREAM *)*/
18 | %token <int> OCTET_STREAM_OFFSET
19 | 
20 | %start message
21 | %type <Email_grammar_types.message> message
22 | 
23 | %%
24 | 
25 | message : part EOF { $1 };
26 | 
27 | part : header HEADER_END OCTET_STREAM_OFFSET
28 |     { `Message ($1, `Content_offset $3) }
29 | /**/
30 | | header NO_HEADER_END OCTET_STREAM_OFFSET
31 |     { `Message ($1, `Bad_headers $3) }
32 | | header
33 |   { `Message ($1, `Truncated) }
34 |   ;
35 | 
36 | header :
37 |   FIELD header { ($1 :: $2) }
38 |   | { [] }
39 | ;
40 | 
41 | %%
42 | 


--------------------------------------------------------------------------------
/kernel/src/email_grammar_types.ml:
--------------------------------------------------------------------------------
 1 | (* Some simple, lightweight types for parser output *)
 2 | type header = (string * string) list
 3 | 
 4 | type content_offset =
 5 |   [ `Content_offset of int
 6 |   | `Bad_headers of int
 7 |   | `Truncated
 8 |   ]
 9 | 
10 | type message = [ `Message of header * content_offset ]
11 | 


--------------------------------------------------------------------------------
/kernel/src/email_grammar_types.mli:
--------------------------------------------------------------------------------
 1 | (** Some simple, lightweight types for parser output *)
 2 | 
 3 | type header = (string * string) list
 4 | 
 5 | type content_offset =
 6 |   [ `Content_offset of int
 7 |   | `Bad_headers of int
 8 |   | `Truncated
 9 |   ]
10 | 
11 | type message = [ `Message of header * content_offset ]
12 | 


--------------------------------------------------------------------------------
/kernel/src/email_intf.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | module type Email = sig
 4 |   (** An [Email.t] is a list of headers along with unparsed content. [Email_content.parse]
 5 |       can be used to work with the structured content of an email. *)
 6 | 
 7 |   type t [@@deriving compare, hash, sexp_of]
 8 | 
 9 |   val create : headers:Headers.t -> raw_content:Email_raw_content.t -> t
10 |   val headers : t -> Headers.t
11 |   val set_headers : t -> Headers.t -> t
12 |   val modify_headers : t -> f:(Headers.t -> Headers.t) -> t
13 |   val raw_content : t -> Email_raw_content.t
14 |   val set_raw_content : t -> Email_raw_content.t -> t
15 |   val modify_raw_content : t -> f:(Email_raw_content.t -> Email_raw_content.t) -> t
16 | 
17 |   val to_bigstring_shared
18 |     :  ?eol_except_raw_content:Lf_or_crlf.t
19 |     -> t
20 |     -> Bigstring_shared.t
21 | 
22 |   (** String-builder-like module. Small-to-no memory overhead when unparsed. *)
23 |   val to_string_monoid : ?eol_except_raw_content:Lf_or_crlf.t -> t -> String_monoid.t
24 | 
25 |   (** Convert from string, in compliance with the RFC 2822 standard (standard email
26 |       format, e.g. .eml but not mbox) *)
27 |   val of_string : ?normalize_headers:Headers.Normalize.encode -> string -> t
28 | 
29 |   (** Convert to string, in compliance with the RFC 2822 standard (standard email format,
30 |       e.g. .eml but not mbox) *)
31 |   val to_string : ?eol_except_raw_content:Lf_or_crlf.t -> t -> string
32 | 
33 |   val to_bigstring : ?eol_except_raw_content:Lf_or_crlf.t -> t -> Bigstring.t
34 |   val of_bigstring : Bigstring.t -> t
35 |   val of_bigbuffer : Bigbuffer.t -> t
36 | 
37 |   include Comparable.S_plain with type t := t
38 |   include Hashable.S_plain with type t := t
39 | 
40 |   module Stable : sig
41 |     module V1 : Stable_without_comparator with type t = t
42 |   end
43 | end
44 | 


--------------------------------------------------------------------------------
/kernel/src/email_lexer.mli:
--------------------------------------------------------------------------------
1 | open! Core
2 | 
3 | val message : Email_lexer_state.t -> Lexing.lexbuf -> Email_grammar.token
4 | 


--------------------------------------------------------------------------------
/kernel/src/email_lexer.mll:
--------------------------------------------------------------------------------
  1 | {
  2 | open Email_grammar
  3 | module LS = Email_lexer_state
  4 | 
  5 | let force_token token _lexbuf = token
  6 | 
  7 | }
  8 | (* Rules from RFCs. Some may need to be copied to specific places to parse
  9 |  * their individual parts *)
 10 | 
 11 | (** RFC2234 - Core *)
 12 | let cr = "\013"
 13 | let lf = "\010"
 14 | 
 15 | (* Deliberately chose to include bare CR and LF here, although the RFC suggests
 16 |  * them to be included as part of the text characters.
 17 |  * The rationale being that, when parsing e-mail from a text file, they will
 18 |  * probably mean CRLF.
 19 |  * The issue should not arise in conforming e-mails.
 20 |  *)
 21 | let crlf_conforming = cr lf
 22 | let crlf_non_conforming = crlf_conforming | lf
 23 | let crlf = crlf_non_conforming
 24 | 
 25 | let wsp = [' ' '\t']
 26 | 
 27 | (** RFC2822 3.2.1 -- Primitive tokens *)
 28 | 
 29 | let no_ws_ctl = [ '\001'-'\008' '\011' '\012' '\014'-'\031' '\127']
 30 | 
 31 | (** RFC2822 3.2.3 -- Folding whitespace and comments *)
 32 | (* Obs: If this matches a CRLF, there's forcibly wsp afterwards *)
 33 | let obs_fws = wsp + (crlf wsp +) *
 34 | let fws = ((wsp * crlf) ? wsp + ) | obs_fws
 35 | 
 36 | let obs_char = [^ '\n' '\r' ]
 37 | 
 38 | (* Match lone CRs *)
 39 | (* This matching is not perfect, as in this case:
 40 |   - \r\r\r\r..\r\n
 41 |     It might not consider the last \r as part of a CRLF line ending.
 42 |     However, it fulfils the following properties:
 43 |     * On standards-compliant text, it will never match a CR that is part
 44 |       of a CRLF line-ending; standards compliant meaning that it doesn't
 45 |       have any lone CR pairs.
 46 |     * It works as expected on texts with lone CRs.
 47 | *)
 48 | let obs_cr_text = cr + [ ^ '\r' '\n']
 49 | 
 50 | let no_ws_printable = [ '\033' - '\126' ]
 51 | 
 52 | let utext =
 53 |   no_ws_ctl |
 54 |   no_ws_printable |
 55 |   obs_char |
 56 |   obs_cr_text
 57 | 
 58 | (* Beware of "a\n\t\nb". RFC2822, 3.2.3 says such headers must not be produced,
 59 |    but if they are, we must deal with them. *)
 60 | let unstructured = (fws? utext?)*
 61 | 
 62 | (**********************************************************)
 63 | 
 64 | (* Header fields *)
 65 | let ftext = [ '\033'-'\057' '\059'-'\126' ]
 66 | let field_name = ftext *
 67 | 
 68 | (* MESSAGE PARSING RULES *)
 69 | 
 70 | (* This lexer incorporates explicit state to allow it to process t
 71 |    the different parts of the email. This is because the tokens
 72 |    we want to generate depend on the context we are in.
 73 | 
 74 |    Another posibility is to handle everything in the grammar. However,
 75 |    each of the terminal symbols (characters) being a different
 76 |    state incurs significant overhead (both runtime and developing
 77 |    time).
 78 | 
 79 |    Statefulness is distasteful, so we keep it to a mininum:
 80 |      - Small number of states and transitions.
 81 |      - State is only read and set in one place, in the
 82 |        dispatcher.
 83 | *)
 84 | 
 85 | rule
 86 | (* Dispatcher *)
 87 | message t = parse
 88 |   | ""
 89 |   {
 90 |     match Core.Queue.dequeue t.LS.buf with
 91 |     | Some tok -> force_token tok lexbuf
 92 |     | None ->
 93 |       let result =
 94 |         match t.LS.state with
 95 |         (* Tokenization is vastly different depending on the context *)
 96 |         | `Header -> field lexbuf
 97 |         | `Content -> body_octet_stream lexbuf
 98 |         | `Expected_eof -> expected_eof lexbuf
 99 |       in
100 |       begin
101 |       LS.combine t result;
102 |       message t lexbuf
103 |       end
104 |     }
105 | and
106 | (** Looks for a header name *)
107 | field = parse
108 |   | (field_name as name)
109 |     (wsp * as _wsp)
110 |      ":"
111 |     (unstructured as body)
112 |     crlf ?
113 |     {
114 |       (* This code is repeated to avoid cyclical dependencies. Effort has
115 |        * been made to make it minimal.
116 |        *)
117 |       LS.return [FIELD(name, body)]
118 |     }
119 |   | crlf { LS.return ~new_state:`Content [HEADER_END] }
120 |   | eof  { LS.return_eof }
121 |   | "" { LS.return ~new_state:`Content [NO_HEADER_END] }
122 | 
123 | and
124 | 
125 | (** This rule throws an error if there are any more characters in the file *)
126 | body_octet_stream =
127 |   parse
128 |   | "" | eof
129 |   {
130 |     assert (Lexing.lexeme_start lexbuf = Lexing.lexeme_end lexbuf);
131 |     let pos = Lexing.lexeme_start lexbuf in
132 |     LS.return ~new_state:`Expected_eof [ OCTET_STREAM_OFFSET (pos); EOF ]
133 |   }
134 | and
135 | 
136 | 
137 | (* Supporting functions *)
138 | error =
139 |   parse
140 |     | _ as c { LS.return_error (Core.sprintf "Unexpected character %c" c) }
141 |     | eof    { LS.return_error "Unexpected EOF" }
142 | and
143 | expected_eof =
144 |   parse
145 |   | eof    { LS.return_eof }
146 |   | ""     { error lexbuf }
147 | 
148 | {
149 | 
150 | }
151 | 


--------------------------------------------------------------------------------
/kernel/src/email_lexer_state.ml:
--------------------------------------------------------------------------------
 1 | open Core
 2 | 
 3 | module State = struct
 4 |   type t =
 5 |     [ `Header (** Initial state, parsing headers *)
 6 |     | `Content (** Parsing the body of the message. The details are in the body state. *)
 7 |     | `Expected_eof (** The message should end here. If it doesn't, it's an error *)
 8 |     ]
 9 | 
10 |   let initial = `Header
11 | end
12 | 
13 | module Content = struct
14 |   type t =
15 |     | Multipart of string list
16 |     | Octet_stream
17 | 
18 |   let default = Octet_stream
19 | end
20 | 
21 | type t =
22 |   { mutable state : State.t
23 |   ; buf : Email_grammar.token Queue.t
24 |   }
25 | 
26 | let create () = { state = State.initial; buf = Queue.create () }
27 | 
28 | module Result = struct
29 |   type t =
30 |     { new_state : State.t option
31 |     ; tokens : Email_grammar.token list
32 |     }
33 | 
34 |   module Std = struct
35 |     let return ?new_state tokens = { new_state; tokens }
36 |     let return_eof = return ~new_state:`Expected_eof [ Email_grammar.EOF ]
37 |     let return_error str = return [ Email_grammar.ERROR str ]
38 |   end
39 | end
40 | 
41 | let combine t result =
42 |   (match result.Result.new_state with
43 |    | Some state -> t.state <- state
44 |    | None -> ());
45 |   List.iter result.Result.tokens ~f:(fun tok -> Queue.enqueue t.buf tok)
46 | ;;
47 | 
48 | include Result.Std
49 | 
50 | module Error = struct
51 |   let unexpected_char c = sprintf "Unexpected char: %c" c
52 |   let unexpected_eof = "Unexpected end of file"
53 | end
54 | 


--------------------------------------------------------------------------------
/kernel/src/email_lexer_state.mli:
--------------------------------------------------------------------------------
 1 | open Core
 2 | 
 3 | module State : sig
 4 |   type t =
 5 |     [ `Header (** Initial state, parsing headers *)
 6 |     | `Content (** Parsing the body of the message. The details are in the body state. *)
 7 |     | `Expected_eof (** The message should end here. If it doesn't, it's an error *)
 8 |     ]
 9 | 
10 |   val initial : [ `Header ]
11 | end
12 | 
13 | module Content : sig
14 |   type t =
15 |     | Multipart of string list
16 |     | Octet_stream
17 | 
18 |   val default : t
19 | end
20 | 
21 | type t =
22 |   { mutable state : State.t
23 |   ; buf : Email_grammar.token Queue.t
24 |   }
25 | 
26 | val create : unit -> t
27 | 
28 | module Result : sig
29 |   type t =
30 |     { new_state : State.t option
31 |     ; tokens : Email_grammar.token list
32 |     }
33 | end
34 | 
35 | val combine : t -> Result.t -> unit
36 | val return : ?new_state:State.t -> Email_grammar.token list -> Result.t
37 | val return_eof : Result.t
38 | val return_error : string -> Result.t
39 | 
40 | module Error : sig
41 |   val unexpected_char : char -> string
42 |   val unexpected_eof : string
43 | end
44 | 


--------------------------------------------------------------------------------
/kernel/src/email_message_kernel.ml:
--------------------------------------------------------------------------------
 1 | include Email (** @inline *)
 2 | 
 3 | module Bigstring_shared = Bigstring_shared
 4 | module Content = Email_content
 5 | module Raw_content = Email_raw_content
 6 | module Simple = Email_simple
 7 | module Mimestring = Mimestring
 8 | module Octet_stream = Octet_stream
 9 | module String_monoid = String_monoid
10 | module Lf_or_crlf = Lf_or_crlf
11 | 
12 | module Headers = struct
13 |   include Headers
14 |   module Encoded_word = Encoded_word
15 | end
16 | 
17 | module Stable = struct
18 |   include Email.Stable
19 |   module Raw_content = Email_raw_content.Stable
20 |   module Simple = Email_simple.Stable
21 |   module Headers = Headers.Stable
22 | end
23 | 
24 | module Private = struct
25 |   module Boundary = Boundary
26 |   module Media_type = Media_type
27 |   module Rfc = Rfc
28 | 
29 |   module Email_intf = struct
30 |     module type Email = Email_intf.Email with type t = Email.t
31 |   end
32 | 
33 |   module Email_simple_intf = struct
34 |     module type Email_simple =
35 |       Email_simple_intf.Email_simple
36 |       with type Mimetype.t = Email_simple.Mimetype.t
37 |        and type Content.t = Email_simple.Content.t
38 | 
39 |     module type Mimetype =
40 |       Email_simple_intf.Mimetype with type t = Email_simple.Mimetype.t
41 | 
42 |     module type Content =
43 |       Email_simple_intf.Content
44 |       with module Mimetype := Email_simple.Mimetype
45 |        and type attachment_name := Email_simple.attachment_name
46 |        and type t = Email_simple.Content.t
47 | 
48 |     module type Expert =
49 |       Email_simple_intf.Expert
50 |       with module Mimetype := Email_simple.Mimetype
51 |        and module Content := Email_simple.Content
52 |        and type attachment_name := Email_simple.attachment_name
53 |        and type t := Email_simple.t
54 | 
55 |     module type Stable =
56 |       Email_simple_intf.Stable
57 |       with type Mimetype.latest := Email_simple.Mimetype.t
58 |        and type Content.latest := Email_simple.Content.t
59 |   end
60 | 
61 |   module String_monoid_intf = struct
62 |     module type Underlying =
63 |       String_monoid_intf.Underlying with type t = String_monoid.Underlying.t
64 | 
65 |     module type String_monoid =
66 |       String_monoid_intf.String_monoid
67 |       with type t = String_monoid.t
68 |        and type Underlying.t = String_monoid.Underlying.t
69 |   end
70 | end
71 | 


--------------------------------------------------------------------------------
/kernel/src/email_raw_content.ml:
--------------------------------------------------------------------------------
 1 | module Stable = struct
 2 |   open! Core.Core_stable
 3 | 
 4 |   module V1 = struct
 5 |     type t = Bigstring_shared.Stable.V1.t option [@@deriving bin_io, sexp, compare, equal]
 6 |   end
 7 | end
 8 | 
 9 | open! Core
10 | 
11 | type t = Bigstring_shared.t option [@@deriving compare, hash, sexp_of, equal]
12 | 
13 | let of_bigstring_shared bstr = Some bstr
14 | let of_string str = of_bigstring_shared (Bigstring_shared.of_string str)
15 | 
16 | let to_bigstring_shared = function
17 |   | None -> Bigstring_shared.empty
18 |   | Some bstr -> bstr
19 | ;;
20 | 
21 | let length t = Bigstring_shared.length (to_bigstring_shared t)
22 | 
23 | module Expert = struct
24 |   let of_bigstring_shared_option = Fn.id
25 |   let to_bigstring_shared_option = Fn.id
26 | end
27 | 


--------------------------------------------------------------------------------
/kernel/src/email_raw_content.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | type t [@@deriving compare, hash, sexp_of, equal]
 4 | 
 5 | val to_bigstring_shared : t -> Bigstring_shared.t
 6 | val of_string : string -> t
 7 | val of_bigstring_shared : Bigstring_shared.t -> t
 8 | val length : t -> int
 9 | 
10 | (** Even though the underlying type includes an option, most users should not have to
11 |     think about the difference between [Some ""] and [None]. You can use the [Expert]
12 |     module to deal with the optionality, if need be. *)
13 | module Expert : sig
14 |   val to_bigstring_shared_option : t -> Bigstring_shared.t option
15 |   val of_bigstring_shared_option : Bigstring_shared.t option -> t
16 | end
17 | 
18 | module Stable : sig
19 |   module V1 : sig
20 |     type nonrec t = t [@@deriving equal]
21 | 
22 |     include Stable_without_comparator with type t := t
23 |   end
24 | end
25 | 


--------------------------------------------------------------------------------
/kernel/src/email_simple.mli:
--------------------------------------------------------------------------------
1 | include Email_simple_intf.Email_simple (** @inline *)
2 | 


--------------------------------------------------------------------------------
/kernel/src/email_simple_intf.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | 
  3 | module type Mimetype = sig
  4 |   type t = private string [@@deriving compare, sexp_of]
  5 | 
  6 |   val text : t
  7 |   val text_utf8 : t
  8 |   val html : t
  9 |   val html_utf8 : t
 10 |   val pdf : t
 11 |   val jpg : t
 12 |   val png : t
 13 |   val csv : t
 14 |   val multipart_mixed : t
 15 |   val multipart_related : t
 16 |   val multipart_alternative : t
 17 |   val of_string : string -> t
 18 |   val equal : t -> t -> bool
 19 |   val arg_type : t Command.Arg_type.t
 20 |   val from_filename : string -> t
 21 |   val from_extension : string -> t
 22 |   val to_extension : t -> string option
 23 |   val guess_encoding : t -> Octet_stream.Encoding.known
 24 | end
 25 | 
 26 | module type Content = sig
 27 |   type attachment_name
 28 | 
 29 |   module Mimetype : T
 30 | 
 31 |   type t = private Email.t [@@deriving sexp_of]
 32 | 
 33 |   val of_email : Email.t -> t
 34 | 
 35 |   val create_custom
 36 |     :  content_type:Mimetype.t
 37 |     -> ?encoding:Octet_stream.Encoding.known
 38 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 39 |     -> string
 40 |     -> t
 41 | 
 42 |   val create
 43 |     :  content_type:Mimetype.t
 44 |     -> ?encoding:Octet_stream.Encoding.known
 45 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 46 |     -> string
 47 |     -> t
 48 |   [@@deprecated "[since 2019-08] Renamed to [create_custom]"]
 49 | 
 50 |   val html_utf8
 51 |     :  ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *)
 52 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 53 |     -> string
 54 |     -> t
 55 | 
 56 |   val html
 57 |     :  ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *)
 58 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 59 |     -> string
 60 |     -> t
 61 |   [@@deprecated "[since 2019-08] Please specify the charset, e.g. [html_utf8]"]
 62 | 
 63 |   val text_utf8
 64 |     :  ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *)
 65 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 66 |     -> string
 67 |     -> t
 68 | 
 69 |   val text
 70 |     :  ?encoding:Octet_stream.Encoding.known (** default: `Quoted_printable *)
 71 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 72 |     -> string
 73 |     -> t
 74 |   [@@deprecated "[since 2019-08] Please specify the charset, e.g. [text_utf8]"]
 75 | 
 76 |   (** Plain text e-mail that also includes an html version so it's displayed monospace in
 77 |       gmail.
 78 | 
 79 |       By default, we add some custom styling to disable the line-wrap formatting rule
 80 |       which gmail uses. To disable this behavior, supply [~force_no_line_wrap:false]. *)
 81 |   val text_monospace_utf8
 82 |     :  ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 83 |     -> ?force_no_line_wrap:bool (** default: true *)
 84 |     -> string
 85 |     -> t
 86 | 
 87 |   val text_monospace
 88 |     :  ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 89 |     -> ?force_no_line_wrap:bool (** default: true *)
 90 |     -> string
 91 |     -> t
 92 |   [@@deprecated "[since 2019-08] Please specify the charset, e.g. [text_monospace_utf8]"]
 93 | 
 94 |   (** Combine 2 or more contents as alternative versions. List should be sorted from worst
 95 |       to best. *)
 96 |   val alternatives : ?extra_headers:(Headers.Name.t * Headers.Value.t) list -> t list -> t
 97 | 
 98 |   (** Combine 2 or more contents that should be bundled together *)
 99 |   val mixed : ?extra_headers:(Headers.Name.t * Headers.Value.t) list -> t list -> t
100 | 
101 |   (** Add related resources (e.g. inline images). You can reference them using
102 |       'cid:$[{attachment_name}]' in the content. To attach files you should use
103 |       [create ~attachments] *)
104 |   val with_related
105 |     :  ?extra_headers:(Headers.Name.t * Headers.Value.t) list
106 |     -> resources:(attachment_name * t) list
107 |     -> t
108 |     -> t
109 | 
110 |   val content_type : t -> Mimetype.t
111 | 
112 |   (** The Content-ID of the content *)
113 |   val related_part_cid : t -> attachment_name option
114 | 
115 |   val all_related_parts : t -> (attachment_name * t) list
116 |   val find_related : t -> attachment_name -> t option
117 | 
118 |   (** [content] and [parts] return [None] if the email doesn't properly parse. They also
119 |       return [None] if the message has content type "message/rfc822" *)
120 |   val content : t -> Octet_stream.t option
121 | 
122 |   val parts : t -> t list option
123 | 
124 |   (** Get the alternative versions available. If the message is not of content type
125 |       "multipart/alternative" then return a singleton list. *)
126 |   val alternative_parts : t -> t list
127 | 
128 |   (** Get the 'inline' parts, This expands "Content-Type: multipart/[{mixed,related}]",
129 |       stripping out any attachment parts. multipart/alternative is not expanded *)
130 |   val inline_parts : t -> t list
131 | 
132 |   val content_disposition : t -> [ `Inline | `Attachment of string ]
133 |   val attachment_name : t -> string option
134 | end
135 | 
136 | module type Expert = sig
137 |   type t
138 |   type attachment_name
139 | 
140 |   module Mimetype : T
141 |   module Content : T
142 | 
143 |   val create_raw
144 |     :  from:string (** defaults to <user@host> *)
145 |     -> to_:string list
146 |     -> ?cc:string list
147 |     -> ?reply_to:string
148 |     -> subject:string
149 |     -> id:string
150 |     -> ?in_reply_to:string
151 |     -> date:string
152 |     -> ?auto_generated:unit
153 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
154 |     -> ?attachments:(attachment_name * Content.t) list
155 |     -> Content.t
156 |     -> t
157 | 
158 |   val content
159 |     :  normalize_headers:Headers.Normalize.encode
160 |     -> extra_headers:(Headers.Name.t * Headers.Value.t) list
161 |     -> encoding:Octet_stream.Encoding.known
162 |     -> string
163 |     -> t
164 | 
165 |   val multipart
166 |     :  normalize_headers:Headers.Normalize.encode
167 |     -> content_type:Mimetype.t
168 |     -> extra_headers:(Headers.Name.t * Headers.Value.t) list
169 |     -> t list
170 |     -> t
171 | end
172 | 
173 | module type Stable = sig
174 |   module Content : sig
175 |     type latest
176 | 
177 |     module V1 : sig
178 |       type t = latest [@@deriving bin_io, sexp]
179 |     end
180 |   end
181 | 
182 |   module Mimetype : sig
183 |     type latest
184 | 
185 |     module V1 : Stable_without_comparator with type t = latest
186 |   end
187 | end
188 | 
189 | module type Email_simple = sig
190 |   module Mimetype : Mimetype
191 | 
192 |   type attachment_name = string [@@deriving sexp_of]
193 | 
194 |   module Content :
195 |     Content with module Mimetype := Mimetype and type attachment_name := attachment_name
196 | 
197 |   type t = Email.t [@@deriving sexp_of]
198 | 
199 |   val create
200 |     :  from:Email_address.t (** defaults to <user@host> *)
201 |     -> to_:Email_address.t list
202 |     -> ?cc:Email_address.t list
203 |     -> ?reply_to:Email_address.t
204 |     -> subject:string
205 |     -> id:string
206 |     -> ?in_reply_to:string
207 |     -> date_string:string
208 |     -> ?auto_generated:unit
209 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
210 |     -> ?attachments:(attachment_name * Content.t) list
211 |     -> Content.t
212 |     -> t
213 | 
214 |   val from : t -> Email_address.t option
215 |   val to_ : t -> Email_address.t list option
216 |   val cc : t -> Email_address.t list option
217 |   val subject : t -> string option
218 |   val id : t -> string option
219 | 
220 |   (** [extract_body ?content_type t] returns the body associated with the email part that
221 |       matches the [content_type] mimetype, or none if [t] does not contain a body or part
222 |       of type [content_type]. *)
223 |   val extract_body
224 |     :  ?content_type:Mimetype.t (** default: [Mimetype.text] *)
225 |     -> t
226 |     -> string option
227 | 
228 |   (** [extract_body_ext] and [extract_body_ext'] Attempt to find the message body in the
229 |       preferred format.
230 | 
231 |       [accept] is used to select only content parts in a format that is supported. If a
232 |       multipart component is accepted by [accept], we do NOT recursively extract the body.
233 |       We do so otherwise.
234 | 
235 |       [order] will be used to select the least part (by default the first part) *)
236 |   val extract_body_ext'
237 |     :  accept:((Mimetype.t * (string * string option) list) option -> 'format option)
238 |     -> t
239 |     -> ('format * string) Sequence.t
240 | 
241 |   val extract_body_ext
242 |     :  accept:((Mimetype.t * (string * string option) list) option -> 'format option)
243 |     -> ?order:('format -> 'format -> int)
244 |     -> t
245 |     -> ('format * string) option
246 | 
247 |   (** Related parts are those that are included in a multi-part message with a
248 |       "Content-ID" header. This content can be referenced by adding the "cid:" prefix and
249 |       stripping the enclosing '<' and '>'.
250 | 
251 |       For example (from https://tools.ietf.org/html/rfc2392):
252 | 
253 |       {v
254 |         From: foo1@bar.net
255 |         To: foo2@bar.net
256 |         Subject: A simple example
257 |         Mime-Version: 1.0
258 |         Content-Type: multipart/related; boundary="boundary-example-1"; type=Text/HTML
259 |         --boundary-example 1
260 |         Content-Type: Text/HTML; charset=US-ASCII
261 | 
262 |         to the other body part, for example through a statement such as:
263 |         <IMG SRC="cid:foo4*foo1@bar.net" ALT="IETF logo">
264 | 
265 |         --boundary-example-1
266 | 
267 |         Content-ID: <foo4*foo1@bar.net>
268 |         Content-Type: IMAGE/GIF
269 |         Content-Transfer-Encoding: BASE64
270 | 
271 |         R0lGODlhGAGgAPEAAP/////ZRaCgoAAAACH+PUNvcHlyaWdodCAoQykgMTk5
272 |         NSBJRVRGLiBVbmF1dGhvcml6ZWQgZHVwbGljYXRpb24gcHJvaGliaXRlZC4A
273 |         etc...
274 | 
275 |         --boundary-example-1--
276 |       v}
277 | 
278 |       Calling [all_related_parts] on this email would return a list of length one where
279 |       the [attachment_name] is "foo4*foo1@bar.net" for the single entry.
280 | 
281 |       Related parts are often used for inline images. *)
282 |   val all_related_parts : t -> (attachment_name * Content.t) list
283 | 
284 |   val find_related : t -> attachment_name -> Content.t option
285 |   val inline_parts : t -> Content.t list
286 | 
287 |   module Expert :
288 |     Expert
289 |     with module Mimetype := Mimetype
290 |      and module Content := Content
291 |      and type t := t
292 |      and type attachment_name := attachment_name
293 | 
294 |   module Stable :
295 |     Stable with type Content.latest := Content.t and type Mimetype.latest := Mimetype.t
296 | end
297 | 


--------------------------------------------------------------------------------
/kernel/src/encoded_word.ml:
--------------------------------------------------------------------------------
  1 | open Core
  2 | open Angstrom
  3 | 
  4 | module Charset = struct
  5 |   (* The following might not be an exhaustive list. We can add to this as we encounter
  6 |        more cases. *)
  7 |   type t =
  8 |     [ `Ascii
  9 |     | `Big5
 10 |     | `GB2312
 11 |     | `Latin1
 12 |     | `Latin2
 13 |     | `Utf8
 14 |     | `Windows1252
 15 |     | `KS_C_5601_1987
 16 |     ]
 17 |   [@@deriving sexp_of, enumerate, compare, equal]
 18 | 
 19 |   let to_string = function
 20 |     | `Ascii -> "US-ASCII"
 21 |     | `Big5 -> "BIG5"
 22 |     | `GB2312 -> "GB2312"
 23 |     | `Latin1 -> "ISO-8859-1"
 24 |     | `Latin2 -> "ISO-8859-2"
 25 |     | `Utf8 -> "UTF-8"
 26 |     | `Windows1252 -> "WINDOWS-1252"
 27 |     | `KS_C_5601_1987 -> "KS_C_5601-1987"
 28 |   ;;
 29 | end
 30 | 
 31 | module Let_syntax = struct
 32 |   let bind t ~f = t >>= f
 33 |   let map t ~f = t >>| f
 34 |   let both a b = lift2 Tuple2.create a b
 35 | end
 36 | 
 37 | let ws = take_while1 Char.is_whitespace
 38 | 
 39 | let charset_parser charsets =
 40 |   List.map charsets ~f:(fun charset ->
 41 |     string_ci (Charset.to_string charset) >>| const charset)
 42 |   |> choice
 43 | ;;
 44 | 
 45 | let encoding : [ `Base64 | `Quoted_printable ] Angstrom.t =
 46 |   choice [ string_ci "B" >>| const `Base64; string_ci "Q" >>| const `Quoted_printable ]
 47 | ;;
 48 | 
 49 | let parser_ : charset_parser:'a Angstrom.t -> ('a * string) Angstrom.t =
 50 |   fun ~charset_parser ->
 51 |   let%bind () = string "=?" >>| ignore
 52 |   and charset = charset_parser
 53 |   and () = string "?" >>| ignore
 54 |   and encoding
 55 |   and () = string "?" >>| ignore
 56 |   and data =
 57 |     take_while (function
 58 |       | '?' -> false
 59 |       | c -> (not (Char.is_whitespace c)) && Char.is_print c)
 60 |   and () = string "?=" >>| ignore in
 61 |   let%bind data =
 62 |     match encoding with
 63 |     | `Quoted_printable ->
 64 |       (* RFC2047 deviates slightly from common quoted printable.
 65 |          In particular
 66 |          4.2(2) - Underscore may be used to encode space, and
 67 |          4.2(3)- underscore must be encoded.
 68 |          This substituion handles that decoding step. *)
 69 |       let data = String.substr_replace_all data ~pattern:"_" ~with_:" " in
 70 |       let data_bstr, _ =
 71 |         Quoted_printable_lexer.decode_quoted_printable
 72 |           (String.length data)
 73 |           (Lexing.from_string data)
 74 |       in
 75 |       return (Bigbuffer.contents data_bstr)
 76 |     | `Base64 ->
 77 |       (match Base64.decode data with
 78 |        | Ok data -> return data
 79 |        | Error (`Msg msg) -> fail msg)
 80 |   in
 81 |   return (charset, data)
 82 | ;;
 83 | 
 84 | let parser_many
 85 |   :  charset_parser:'a Angstrom.t
 86 |   -> [ `Encoded of 'a * string | `Plain of string ] list Angstrom.t
 87 |   =
 88 |   fun ~charset_parser ->
 89 |   many
 90 |     (choice
 91 |        [ (let%map hd = parser_ ~charset_parser
 92 |           and tl =
 93 |             (* RFC2047 6.2 When displaying a particular header field that contains
 94 |                multiple 'encoded-word's, any 'linear-white-space' that separates a
 95 |                pair of adjacent 'encoded-word's is ignored. *)
 96 |             many
 97 |               (let%map (_ : string) = option "" ws
 98 |                and res = parser_ ~charset_parser in
 99 |                `Encoded res)
100 |           in
101 |           `Encoded hd :: tl)
102 |        ; (let%map c =
103 |             choice
104 |               [ take_while1 (function
105 |                   | '=' -> false
106 |                   | c -> not (Char.is_whitespace c))
107 |               ; string "="
108 |                 (* Collapse Line breaks as per
109 |                  RFC822 - 3.1.1 Unfolding is accomplished by regarding CRLF immediately
110 |                  followed by an LWSP-char as equivalent to the LWSP-char.
111 |                  RFC822 - 3.1.3 Rules of (un)folding apply to these (unstructured) fields *)
112 |               ; (let%bind (_ : string) = choice [ string "\r\n"; string "\n" ] in
113 |                  ws)
114 |                 (* The RFC is ambiguous on what should happen if there is a lone CRLF, so we
115 |                  ignore those, and treat these as regular white space. The RFC is also
116 |                  ambiguous on how to treat multiple consecutive whitespaces, so we do the
117 |                  conservative thing and leave them exactly as is. *)
118 |               ; ws
119 |               ]
120 |           in
121 |           [ `Plain c ])
122 |        ])
123 |   >>| List.concat
124 | ;;
125 | 
126 | let decode_with_charset ?(charsets = Charset.all) str =
127 |   Angstrom.parse_string
128 |     ~consume:Prefix
129 |     (parser_many ~charset_parser:(charset_parser charsets))
130 |     str
131 |   |> Result.map_error ~f:Error.of_string
132 | ;;
133 | 
134 | let decode ?(charsets = Charset.all) str =
135 |   let%map.Or_error chunks = decode_with_charset ~charsets str in
136 |   List.map chunks ~f:(function
137 |     | `Plain str -> str
138 |     | `Encoded ((_charset : Charset.t), str) -> str)
139 |   |> String.concat ~sep:""
140 | ;;
141 | 
142 | let decode_with_raw_charset str =
143 |   Angstrom.parse_string
144 |     ~consume:Prefix
145 |     (parser_many
146 |        ~charset_parser:
147 |          (let%map charset =
148 |             take_while (function
149 |               | ' '
150 |               | '('
151 |               | ')'
152 |               | '<'
153 |               | '>'
154 |               | '@'
155 |               | ','
156 |               | ';'
157 |               | ':'
158 |               | '\\'
159 |               | '"'
160 |               | '|'
161 |               | '['
162 |               | ']'
163 |               | '?'
164 |               | '.'
165 |               | '=' ->
166 |                 (* List from https://datatracker.ietf.org/doc/html/rfc2047 +
167 |                    https://www.rfc-editor.org/errata/eid506 *)
168 |                 false
169 |               | c -> Char.is_print c)
170 |           in
171 |           `Charset charset))
172 |     str
173 |   |> Result.map_error ~f:Error.of_string
174 | ;;
175 | 


--------------------------------------------------------------------------------
/kernel/src/encoded_word.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | module Charset : sig
 4 |   type t =
 5 |     [ `Ascii
 6 |     | `Big5
 7 |     | `GB2312
 8 |     | `Latin1
 9 |     | `Latin2
10 |     | `Utf8
11 |     | `Windows1252
12 |     | `KS_C_5601_1987
13 |     ]
14 |   [@@deriving compare, sexp_of]
15 | 
16 |   val all : t list
17 | end
18 | 
19 | (** Like [decode_with_charset], but completely ignores all charset information and simply
20 |     concats all of the bytes together. *)
21 | val decode : ?charsets:Charset.t list -> string -> string Or_error.t
22 | 
23 | (** Decodes words encoded as per: https://tools.ietf.org/html/rfc2047 *)
24 | val decode_with_charset
25 |   :  ?charsets:Charset.t list
26 |   -> string
27 |   -> [ `Plain of string | `Encoded of Charset.t * string ] list Or_error.t
28 | 
29 | (** Like [decode_with_charset], but supports arbitrary charsets, not just the ones in
30 |     [Charset.t]. *)
31 | val decode_with_raw_charset
32 |   :  string
33 |   -> [ `Plain of string | `Encoded of [ `Charset of string ] * string ] list Or_error.t
34 | 


--------------------------------------------------------------------------------
/kernel/src/headers.ml:
--------------------------------------------------------------------------------
  1 | module Stable = struct
  2 |   open Core.Core_stable
  3 | 
  4 |   module Name = struct
  5 |     module V1 = struct
  6 |       type t = string [@@deriving bin_io, compare, hash, sexp, equal]
  7 |     end
  8 |   end
  9 | 
 10 |   module Value = struct
 11 |     module V1 = struct
 12 |       type t = string [@@deriving bin_io, compare, hash, sexp]
 13 |     end
 14 |   end
 15 | 
 16 |   module V1 = struct
 17 |     type t = (Name.V1.t * string) list [@@deriving bin_io, compare, hash, sexp, equal]
 18 |   end
 19 | end
 20 | 
 21 | open Core
 22 | 
 23 | module Normalize = struct
 24 |   type encode =
 25 |     [ `None (* Leave whitespace unchanged *)
 26 |     | `Whitespace (* Cleanup leading and trailing whitespace on each line *)
 27 |     ]
 28 |   [@@deriving sexp_of]
 29 | 
 30 |   type decode =
 31 |     [ encode
 32 |     | `Whitespace_and_encoding of
 33 |       [ `Any_charset | `Only of Encoded_word.Charset.t list ]
 34 |       * [ `Pretend_all_charsets_are_same ]
 35 |     ]
 36 |   [@@deriving sexp_of]
 37 | 
 38 |   let default : [> `Whitespace ] = `Whitespace
 39 | end
 40 | 
 41 | module Name : sig
 42 |   type t = string [@@deriving sexp_of, compare, hash, equal]
 43 | 
 44 |   val of_string : string -> t
 45 |   val to_string : t -> string
 46 | 
 47 |   include Comparable.S_plain with type t := t
 48 |   include Hashable.S_plain with type t := t
 49 | 
 50 |   val is : t -> string -> bool
 51 | end = struct
 52 |   include Mimestring.Case_insensitive
 53 | 
 54 |   let to_string str = str
 55 |   let is = equal_string
 56 | end
 57 | 
 58 | module Value : sig
 59 |   type t = string [@@deriving sexp_of, compare, hash]
 60 | 
 61 |   val of_string : ?normalize:Normalize.decode -> string -> t
 62 |   val to_string : ?normalize:Normalize.encode -> t -> string
 63 |   val to_string' : ?normalize:Normalize.decode -> t -> string
 64 | 
 65 |   include Comparable.S_plain with type t := t
 66 |   include Hashable.S_plain with type t := t
 67 | end = struct
 68 |   include String
 69 | 
 70 |   let normalize_string str =
 71 |     (* From the RFC (https://datatracker.ietf.org/doc/html/rfc822#section-3.1.1), newlines
 72 |        followed by whitespace should be replace by just the whitespace character.
 73 |        Stripping the lines isn't quite obeying the standard but matches what we've done
 74 |        historically and some emails probably have excessive whitespace. *)
 75 |     String.split_lines str
 76 |     |> List.map ~f:String.strip
 77 |     |> List.filter ~f:(Fn.non String.is_empty)
 78 |     |> String.concat ~sep:" "
 79 |   ;;
 80 | 
 81 |   let of_string ?(normalize = Normalize.default) str =
 82 |     match normalize with
 83 |     | `None -> str
 84 |     | `Whitespace -> normalize_string str
 85 |     | `Whitespace_and_encoding (charsets, `Pretend_all_charsets_are_same) ->
 86 |       let normalized = normalize_string str in
 87 |       let charsets =
 88 |         match charsets with
 89 |         | `Any_charset -> Encoded_word.Charset.all
 90 |         | `Only charsets -> charsets
 91 |       in
 92 |       (match Encoded_word.decode ~charsets normalized with
 93 |        | Ok str -> str
 94 |        | Error _ -> normalized)
 95 |   ;;
 96 | 
 97 |   let to_string ?(normalize = Normalize.default) str =
 98 |     match normalize with
 99 |     | `None -> str
100 |     | `Whitespace ->
101 |       " "
102 |       ^ (String.split_lines str |> List.map ~f:String.strip |> String.concat ~sep:"\n ")
103 |   ;;
104 | 
105 |   let to_string' ?(normalize = Normalize.default) str =
106 |     match normalize with
107 |     | #Normalize.encode as normalize -> to_string ~normalize str
108 |     | `Whitespace_and_encoding (_charsets, _armour) ->
109 |       to_string ~normalize:`Whitespace str
110 |   ;;
111 | 
112 |   let%expect_test "normalize_string" =
113 |     let values = [ "value"; " value"; " \n value"; "\nvalue" ] in
114 |     List.iter values ~f:(fun value -> normalize_string value |> print_endline);
115 |     [%expect
116 |       {|
117 |       value
118 |       value
119 |       value
120 |       value
121 |       |}]
122 |   ;;
123 | end
124 | 
125 | module Common = struct
126 |   let subject = "Subject"
127 |   let to_ = "To"
128 |   let cc = "Cc"
129 |   let bcc = "Bcc"
130 |   let from = "From"
131 |   let date = "Date"
132 |   let message_id = "Message-ID"
133 |   let list_id = "list-Id"
134 | end
135 | 
136 | type t = (Name.t * string) list [@@deriving sexp_of, compare, hash, equal]
137 | 
138 | let to_string_monoid ?(eol = `LF) t =
139 |   List.map t ~f:(fun (name, value) ->
140 |     String_monoid.concat_string [ (name :> string); ":"; value; Lf_or_crlf.to_string eol ])
141 |   |> String_monoid.concat
142 | ;;
143 | 
144 | let to_string ?eol t = String_monoid.to_string (to_string_monoid ?eol t)
145 | let length = List.length
146 | let empty = []
147 | let append = List.append
148 | 
149 | (* Accessors *)
150 | let last ?normalize t name =
151 |   let name = Name.of_string name in
152 |   List.fold t ~init:None ~f:(fun r (k, v) -> if Name.equal name k then Some v else r)
153 |   |> Option.map ~f:(Value.of_string ?normalize)
154 | ;;
155 | 
156 | let any ?normalize t name =
157 |   let name = Name.of_string name in
158 |   List.Assoc.find t name ~equal:Name.equal |> Option.map ~f:(Value.of_string ?normalize)
159 | ;;
160 | 
161 | let find_all ?normalize t name =
162 |   let name = Name.of_string name in
163 |   List.filter_map t ~f:(fun (name', value) ->
164 |     if Name.equal name name' then Some (Value.of_string ?normalize value) else None)
165 | ;;
166 | 
167 | (* Modify *)
168 | let of_list ~normalize : _ -> t =
169 |   List.map ~f:(fun (name, value) ->
170 |     let name = Name.of_string name in
171 |     let value = Value.to_string ~normalize value in
172 |     name, value)
173 | ;;
174 | 
175 | let to_list ?normalize : t -> _ =
176 |   List.map ~f:(fun (name, value) -> name, Value.of_string ?normalize value)
177 | ;;
178 | 
179 | let add ?normalize t ~name ~value =
180 |   let name = Name.of_string name in
181 |   let value = Value.to_string ?normalize value in
182 |   let rec add acc = function
183 |     | (name', _) :: _ as fields when Name.equal name name' ->
184 |       List.rev acc @ [ name, value ] @ fields
185 |     | field :: fields -> add (field :: acc) fields
186 |     | [] -> (name, value) :: t
187 |   in
188 |   add [] t
189 | ;;
190 | 
191 | let add_if_missing ?normalize t ~name ~value =
192 |   if List.Assoc.mem t ~equal:Name.equal name then t else add ?normalize t ~name ~value
193 | ;;
194 | 
195 | let set ?normalize t ~name ~value =
196 |   let name = Name.of_string name in
197 |   let value = Value.to_string ?normalize value in
198 |   let rec set acc = function
199 |     | (name', _) :: fields when Name.equal name name' ->
200 |       List.rev acc @ [ name, value ] @ fields
201 |     | field :: fields -> set (field :: acc) fields
202 |     | [] -> (name, value) :: t
203 |   in
204 |   set [] t
205 | ;;
206 | 
207 | let add_at_bottom ?normalize t ~name ~value =
208 |   List.rev (add ?normalize (List.rev t) ~name ~value)
209 | ;;
210 | 
211 | let add_at_bottom_if_missing ?normalize t ~name ~value =
212 |   if List.Assoc.mem t ~equal:Name.equal name
213 |   then t
214 |   else add_at_bottom ?normalize t ~name ~value
215 | ;;
216 | 
217 | let set_at_bottom ?normalize t ~name ~value =
218 |   List.rev (set ?normalize (List.rev t) ~name ~value)
219 | ;;
220 | 
221 | let add_all ?normalize t ts : t =
222 |   List.fold
223 |     ~init:t
224 |     ~f:(fun t (name, value) -> add ?normalize t ~name ~value)
225 |     (List.rev ts)
226 | ;;
227 | 
228 | let add_all_at_bottom ?normalize t ts =
229 |   List.fold ~init:t ~f:(fun t (name, value) -> add_at_bottom ?normalize t ~name ~value) ts
230 | ;;
231 | 
232 | let filter ?normalize t ~f =
233 |   List.filter t ~f:(fun (name, value) ->
234 |     f ~name ~value:(Value.of_string ?normalize value))
235 | ;;
236 | 
237 | let map' ?normalize t ~f =
238 |   List.map t ~f:(fun ((name : Name.t), (value_raw : string)) ->
239 |     let value = Value.of_string ?normalize value_raw in
240 |     let name', value' = f ~name ~value in
241 |     let value =
242 |       if String.equal (value :> string) value'
243 |       then value_raw
244 |       else Value.to_string' ?normalize value'
245 |     in
246 |     name', value)
247 | ;;
248 | 
249 | let map ?normalize t ~f = map' ?normalize t ~f:(fun ~name ~value -> name, f ~name ~value)
250 | 
251 | let smash_and_add ?normalize t ~name ~value =
252 |   let values =
253 |     find_all
254 |       ?normalize:((normalize : Normalize.encode option) :> Normalize.decode option)
255 |       t
256 |       name
257 |   in
258 |   let t = filter t ~f:(fun ~name:name' ~value:_ -> Name.(name <> name')) in
259 |   let value = String.concat (values @ [ value ]) ~sep:", " in
260 |   add_at_bottom ?normalize t ~name ~value
261 | ;;
262 | 
263 | let names = List.map ~f:fst
264 | 
265 | module%test _ = struct
266 |   let t = of_list ~normalize:`None [ "A", "a1"; "B", "b1"; "B", "b2" ]
267 |   let%test_unit _ = [%test_result: string] (to_string t) ~expect:"A:a1\nB:b1\nB:b2\n"
268 | 
269 |   let%test_unit _ =
270 |     [%test_result: string]
271 |       (add ~normalize:`None t ~name:"B" ~value:"b3" |> to_string)
272 |       ~expect:"A:a1\nB:b3\nB:b1\nB:b2\n"
273 |   ;;
274 | 
275 |   let%test_unit _ =
276 |     [%test_result: string]
277 |       (add ~normalize:`None t ~name:"B" ~value:"b3\nb3" |> to_string)
278 |       ~expect:"A:a1\nB:b3\nb3\nB:b1\nB:b2\n"
279 |   ;;
280 | 
281 |   let%test_unit _ =
282 |     [%test_result: string]
283 |       (add t ~name:"B" ~value:"b3" |> to_string)
284 |       ~expect:"A:a1\nB: b3\nB:b1\nB:b2\n"
285 |   ;;
286 | 
287 |   let%test_unit _ =
288 |     [%test_result: string]
289 |       (add t ~name:"B" ~value:"b3\nb3" |> to_string)
290 |       ~expect:"A:a1\nB: b3\n b3\nB:b1\nB:b2\n"
291 |   ;;
292 | 
293 |   let%test_unit _ =
294 |     [%test_result: string]
295 |       (add ~normalize:`None t ~name:"C" ~value:"c1" |> to_string)
296 |       ~expect:"C:c1\nA:a1\nB:b1\nB:b2\n"
297 |   ;;
298 | 
299 |   let%test_unit _ =
300 |     [%test_result: string]
301 |       (set ~normalize:`None t ~name:"B" ~value:"b3" |> to_string)
302 |       ~expect:"A:a1\nB:b3\nB:b2\n"
303 |   ;;
304 | 
305 |   let%test_unit _ =
306 |     [%test_result: string]
307 |       (set ~normalize:`None t ~name:"b" ~value:"b3" |> to_string)
308 |       ~expect:"A:a1\nb:b3\nB:b2\n"
309 |   ;;
310 | 
311 |   let%test_unit _ =
312 |     [%test_result: string]
313 |       (set ~normalize:`None t ~name:"C" ~value:"c1" |> to_string)
314 |       ~expect:"C:c1\nA:a1\nB:b1\nB:b2\n"
315 |   ;;
316 | 
317 |   let%test_unit _ =
318 |     [%test_result: string]
319 |       (set ~normalize:`None t ~name:"c" ~value:"c1" |> to_string)
320 |       ~expect:"c:c1\nA:a1\nB:b1\nB:b2\n"
321 |   ;;
322 | 
323 |   let%test_unit _ =
324 |     [%test_result: string]
325 |       (add_at_bottom ~normalize:`None t ~name:"A" ~value:"a2" |> to_string)
326 |       ~expect:"A:a1\nA:a2\nB:b1\nB:b2\n"
327 |   ;;
328 | 
329 |   let%test_unit _ =
330 |     [%test_result: string]
331 |       (add_at_bottom ~normalize:`None t ~name:"B" ~value:"b3" |> to_string)
332 |       ~expect:"A:a1\nB:b1\nB:b2\nB:b3\n"
333 |   ;;
334 | 
335 |   let%test_unit _ =
336 |     [%test_result: string]
337 |       (add_at_bottom ~normalize:`None t ~name:"C" ~value:"c1" |> to_string)
338 |       ~expect:"A:a1\nB:b1\nB:b2\nC:c1\n"
339 |   ;;
340 | 
341 |   let%test_unit _ =
342 |     [%test_result: string]
343 |       (set_at_bottom ~normalize:`None t ~name:"B" ~value:"b3" |> to_string)
344 |       ~expect:"A:a1\nB:b1\nB:b3\n"
345 |   ;;
346 | 
347 |   let%test_unit _ =
348 |     [%test_result: string]
349 |       (set_at_bottom ~normalize:`None t ~name:"C" ~value:"c1" |> to_string)
350 |       ~expect:"A:a1\nB:b1\nB:b2\nC:c1\n"
351 |   ;;
352 | end
353 | 


--------------------------------------------------------------------------------
/kernel/src/headers.mli:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | 
  3 | (** [Normalize] specifies how to handle header values. It is used in two contexts:
  4 | 
  5 |     1) Transport (Normalize.encode): Specify how to turn a string into a header value.
  6 |        [`Whitespace] will add the necessary for transport.
  7 | 
  8 |     2) Processing (Normalize.decode): Specify how to turn a header value into a string.
  9 |        [`Whitespace] will remove all leading and trailing whitespace on each line in order
 10 |        to cleanly process the value. *)
 11 | module Normalize : sig
 12 |   type encode =
 13 |     [ `None (** Leave whitespace unchanged *)
 14 |     | `Whitespace (** Cleanup leading and trailing whitespace on each line *)
 15 |     ]
 16 |   [@@deriving sexp_of]
 17 | 
 18 |   type decode =
 19 |     [ encode
 20 |     | `Whitespace_and_encoding of
 21 |       [ `Any_charset | `Only of Encoded_word.Charset.t list ]
 22 |       * [ `Pretend_all_charsets_are_same ]
 23 |       (** Will attempt to handle decoded words. [`Any_charset] will cause all known
 24 |           charsets to be handled. [`Only charsets] will restrict the parsing to only the
 25 |           given charsets. [`Pretend_all_charsets_are_same] will not actually do any
 26 |           charset conversion and just copies the raw bytes. For ascii-compatible encodings
 27 |           this often works well-enough, though you do end up with garbage for any
 28 |           characters not in the ascii-plane. *)
 29 |     ]
 30 |   [@@deriving sexp_of]
 31 | 
 32 |   val default : [> `Whitespace ]
 33 | end
 34 | 
 35 | module Name : sig
 36 |   (** Case insensitive *)
 37 | 
 38 |   type t = string [@@deriving sexp_of, compare, hash]
 39 | 
 40 |   val of_string : string -> t
 41 |   val to_string : t -> string
 42 | 
 43 |   include Comparable.S_plain with type t := t
 44 |   include Hashable.S_plain with type t := t
 45 | 
 46 |   (** Short hand for [let is a b = equal a (of_string b)] *)
 47 | 
 48 |   val is : t -> string -> bool
 49 | end
 50 | 
 51 | (** This is just a list of commonly used header field names for simple reuse *)
 52 | module Common : sig
 53 |   val subject : string
 54 |   val to_ : string
 55 |   val cc : string
 56 |   val bcc : string
 57 |   val from : string
 58 |   val date : string
 59 |   val message_id : string
 60 |   val list_id : string
 61 | end
 62 | 
 63 | module Value : sig
 64 |   type t = string [@@deriving sexp_of, compare, hash]
 65 | 
 66 |   (** Normalize the whitespace for processing/ if [normalize == `None] this does nothing.
 67 |       if [normalize == `Whitespace] (default), strip leading/trailing whitespace on every
 68 |       line. *)
 69 |   val of_string : ?normalize:Normalize.decode -> string -> t
 70 | 
 71 |   (** Normalize the whitespace for transport (insert the appropriate leading space). if
 72 |       [normalize == `None] this does nothing. if [normalize == `Whitespace] (default),
 73 |       insert a leading space and indent subsequent lines with a tab (remove any other
 74 |       leading/trailing space on every line). *)
 75 |   val to_string : ?normalize:Normalize.encode -> t -> string
 76 | 
 77 |   include Comparable.S_plain with type t := t
 78 |   include Hashable.S_plain with type t := t
 79 | end
 80 | 
 81 | (*_ The add and set functions are same as in Field_list, except they add a space
 82 |   before the value. *)
 83 | 
 84 | type t [@@deriving compare, hash, sexp_of, equal]
 85 | 
 86 | (** [eol] defaults to `LF *)
 87 | val to_string_monoid : ?eol:Lf_or_crlf.t -> t -> String_monoid.t
 88 | 
 89 | val to_string : ?eol:Lf_or_crlf.t -> t -> string
 90 | val empty : t
 91 | val append : t -> t -> t
 92 | val length : t -> int
 93 | val of_list : normalize:Normalize.encode -> (Name.t * Value.t) list -> t
 94 | val to_list : ?normalize:Normalize.decode -> t -> (Name.t * Value.t) list
 95 | val last : ?normalize:Normalize.decode -> t -> Name.t -> Value.t option
 96 | val any : ?normalize:Normalize.decode -> t -> Name.t -> Value.t option
 97 | val find_all : ?normalize:Normalize.decode -> t -> Name.t -> Value.t list
 98 | val names : t -> Name.t list
 99 | val add : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t
100 | val add_at_bottom : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t
101 | val add_if_missing : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t
102 | 
103 | val add_at_bottom_if_missing
104 |   :  ?normalize:Normalize.encode
105 |   -> t
106 |   -> name:Name.t
107 |   -> value:Value.t
108 |   -> t
109 | 
110 | val set : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t
111 | val set_at_bottom : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t
112 | val add_all : ?normalize:Normalize.encode -> t -> (Name.t * Value.t) list -> t
113 | val add_all_at_bottom : ?normalize:Normalize.encode -> t -> (Name.t * Value.t) list -> t
114 | 
115 | (** If headers with this name already exist, concatenates the values for all separated by
116 |     a comma, and appends the new value. Otherwise, creates a new header. *)
117 | val smash_and_add : ?normalize:Normalize.encode -> t -> name:Name.t -> value:Value.t -> t
118 | 
119 | val filter
120 |   :  ?normalize:Normalize.decode
121 |   -> t
122 |   -> f:(name:Name.t -> value:Value.t -> bool)
123 |   -> t
124 | 
125 | (** rewrite header values, preserving original whitespace where possible.
126 | 
127 |     [normalize] is used to [Value.of_string ?normalize] the [~value] before passing to
128 |     [f], and again to [Value.to_string ?normalize] the result. If the [~value] and
129 |     [f ~name ~value] are the same no change will be made (white space is preserved).
130 | 
131 |     Particularly the following is an identity transform:
132 |     [ map ~normalize:`Whitespace ~f:(fun ~name:_ ~value -> Value.of_string ~normalize:`Whitespace value) ].
133 |     By contrast the following will 'normalize' the whitespace on all headers.
134 |     [ map ~normalize:`None ~f:(fun ~name:_ ~value -> Value.of_string ~normalize:`Whitespace value) ]. *)
135 | val map
136 |   :  ?normalize:Normalize.decode
137 |   -> t
138 |   -> f:(name:Name.t -> value:Value.t -> Value.t)
139 |   -> t
140 | 
141 | val map'
142 |   :  ?normalize:Normalize.decode
143 |   -> t
144 |   -> f:(name:Name.t -> value:Value.t -> Name.t * Value.t)
145 |   -> t
146 | 
147 | module Stable : sig
148 |   module Name : sig
149 |     module V1 : sig
150 |       type t = Name.t [@@deriving equal]
151 | 
152 |       include Stable_without_comparator with type t := t
153 |     end
154 |   end
155 | 
156 |   module Value : sig
157 |     module V1 : Stable_without_comparator with type t = Value.t
158 |   end
159 | 
160 |   module V1 : sig
161 |     type nonrec t = t [@@deriving equal]
162 | 
163 |     include Stable_without_comparator with type t := t
164 |   end
165 | end
166 | 


--------------------------------------------------------------------------------
/kernel/src/lf_or_crlf.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | type t =
 4 |   [ `LF
 5 |   | `CRLF
 6 |   ]
 7 | 
 8 | let to_string = function
 9 |   | `LF -> "\n"
10 |   | `CRLF -> "\r\n"
11 | ;;
12 | 


--------------------------------------------------------------------------------
/kernel/src/lf_or_crlf.mli:
--------------------------------------------------------------------------------
1 | open! Core
2 | 
3 | type t =
4 |   [ `LF (** \n. Used to delineate new lines on most Unix systems. *)
5 |   | `CRLF (** \r\n. Used to delineate new lines over the network. *)
6 |   ]
7 | 
8 | val to_string : t -> string
9 | 


--------------------------------------------------------------------------------
/kernel/src/magic_mime_external.ml:
--------------------------------------------------------------------------------
1 | module Magic_mime = Magic_mime
2 | module Mime_types = Mime_types
3 | 


--------------------------------------------------------------------------------
/kernel/src/media_type.ml:
--------------------------------------------------------------------------------
  1 | open Core
  2 | 
  3 | module Params = struct
  4 |   type t = (Headers.Name.t * string) list [@@deriving sexp_of, compare]
  5 | 
  6 |   let last t name =
  7 |     let name = Headers.Name.of_string name in
  8 |     List.fold t ~init:None ~f:(fun r (k, v) ->
  9 |       if Headers.Name.equal name k then Some v else r)
 10 |   ;;
 11 | 
 12 |   let to_key_value_string (k, v) =
 13 |     Headers.Name.to_string k ^ "=" ^ Rfc.RFC2045.Token.is_valid_or_quote v
 14 |   ;;
 15 | end
 16 | 
 17 | type t =
 18 |   { mime_type : Rfc.RFC2045.Token.t
 19 |   ; mime_subtype : Rfc.RFC2045.Token.t
 20 |   ; params : Params.t
 21 |   }
 22 | [@@deriving compare, sexp_of]
 23 | 
 24 | let mime_type t = Rfc.RFC2045.Token.to_lowercase_string t.mime_type
 25 | let mime_subtype t = Rfc.RFC2045.Token.to_lowercase_string t.mime_subtype
 26 | 
 27 | let create mime_type mime_subtype =
 28 |   { mime_type = Rfc.RFC2045.Token.of_string mime_type
 29 |   ; mime_subtype = Rfc.RFC2045.Token.of_string mime_subtype
 30 |   ; params = []
 31 |   }
 32 | ;;
 33 | 
 34 | let set_param t ~name ~value =
 35 |   { t with
 36 |     params =
 37 |       List.filter t.params ~f:(fun (k, _) -> not (Headers.Name.is name k))
 38 |       @ [ name, value ]
 39 |   }
 40 | ;;
 41 | 
 42 | let is ?mime_type ?mime_subtype t =
 43 |   let module T = Rfc.RFC2045.Token in
 44 |   Option.value_map mime_type ~default:true ~f:(fun mime_type ->
 45 |     T.equal t.mime_type (T.of_string mime_type))
 46 |   && Option.value_map mime_subtype ~default:true ~f:(fun mime_subtype ->
 47 |     T.equal t.mime_subtype (T.of_string mime_subtype))
 48 | ;;
 49 | 
 50 | (* Some convenience functions for working with mime types *)
 51 | let is_text t = is ~mime_type:"text" t
 52 | let is_multipart t = is ~mime_type:"multipart" t
 53 | let is_multipart_report = is ~mime_type:"multipart" ~mime_subtype:"report"
 54 | let is_message_rfc822 = is ~mime_type:"message" ~mime_subtype:"rfc822"
 55 | let is_digest t = is ~mime_type:"multipart" ~mime_subtype:"digest" t
 56 | 
 57 | let multipart_boundary t =
 58 |   if is_multipart t
 59 |   then Option.map ~f:Boundary.of_string (Params.last t.params "boundary")
 60 |   else None
 61 | ;;
 62 | 
 63 | let set_multipart_boundary t boundary =
 64 |   set_param t ~name:"boundary" ~value:(Boundary.to_string boundary)
 65 | ;;
 66 | 
 67 | let of_grammar (mime_type, mime_subtype, params) =
 68 |   { mime_type = Rfc.RFC2045.Token.of_string mime_type
 69 |   ; mime_subtype = Rfc.RFC2045.Token.of_string mime_subtype
 70 |   ; params
 71 |   }
 72 | ;;
 73 | 
 74 | let of_string x =
 75 |   of_grammar
 76 |     (Media_type_grammar.content_type Media_type_lexer.content_type (Lexing.from_string x))
 77 | ;;
 78 | 
 79 | let to_string t =
 80 |   String.concat
 81 |     ~sep:"; "
 82 |     ((Rfc.RFC2045.Token.to_lowercase_string t.mime_type
 83 |       ^ "/"
 84 |       ^ Rfc.RFC2045.Token.to_lowercase_string t.mime_subtype)
 85 |      :: List.map t.params ~f:Params.to_key_value_string)
 86 | ;;
 87 | 
 88 | let from_headers headers =
 89 |   Option.bind (Headers.last ~normalize:`None headers "Content-Type") ~f:(fun field ->
 90 |     Option.try_with (fun () -> of_string field))
 91 | ;;
 92 | 
 93 | let set_headers headers t =
 94 |   match from_headers headers with
 95 |   | Some t' when [%compare.equal: t] t t' -> headers
 96 |   | _ -> Headers.set_at_bottom headers ~name:"Content-Type" ~value:(to_string t)
 97 | ;;
 98 | 
 99 | let message_rfc822 = create "message" "rfc822"
100 | 
101 | let text_plain ?(charset = "us-ascii") () =
102 |   create "text" "plain" |> set_param ~name:"charset" ~value:charset
103 | ;;
104 | 
105 | let create_multipart mime_subtype ~boundary =
106 |   set_multipart_boundary (create "multipart" mime_subtype) boundary
107 | ;;
108 | 
109 | let default_default = text_plain ()
110 | let default_digest = message_rfc822
111 | 
112 | let default ?parent () =
113 |   if Option.value_map parent ~f:is_digest ~default:false
114 |   then default_digest
115 |   else default_default
116 | ;;
117 | 
118 | let%expect_test _ =
119 |   let test t =
120 |     let s = to_string t in
121 |     print_endline s;
122 |     let t' = of_string s in
123 |     [%test_eq: t] t t'
124 |   in
125 |   test message_rfc822;
126 |   [%expect {| message/rfc822 |}];
127 |   test (text_plain ());
128 |   [%expect {| text/plain; charset=us-ascii |}];
129 |   test (text_plain () ~charset:"nonsense charset");
130 |   [%expect {| text/plain; charset="nonsense charset" |}];
131 |   test default_default;
132 |   [%expect {| text/plain; charset=us-ascii |}];
133 |   test default_digest;
134 |   [%expect {| message/rfc822 |}];
135 |   test (create "application" "json");
136 |   [%expect {| application/json |}];
137 |   test (create_multipart "related" ~boundary:(Boundary.of_string "--::_BOUNDARY_::--"));
138 |   [%expect {| multipart/related; boundary="--::_BOUNDARY_::--" |}];
139 |   test (create_multipart "related" ~boundary:(Boundary.of_string "boundary"));
140 |   [%expect {| multipart/related; boundary=boundary |}];
141 |   test
142 |     (create_multipart
143 |        "related"
144 |        ~boundary:(Boundary.of_string "questionable boundary string"));
145 |   [%expect {| multipart/related; boundary="questionable boundary string" |}];
146 |   let test s =
147 |     let t = of_string s in
148 |     printf !"%{sexp:t}\n" t;
149 |     test t
150 |   in
151 |   test {|text/plain|};
152 |   [%expect
153 |     {|
154 |     ((mime_type text) (mime_subtype plain) (params ()))
155 |     text/plain
156 |     |}];
157 |   test {|test/plain; charset="us-ascii"|};
158 |   [%expect
159 |     {|
160 |     ((mime_type test) (mime_subtype plain) (params ((charset us-ascii))))
161 |     test/plain; charset=us-ascii
162 |     |}];
163 |   test
164 |     {|multipart/related;
165 |                 boundary="--::FOO"|};
166 |   [%expect
167 |     {|
168 |     ((mime_type multipart) (mime_subtype related) (params ((boundary --::FOO))))
169 |     multipart/related; boundary="--::FOO"
170 |     |}]
171 | ;;
172 | 


--------------------------------------------------------------------------------
/kernel/src/media_type.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | type t [@@deriving sexp_of, compare]
 4 | 
 5 | val multipart_boundary : t -> Boundary.t option
 6 | val set_multipart_boundary : t -> Boundary.t -> t
 7 | val from_headers : Headers.t -> t option
 8 | val set_headers : Headers.t -> t -> Headers.t
 9 | val default : ?parent:t -> unit -> t
10 | 
11 | (** [create type_ subtype] represents the mime-type "type_/subtype". *)
12 | val create : string -> string -> t
13 | 
14 | val mime_type : t -> string
15 | val mime_subtype : t -> string
16 | val create_multipart : string -> boundary:Boundary.t -> t
17 | val message_rfc822 : t
18 | val text_plain : ?charset:string -> unit -> t
19 | val is_text : t -> bool
20 | val is_multipart : t -> bool
21 | val is_multipart_report : t -> bool
22 | val is_message_rfc822 : t -> bool
23 | val is_digest : t -> bool
24 | 


--------------------------------------------------------------------------------
/kernel/src/media_type_grammar.mly:
--------------------------------------------------------------------------------
 1 | %{
 2 | open! Core
 3 | 
 4 | %}
 5 | 
 6 | /*(* Standard tokens *)*/
 7 | %token <string> ERROR
 8 | %token EOF
 9 | 
10 | %token <string> STRING
11 | %token <string> ATOM
12 | 
13 | %token EQUALS
14 | %token SLASH
15 | %token SEMICOLON
16 | 
17 | 
18 | %start content_type
19 | %type <Media_type_grammar_types.content_type> content_type
20 | 
21 | %%
22 | 
23 | content_type : ctype SLASH csubtype param_list { ($1, $3, $4) }
24 | ;
25 | 
26 | ctype : ATOM { $1 };
27 | csubtype : ATOM { $1 };
28 | 
29 | /*(* Some implementations wrongfully add semicolons at the end of the Content-type field.
30 | This rule allows for it.
31 | *)*/
32 | param_list :
33 |   | param_list_aux EOF { $1 }
34 |   | param_list_aux semicolon EOF { $1 }
35 | ;
36 | 
37 | /*(* Be tolerant of repeated semicolons *)*/
38 | semicolon :
39 |   | SEMICOLON { () }
40 |   | semicolon SEMICOLON { () }
41 | ;
42 | 
43 | param_list_aux :
44 |     { [] }
45 |   | param_list_aux semicolon parameter { $3 :: $1 }
46 | ;
47 | 
48 | parameter : attribute EQUALS value { ($1, $3) };
49 | attribute : ATOM { $1 };
50 | value :
51 |     ATOM { $1 }
52 |   | STRING { $1 }
53 | ;
54 | 
55 | %%
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/kernel/src/media_type_grammar_types.ml:
--------------------------------------------------------------------------------
1 | (* Field types *)
2 | type content_type = string * string * (string * string) list
3 | 


--------------------------------------------------------------------------------
/kernel/src/media_type_grammar_types.mli:
--------------------------------------------------------------------------------
1 | (** Field types *)
2 | 
3 | type content_type = string * string * (string * string) list
4 | 


--------------------------------------------------------------------------------
/kernel/src/media_type_lexer.mli:
--------------------------------------------------------------------------------
1 | open! Core
2 | 
3 | val content_type : Lexing.lexbuf -> Media_type_grammar.token
4 | 


--------------------------------------------------------------------------------
/kernel/src/media_type_lexer.mll:
--------------------------------------------------------------------------------
  1 | {
  2 | open Media_type_grammar
  3 | 
  4 | let unescape_staged = Core.String.Escaping.unescape ~escape_char:'\\';;
  5 | let unescape = Core.unstage unescape_staged;;
  6 | 
  7 | }
  8 | (* Rules from RFCs. Some may need to be copied to specific places to parse
  9 |  * their individual parts *)
 10 | 
 11 | (** RFC2234 - Core *)
 12 | let cr = "\013"
 13 | let lf = "\010"
 14 | 
 15 | (* Deliberately chose to include bare CR and LF here, although the RFC suggests
 16 |  * them to be included as part of the text characters.
 17 |  * The rationale being that, when parsing e-mail from a text file, they will
 18 |  * probably mean CRLF.
 19 |  * The issue should not arise in conforming e-mails.
 20 |  *)
 21 | let crlf_conforming = cr lf
 22 | let crlf_non_conforming = crlf_conforming | lf
 23 | let crlf = crlf_non_conforming
 24 | 
 25 | let wsp = [' ' '\t']
 26 | 
 27 | (** RFC2822 3.2.1 -- Primitive tokens *)
 28 | 
 29 | let no_ws_ctl = [ '\001'-'\008' '\011' '\012' '\014'-'\031' '\127']
 30 | 
 31 | (* See crlf *)
 32 | let text_non_conforming = [ '\001'-'\009' '\011' '\012' '\014'-'\127' ]
 33 | 
 34 | (** RFC2822 3.2.2 -- Quoted characters  *)
 35 | let obs_qp = "\\" [ '\000' - '\127' ]
 36 | (* Allows for escaping of newlines *)
 37 | let crlf_qp_non_conforming = "\\" crlf
 38 | 
 39 | let quoted_pair = "\\" text_non_conforming | crlf_qp_non_conforming | obs_qp
 40 | 
 41 | (** RFC2822 3.2.3 -- Folding whitespace and comments *)
 42 | (* Obs: If this matches a CRLF, there's forcibly wsp afterwards *)
 43 | let obs_fws = wsp + (crlf wsp +) *
 44 | let fws = ((wsp * crlf) ? wsp + ) | obs_fws
 45 | 
 46 | (** RFC2822 3.2.5 -- Quoted strings *)
 47 | let qtext = no_ws_ctl | [ '\033' '\035'-'\091' '\093'-'\126' ]
 48 | let qcontent = qtext | quoted_pair
 49 | 
 50 | let quoted_string_contents = (fws ? qcontent) * fws ?
 51 | (*
 52 | let quoted_string = cfws ? '"' quoted_string_contents '"' cfws ?
 53 | (** RFC2822 3.2.6 -- Miscellaneous tokens *)
 54 | let word = atom | quoted_string
 55 | 
 56 | let obs_phrase = word (word | "." | cfws) *
 57 | let phrase = word + | obs_phrase
 58 | *)
 59 | 
 60 | (**********************************************************)
 61 | 
 62 | (* RFC 2045 5.1 -- Syntax of the Content-type header field *)
 63 | (* Removed space characters, control characters, those outsize US-ASCII and
 64 |  * tspecials *)
 65 | let token_char =
 66 |   ([ ^ '\n' '\r'  ' '
 67 |        '(' ')' '<' '>' '@' ',' ';' ':' '\\' '"' '/' '[' ']' '?' '='
 68 |        '\000' - '\031' '\127' ])
 69 | 
 70 | let token = token_char +
 71 | 
 72 | rule
 73 | 
 74 | (******************)
 75 | (* HEADER PARSING *)
 76 | (******************)
 77 | (* These lexers are independent from the former, and work the way traditional
 78 | lexers do, without a dispatcher or state handlers *)
 79 | 
 80 | 
 81 | (** Allows other lexers to skip comments.
 82 | 
 83 |    Example usage:
 84 | rule my_lexer = parse
 85 |   | '('      -> { my_lexer (comment 1 lexbuf) }
 86 |   | .. other patterns ..
 87 | 
 88 | *)
 89 | comment level = parse
 90 |   | '('            { comment (level + 1) lexbuf }
 91 |   | ')' fws *      {
 92 |     if level <= 1 then
 93 |     begin
 94 |       assert (level = 1); lexbuf
 95 |     end
 96 |     else
 97 |       comment (level - 1) lexbuf }
 98 |   | '\\' ? _ { comment level lexbuf }
 99 | and
100 | 
101 | (* Parses a Content-type field *)
102 | content_type = parse
103 |   | '('          { content_type (comment 1 lexbuf) }
104 |   | fws          { content_type lexbuf }
105 |   | '"' (quoted_string_contents as str) '"' { STRING (unescape str) }
106 |   | '/'          { SLASH }
107 |   | token as str { ATOM (str) }
108 |   | '='          { EQUALS }
109 |   | ';'          { SEMICOLON }
110 |   | _ as c       { ERROR (Core.sprintf "Unexpected char: %c" c) }
111 |   | eof          { EOF }
112 | 
113 | 
114 | {
115 | 
116 | }
117 | 


--------------------------------------------------------------------------------
/kernel/src/mimestring.ml:
--------------------------------------------------------------------------------
 1 | open Core.Core_stable
 2 | 
 3 | (** Case-insensitive strings *)
 4 | module Case_insensitive = struct
 5 |   module Stable = struct
 6 |     module V1 = struct
 7 |       type t = String.V1.t [@@deriving sexp, bin_io]
 8 | 
 9 |       let compare = Core.String.Caseless.compare
10 |       let comparator = Core.String.Caseless.comparator
11 | 
12 |       type comparator_witness = Core.String.Caseless.comparator_witness
13 | 
14 |       let hash = Core.String.Caseless.hash
15 |       let hash_fold_t = Core.String.Caseless.hash_fold_t
16 |     end
17 |   end
18 | 
19 |   open Core
20 |   include String.Caseless
21 | 
22 |   let of_string = Fn.id
23 |   let to_string = Fn.id
24 |   let to_lowercase_string = String.lowercase
25 |   let equal_string = equal
26 | end
27 | 
28 | open Core
29 | 
30 | module type S = sig
31 |   type t [@@deriving sexp]
32 | 
33 |   val of_string : string -> t
34 |   val to_lowercase_string : t -> string
35 |   val equal_string : t -> string -> bool
36 | 
37 |   include Comparable.S_plain with type t := t
38 |   include Hashable.S_plain with type t := t
39 | end
40 | 
41 | let quote_escape =
42 |   unstage (String.Escaping.escape ~escapeworthy:[ '"'; '\\' ] ~escape_char:'\\')
43 | ;;
44 | 
45 | let quote str = String.concat [ "\""; quote_escape str; "\"" ]
46 | 


--------------------------------------------------------------------------------
/kernel/src/mimestring.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | (*_ For usage in functors *)
 4 | 
 5 | module type S = sig
 6 |   type t [@@deriving sexp]
 7 | 
 8 |   val of_string : string -> t
 9 |   val to_lowercase_string : t -> string
10 |   val equal_string : t -> string -> bool
11 | 
12 |   include Comparable.S_plain with type t := t
13 |   include Hashable.S_plain with type t := t
14 | end
15 | 
16 | module Case_insensitive : sig
17 |   include
18 |     S
19 |     with type t = string
20 |      and type comparator_witness = String.Caseless.comparator_witness
21 | 
22 |   val to_string : t -> string
23 | 
24 |   module Stable : sig
25 |     module V1 : sig
26 |       type nonrec t = t [@@deriving sexp, bin_io, compare, hash]
27 | 
28 |       include
29 |         Stable
30 |         with type t := t
31 |          and type comparator_witness = String.Caseless.comparator_witness
32 |     end
33 |   end
34 | end
35 | 
36 | val quote : string -> string
37 | 


--------------------------------------------------------------------------------
/kernel/src/octet_stream.ml:
--------------------------------------------------------------------------------
  1 | module Stable = struct
  2 |   open Core.Core_stable
  3 | 
  4 |   module Encoding = struct
  5 |     module V1 = struct
  6 |       type t =
  7 |         [ `Base64
  8 |         | `Bit7
  9 |         | `Bit8
 10 |         | `Binary
 11 |         | `Quoted_printable
 12 |         | `Unknown of string
 13 |         ]
 14 |       [@@deriving sexp, bin_io]
 15 |     end
 16 |   end
 17 | 
 18 |   module V1 = struct
 19 |     type t =
 20 |       { encoding : Encoding.V1.t
 21 |       ; content : Bigstring_shared.Stable.V1.t
 22 |       }
 23 |     [@@deriving sexp, bin_io]
 24 |   end
 25 | end
 26 | 
 27 | open Core
 28 | 
 29 | module Encoding = struct
 30 |   (** Text or binary are the type of the plaintext. For Base64, if the mode is text, '\n'
 31 |       is turned into '\r\n' when encoding, and viceversa. *)
 32 |   type known =
 33 |     [ `Base64
 34 |     | `Bit7
 35 |     | `Bit8
 36 |     | `Binary
 37 |     | `Quoted_printable
 38 |     ]
 39 |   [@@deriving sexp_of, compare, hash]
 40 | 
 41 |   type t =
 42 |     (* Stable.Encoding.V1.t = *)
 43 |     [ known
 44 |     | `Unknown of string
 45 |     ]
 46 |   [@@deriving sexp_of, compare, hash]
 47 | 
 48 |   let of_string encoding =
 49 |     match encoding |> String.strip |> String.lowercase with
 50 |     | "base64" -> `Base64
 51 |     | "7bit" -> `Bit7
 52 |     | "8bit" -> `Bit8
 53 |     | "binary" -> `Binary
 54 |     | "quoted-printable" -> `Quoted_printable
 55 |     | unknown -> `Unknown unknown
 56 |   ;;
 57 | 
 58 |   let to_string = function
 59 |     | `Base64 -> "base64"
 60 |     | `Bit7 -> "7bit"
 61 |     | `Bit8 -> "8bit"
 62 |     | `Binary -> "binary"
 63 |     | `Quoted_printable -> "quoted-printable"
 64 |     | `Unknown unknown -> unknown
 65 |   ;;
 66 | 
 67 |   let default = `Bit7
 68 |   let default' = `Bit7
 69 | 
 70 |   let of_headers ?(ignore_base64_for_multipart = true) headers =
 71 |     Headers.last headers "content-transfer-encoding"
 72 |     |> Option.map ~f:of_string
 73 |     |> function
 74 |     | Some `Base64 when ignore_base64_for_multipart ->
 75 |       let is_multipart =
 76 |         match Media_type.from_headers headers with
 77 |         | Some media_type -> Media_type.is_multipart media_type
 78 |         | None -> false
 79 |       in
 80 |       if is_multipart then Some default else Some `Base64
 81 |     | _ as encoding -> encoding
 82 |   ;;
 83 | 
 84 |   let of_headers_or_default ?ignore_base64_for_multipart headers =
 85 |     match of_headers ?ignore_base64_for_multipart headers with
 86 |     | Some t -> t
 87 |     | None -> default
 88 |   ;;
 89 | end
 90 | 
 91 | type t = Stable.V1.t =
 92 |   { encoding : Encoding.t
 93 |   ; content : Bigstring_shared.t
 94 |   }
 95 | [@@deriving sexp_of, compare, hash]
 96 | 
 97 | let encoding t = t.encoding
 98 | let encoded_contents t = t.content
 99 | let encoded_contents_string t = Bigstring_shared.to_string (encoded_contents t)
100 | let of_bigstring_shared ~encoding content = { encoding; content }
101 | 
102 | let of_string ~encoding str =
103 |   of_bigstring_shared ~encoding (Bigstring_shared.of_string str)
104 | ;;
105 | 
106 | let empty = of_bigstring_shared ~encoding:Encoding.default Bigstring_shared.empty
107 | 
108 | module Identity = struct
109 |   let encode bstr = bstr
110 |   let decode bstr = bstr
111 | end
112 | 
113 | module Base64 = struct
114 |   let buffer_size = 1024
115 |   let empty_bytes = Bytes.create 0
116 | 
117 |   let decode (src : Bigstring_shared.t) =
118 |     let src = (src :> Bigstring.t) in
119 |     let dst = Bigbuffer.create ((Bigstring.length src + 3) / 4 * 3) in
120 |     let buffer = Bytes.create buffer_size in
121 |     let decoder = Base64_rfc2045.decoder `Manual in
122 |     let rec loop ~pos =
123 |       match Base64_rfc2045.decode decoder with
124 |       | `Await ->
125 |         if pos = Bigstring.length src
126 |         then (
127 |           (* Signal end of input. *)
128 |           Base64_rfc2045.src decoder empty_bytes 0 0;
129 |           loop ~pos)
130 |         else (
131 |           let len = Int.min (Bigstring.length src - pos) buffer_size in
132 |           Bigstring.To_bytes.blit ~src ~src_pos:pos ~dst:buffer ~dst_pos:0 ~len;
133 |           Base64_rfc2045.src decoder buffer 0 len;
134 |           loop ~pos:(pos + len))
135 |       | `Wrong_padding ->
136 |         (* Ignore padding issues. *)
137 |         loop ~pos
138 |       | `End -> Bigstring_shared.of_bigbuffer_volatile dst
139 |       | `Flush str ->
140 |         Bigbuffer.add_string dst str;
141 |         loop ~pos
142 |       | `Malformed _unparsed ->
143 |         (* Ignored invalid characters. *)
144 |         loop ~pos
145 |     in
146 |     loop ~pos:0
147 |   ;;
148 | 
149 |   let encoded_length input =
150 |     (* 3 characters becomes 4 *)
151 |     let base64_3_4_expanded_length = (Bigstring.length input + 2) / 3 * 4 in
152 |     (* "\r\n" is added for line breaks *)
153 |     let base64_rfc2045_line_length = 76 in
154 |     let base64_rfc2045_lines =
155 |       (base64_3_4_expanded_length + (base64_rfc2045_line_length - 1))
156 |       / base64_rfc2045_line_length
157 |     in
158 |     base64_rfc2045_lines * (base64_rfc2045_line_length + String.length "\r\n")
159 |   ;;
160 | 
161 |   let encode (src : Bigstring_shared.t) =
162 |     let src = (src :> Bigstring.t) in
163 |     let dst = Bigbuffer.create (encoded_length src) in
164 |     let encoder = Base64_rfc2045.encoder `Manual in
165 |     let buffer = Bytes.create buffer_size in
166 |     Base64_rfc2045.dst encoder buffer 0 buffer_size;
167 |     let rec flush = function
168 |       | `Ok -> ()
169 |       | `Partial ->
170 |         let len = buffer_size - Base64_rfc2045.dst_rem encoder in
171 |         assert (len > 0);
172 |         Bigbuffer.add_subbytes dst buffer ~pos:0 ~len;
173 |         Base64_rfc2045.dst encoder buffer 0 buffer_size;
174 |         flush (Base64_rfc2045.encode encoder `Await)
175 |     in
176 |     let rec loop ~pos =
177 |       if pos >= Bigstring.length src
178 |       then (
179 |         flush (Base64_rfc2045.encode encoder `End);
180 |         Bigstring_shared.of_bigbuffer_volatile dst)
181 |       else (
182 |         flush (Base64_rfc2045.encode encoder (`Char (Bigstring.get src pos)));
183 |         loop ~pos:(pos + 1))
184 |     in
185 |     loop ~pos:0
186 |   ;;
187 | end
188 | 
189 | module Quoted_printable = struct
190 |   let decode bstr =
191 |     (* The RFC2045 says that newlines can be converted to the platforms native
192 |        format, so that's what we'll do. It's the same for both binary data and
193 |        text data. If a CRLF sequence appears in the decoded data, that's because
194 |        it was encoded as =0D=0A, which means the characters shouldn't be
195 |        interpreted as EOL.  *)
196 |     let bigbuffer, _ =
197 |       Quoted_printable_lexer.decode_quoted_printable
198 |         (Bigstring_shared.length bstr)
199 |         (Bigstring_shared.to_lexbuf bstr)
200 |     in
201 |     Bigstring_shared.of_bigbuffer_volatile bigbuffer
202 |   ;;
203 | 
204 |   let encode bstr =
205 |     let bigbuffer =
206 |       Quoted_printable_lexer.encode_quoted_printable
207 |         (Bigstring_shared.length bstr)
208 |         (Bigstring_shared.to_lexbuf bstr)
209 |     in
210 |     Bigstring_shared.of_bigbuffer_volatile bigbuffer
211 |   ;;
212 | end
213 | 
214 | let decode t =
215 |   match t.encoding with
216 |   | `Base64 -> Some (Base64.decode t.content)
217 |   | `Quoted_printable -> Some (Quoted_printable.decode t.content)
218 |   | `Bit7 -> Some (Identity.decode t.content)
219 |   | `Bit8 -> Some (Identity.decode t.content)
220 |   | `Binary -> Some (Identity.decode t.content)
221 |   | `Unknown _ -> None
222 | ;;
223 | 
224 | let encode ~encoding bstr =
225 |   let bstr =
226 |     match encoding with
227 |     | `Base64 -> Base64.encode bstr
228 |     | `Quoted_printable -> Quoted_printable.encode bstr
229 |     | `Bit7 -> Identity.encode bstr
230 |     | `Bit8 -> Identity.encode bstr
231 |     | `Binary -> Identity.encode bstr
232 |   in
233 |   let encoding = (encoding :> Encoding.t) in
234 |   of_bigstring_shared ~encoding bstr
235 | ;;
236 | 


--------------------------------------------------------------------------------
/kernel/src/octet_stream.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | (** RFC 2045 MIME-encoded Bigstrings. *)
 4 | module Encoding : sig
 5 |   (** Text or binary are the type of the plaintext. For Base64, if the mode is text, '\n'
 6 |       is turned into '\r\n' when encoding, and vice versa. *)
 7 |   type known =
 8 |     [ `Base64
 9 |     | `Bit7
10 |     | `Bit8
11 |     | `Binary
12 |     | `Quoted_printable
13 |     ]
14 |   [@@deriving sexp_of, compare, hash]
15 | 
16 |   type t =
17 |     [ known
18 |     | `Unknown of string
19 |     ]
20 |   [@@deriving sexp_of, compare, hash]
21 | 
22 |   (*_ RFC 2045 says 7bit should be assumed if the Content-Transfer-Encoding heading is
23 |     missing. *)
24 | 
25 |   val default : known
26 |   val default' : t
27 | 
28 |   (** Determine an encoding based on email headers. [ignore_base64_for_multipart] is
29 |       useful because some clients can't read RFCs and incorrectly indicate a transfer
30 |       encoding of base64 for multipart messages. *)
31 |   val of_headers_or_default
32 |     :  ?ignore_base64_for_multipart:bool (** default: true *)
33 |     -> Headers.t
34 |     -> t
35 | 
36 |   include Stringable.S with type t := t
37 | end
38 | 
39 | type t [@@deriving sexp_of, compare, hash]
40 | 
41 | val of_string : encoding:Encoding.t -> string -> t
42 | val of_bigstring_shared : encoding:Encoding.t -> Bigstring_shared.t -> t
43 | val empty : t
44 | val encoding : t -> Encoding.t
45 | val encoded_contents : t -> Bigstring_shared.t
46 | val encoded_contents_string : t -> string
47 | 
48 | (** These are the expensive operation. *)
49 | 
50 | val encode : encoding:Encoding.known -> Bigstring_shared.t -> t
51 | 
52 | (** None if encoding is `Unknown. *)
53 | val decode : t -> Bigstring_shared.t option
54 | 
55 | module Stable : sig
56 |   module V1 : sig
57 |     type nonrec t = t [@@deriving sexp, bin_io]
58 |   end
59 | end
60 | 


--------------------------------------------------------------------------------
/kernel/src/quoted_printable_lexer.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | val decode_quoted_printable
 4 |   :  int
 5 |   -> Lexing.lexbuf
 6 |   -> Bigbuffer.t * [ `Ok | `Unexpected_characters ]
 7 | 
 8 | (*_ quoted printable is ALWAYS encoded as text *)
 9 | 
10 | val encode_quoted_printable : int -> Lexing.lexbuf -> Bigbuffer.t
11 | 


--------------------------------------------------------------------------------
/kernel/src/quoted_printable_lexer.mll:
--------------------------------------------------------------------------------
  1 | {
  2 | 
  3 | (* Can't open Core, as the OCamlLex generated code requires Array.create
  4 |  * to be of type (int -> 'a -> 'a t), not (len:int -> 'a -> 'a t) *)
  5 | module C = Core
  6 | 
  7 | module Quoted_printable = struct
  8 | 
  9 |   (* Quoted printable functions *)
 10 |   let decode_hex a b =
 11 |     let decode_digit c = match c with
 12 |       | '0'..'9' -> (Char.code c) - (Char.code '0') + 0
 13 |       | 'A'..'F' -> (Char.code c) - (Char.code 'A') + 10
 14 |       | 'a'..'f' -> (Char.code c) - (Char.code 'a') + 10
 15 |       | _ -> invalid_arg "c"
 16 |     in
 17 |     Char.chr (((decode_digit a) * 16) + (decode_digit b))
 18 |   ;;
 19 | 
 20 |   let hex_to = "0123456789ABCDEF"
 21 | 
 22 |   module Buffer = struct
 23 |     type t =
 24 |       {
 25 |         max_len : int;
 26 |         text : C.Bigbuffer.t;
 27 | 
 28 |         (* Invariant: The length of `word' is always less than `max_len' *)
 29 |         word : C.Bigbuffer.t;
 30 | 
 31 |         (* Invariant: `pos' is always less than `max_len' *)
 32 |         mutable pos  : int;
 33 |       }
 34 |     ;;
 35 | 
 36 |     let create len =
 37 |       {
 38 |         text = C.Bigbuffer.create len;
 39 |         word = C.Bigbuffer.create 16;
 40 |         max_len = 76;
 41 |         pos = 0;
 42 |       }
 43 |     ;;
 44 | 
 45 |     let add_break t =
 46 |       if t.pos > 0 then
 47 |       begin
 48 |         C.Bigbuffer.add_string t.text "=\n";
 49 |         t.pos <- 0
 50 |       end
 51 |     ;;
 52 | 
 53 |     (* Adds the current word to the buffer, wrapping it to the
 54 |        next line if necessary *)
 55 |     let commit_word t =
 56 |       if t.pos + C.Bigbuffer.length t.word >= t.max_len then add_break t;
 57 | 
 58 |       C.Bigbuffer.add_buffer t.text t.word;
 59 |       t.pos <- t.pos + C.Bigbuffer.length t.word;
 60 |       C.Bigbuffer.clear t.word
 61 |       ;
 62 |     ;;
 63 | 
 64 |     let wrap_bigword t len_next =
 65 |       if C.Bigbuffer.length t.word >= t.max_len - len_next then
 66 |       begin
 67 |         add_break t;
 68 |         commit_word t;
 69 |         add_break t;
 70 |       end
 71 |     ;;
 72 | 
 73 |     let add_char t c =
 74 |       wrap_bigword t 1;
 75 |       C.Bigbuffer.add_char t.word c;
 76 |     ;;
 77 | 
 78 | 
 79 |     let add_quoted_char t c =
 80 |       wrap_bigword t 3;
 81 |       let code = Char.code c in
 82 |       let high = (code lsr 4) land (16 - 1) in
 83 |       let low = code land (16 - 1) in
 84 |       C.Bigbuffer.add_char t.word '=';
 85 |       C.Bigbuffer.add_char t.word hex_to.[high];
 86 |       C.Bigbuffer.add_char t.word hex_to.[low]
 87 |     ;;
 88 | 
 89 |     (* When calling this function, one must not immediately
 90 |       call add_new_line.
 91 |     *)
 92 |     let add_wsp t c =
 93 |       add_char t c;
 94 |       commit_word t
 95 |     ;;
 96 | 
 97 |     let add_new_line t c =
 98 |       C.Option.iter c ~f:(fun c -> add_quoted_char t c);
 99 |       commit_word t;
100 |       C.Bigbuffer.add_char t.text '\n';
101 |       t.pos <- 0
102 |     ;;
103 | 
104 |     let to_bigbuffer t =
105 |       commit_word t;
106 |       t.text
107 |     ;;
108 |   end
109 | 
110 |   (* TEST_MODULE = struct
111 |    *   TEST = C.String.length hex_to = 16;;
112 |    * end *)
113 | end
114 | 
115 | 
116 | 
117 | }
118 | (* Rules from RFCs. Some may need to be copied to specific places to parse
119 |  * their individual parts *)
120 | 
121 | (** RFC2234 - Core *)
122 | let cr = "\013"
123 | let lf = "\010"
124 | 
125 | (* Deliberately chose to include bare CR and LF here, although the RFC suggests
126 |  * them to be included as part of the text characters.
127 |  * The rationale being that, when parsing e-mail from a text file, they will
128 |  * probably mean CRLF.
129 |  * The issue should not arise in conforming e-mails.
130 |  *)
131 | let crlf_conforming = cr lf
132 | let crlf_non_conforming = crlf_conforming | lf
133 | let crlf = crlf_non_conforming
134 | 
135 | let wsp = [' ' '\t']
136 | 
137 | (* Quoted-printable *)
138 | let hexdigit_non_strict = ['0'-'9' 'a'-'f' 'A'-'F']
139 | let hexdigit_strict = hexdigit_non_strict # ['a'-'f']
140 | let qp_wsp = [' ' '\t']
141 | 
142 | let qp_allowed = [ '\033'-'\060' '\062'-'\126' ] as c
143 | 
144 | rule
145 | (*  Quoted-printable text *)
146 | decode_quoted_printable buffer dirty = parse
147 |   | wsp * crlf
148 |       {
149 |         C.Bigbuffer.add_char buffer '\n';
150 |         decode_quoted_printable buffer dirty lexbuf
151 |       }
152 |   | '=' crlf { decode_quoted_printable buffer dirty lexbuf }
153 |   | '=' (hexdigit_strict as a) (hexdigit_strict as b)
154 |       {
155 |         C.Bigbuffer.add_char buffer (Quoted_printable.decode_hex a b);
156 |         decode_quoted_printable buffer dirty lexbuf
157 |       }
158 |   | '=' (hexdigit_non_strict as a) (hexdigit_non_strict as b)
159 |       {
160 |         C.Bigbuffer.add_char buffer (Quoted_printable.decode_hex a b);
161 |         decode_quoted_printable buffer `Unexpected_characters lexbuf
162 |       }
163 |   | (qp_allowed | wsp) as c
164 |       {
165 |         C.Bigbuffer.add_char buffer c;
166 |         decode_quoted_printable buffer dirty lexbuf;
167 |       }
168 |   | _ as c
169 |       {
170 |         (* This characters shoudn't appear in a quoted-printable body,
171 |         buy the most robust way to handle them is probably copying
172 |         them verbatim *)
173 |         C.Bigbuffer.add_char buffer c;
174 |         decode_quoted_printable buffer `Unexpected_characters lexbuf
175 |       }
176 |   | eof { dirty }
177 | 
178 | and
179 | 
180 | (* Quoted-printable encoding with wrapping *)
181 | encode_quoted_printable buffer = parse
182 |   | (wsp as c) ? '\n'
183 |     {
184 |       Quoted_printable.Buffer.add_new_line buffer c;
185 |       encode_quoted_printable buffer lexbuf
186 |     }
187 |   | wsp as c
188 |     {
189 |       Quoted_printable.Buffer.add_wsp buffer c;
190 |       encode_quoted_printable buffer lexbuf
191 |     }
192 |   | qp_allowed as c
193 |     {
194 |       Quoted_printable.Buffer.add_char buffer c;
195 |       encode_quoted_printable buffer lexbuf
196 |     }
197 |   | _ as c
198 |     {
199 |       Quoted_printable.Buffer.add_quoted_char buffer c;
200 |       encode_quoted_printable buffer lexbuf
201 |     }
202 |   | eof
203 |     {
204 |       ()
205 |     }
206 | 
207 | {
208 | 
209 | let decode_quoted_printable len lexbuf =
210 |   let length_estimate = len in
211 |   let bigbuffer = C.Bigbuffer.create length_estimate in
212 |   let warnings = decode_quoted_printable bigbuffer `Ok lexbuf in
213 |   (bigbuffer, warnings)
214 | ;;
215 | 
216 | let encode_quoted_printable len lexbuf =
217 |   let length_estimate = len in
218 |   let buffer = Quoted_printable.Buffer.create length_estimate in
219 |   encode_quoted_printable buffer lexbuf
220 |   ;
221 | 
222 |   Quoted_printable.Buffer.to_bigbuffer buffer
223 | ;;
224 | 
225 | }
226 | 


--------------------------------------------------------------------------------
/kernel/src/rfc.ml:
--------------------------------------------------------------------------------
 1 | open Core
 2 | 
 3 | module RFC2045 = struct
 4 |   module Token = struct
 5 |     include (Mimestring.Case_insensitive : Mimestring.S)
 6 | 
 7 |     let is_valid str =
 8 |       (not (String.is_empty str))
 9 |       && String.for_all str ~f:(function
10 |         | '('
11 |         | ')'
12 |         | '<'
13 |         | '>'
14 |         | '@'
15 |         | ','
16 |         | ';'
17 |         | ':'
18 |         | '\\'
19 |         | '"'
20 |         | '/'
21 |         | '['
22 |         | ']'
23 |         | '?'
24 |         | '=' -> false
25 |         (* '\n', '\r', ' ' are excluded by the following: *)
26 |         | '\033' .. '\126' -> true
27 |         | _ -> false)
28 |     ;;
29 | 
30 |     let is_valid_or_quote str = if is_valid str then str else Mimestring.quote str
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/kernel/src/rfc.mli:
--------------------------------------------------------------------------------
 1 | (** Some functions for processing tokens from the BNF grammars that appear in the RFCs *)
 2 | 
 3 | module RFC2045 : sig
 4 |   module Token : sig
 5 |     include Mimestring.S
 6 | 
 7 |     (** True if the string doesn't need to be quoted *)
 8 |     val is_valid : string -> bool
 9 | 
10 |     (** Quotes a string if necessary *)
11 |     val is_valid_or_quote : string -> string
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/kernel/src/string_monoid.mli:
--------------------------------------------------------------------------------
1 | include String_monoid_intf.String_monoid (** @inline *)
2 | 
3 | module Private : sig
4 |   val output : dst_output:(Underlying.t -> unit) -> t -> unit
5 | end
6 | 


--------------------------------------------------------------------------------
/kernel/src/string_monoid_intf.ml:
--------------------------------------------------------------------------------
 1 | (** Simple library for concatenating immutable strings efficiently *)
 2 | 
 3 | open! Core
 4 | 
 5 | module type Underlying = sig
 6 |   type t =
 7 |     | String of String.t
 8 |     | Bigstring of Bigstring.t
 9 |     | Char of char
10 | end
11 | 
12 | module type String_monoid = sig
13 |   type t
14 | 
15 |   (** Primitive, constant-time operations *)
16 |   val empty : t
17 | 
18 |   val nl : t
19 |   val plus : t -> t -> t
20 |   val length : t -> int
21 | 
22 |   (** Linear in the number of elements. *)
23 |   val concat : ?sep:t -> t list -> t
24 | 
25 |   (** Linear in the number of elements in the list. *)
26 |   val concat_string : ?sep:string -> string list -> t
27 | 
28 |   (*_ t_of_* is O(1), *_of_t is O(N), N being the length *)
29 | 
30 |   include Stringable.S with type t := t
31 | 
32 |   val of_bigstring : Bigstring.t -> t
33 |   val to_bigstring : t -> Bigstring.t
34 |   val of_char : char -> t
35 | 
36 |   (*_
37 |     For the library to fulfill it's purpose of minimal overhead
38 |     string concatenation, the output functions must be tightly
39 |     coupled with the low-level representation.
40 | 
41 |     Any new output channel should be implemented as new methods
42 |     of the library itself.
43 |   *)
44 | 
45 |   val output_bigbuffer : t -> Bigbuffer.t -> unit
46 | 
47 |   module Underlying : Underlying
48 | 
49 |   val fold : t -> init:'accum -> f:('accum -> Underlying.t -> 'accum) -> 'accum
50 |   val iter : t -> f:(Underlying.t -> unit) -> unit
51 |   val is_suffix : t -> suffix:string -> bool
52 |   val is_prefix : t -> prefix:string -> bool
53 |   val is_substring : t -> substring:string -> bool
54 |   val is_string : t -> string:string -> bool
55 | end
56 | 


--------------------------------------------------------------------------------
/kernel/src/string_monoidable.ml:
--------------------------------------------------------------------------------
1 | module type S = sig
2 |   type t
3 | 
4 |   val to_string_monoid : t -> String_monoid.t
5 | end
6 | 


--------------------------------------------------------------------------------
/kernel/src/string_monoidable.mli:
--------------------------------------------------------------------------------
1 | module type S = sig
2 |   type t
3 | 
4 |   val to_string_monoid : t -> String_monoid.t
5 | end
6 | 


--------------------------------------------------------------------------------
/src/crypto.ml:
--------------------------------------------------------------------------------
1 | module Cryptokit = Cryptokit
2 | module CryptokitBignum = CryptokitBignum
3 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
 1 | (library
 2 |  (name email_message)
 3 |  (public_name email_message)
 4 |  (libraries angstrom async base64 core_unix.bigstring_unix core core_unix
 5 |    cryptokit email_address email_message_kernel core_kernel.nonempty_list re2
 6 |    core_unix.sys_unix core_unix.time_float_unix core_kernel.uuid
 7 |    core_unix.uuid)
 8 |  (preprocess
 9 |   (pps ppx_jane)))
10 | 


--------------------------------------------------------------------------------
/src/email.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open! Async
 3 | open! Import
 4 | include Email_message_kernel
 5 | 
 6 | let save ?temp_file ?perm ?fsync ?eol_except_raw_content t path =
 7 |   Writer.with_file_atomic ?temp_file ?perm ?fsync path ~f:(fun writer ->
 8 |     String_monoid.iter (to_string_monoid ?eol_except_raw_content t) ~f:(function
 9 |       | String_monoid.Underlying.Char c -> Writer.write_char writer c
10 |       | String str -> Writer.write writer str
11 |       | Bigstring bstr -> Writer.schedule_bigstring writer bstr);
12 |     return ())
13 | ;;
14 | 
15 | module Content = Email_message_kernel.Content
16 | module Raw_content = Email_message_kernel.Raw_content
17 | module Simple = Email_simple
18 | 


--------------------------------------------------------------------------------
/src/email.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open! Async
 3 | open! Import
 4 | 
 5 | include Email_message_kernel.Private.Email_intf.Email (** @inline *)
 6 | 
 7 | (** Efficiently save [t] to disk with little additional allocation.
 8 | 
 9 |     [?temp_file], [?perm], [?fsync] are blindly passed to [Writer.with_file_atomic] *)
10 | val save
11 |   :  ?temp_file:string
12 |   -> ?perm:Unix.file_perm
13 |   -> ?fsync:bool (** default is [false] *)
14 |   -> ?eol_except_raw_content:Lf_or_crlf.t (** default is [`LF] *)
15 |   -> t
16 |   -> string
17 |   -> unit Deferred.t
18 | 
19 | module Content = Email_message_kernel.Content
20 | module Raw_content = Email_message_kernel.Raw_content
21 | module Simple = Email_simple
22 | 


--------------------------------------------------------------------------------
/src/email_date.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | module Time = Time_float_unix
  3 | 
  4 | let utc_offset_string time ~zone =
  5 |   let utc_offset = Time.utc_offset time ~zone in
  6 |   let is_utc = Time.Span.( = ) utc_offset Time.Span.zero in
  7 |   if is_utc
  8 |   then "Z"
  9 |   else
 10 |     String.concat
 11 |       [ (if Time.Span.( < ) utc_offset Time.Span.zero then "-" else "+")
 12 |       ; Time.Ofday.to_string_trimmed
 13 |           (Time.Ofday.of_span_since_start_of_day_exn (Time.Span.abs utc_offset))
 14 |       ]
 15 | ;;
 16 | 
 17 | let rfc822_date ?(zone = force Time.Zone.local) now =
 18 |   let offset_string =
 19 |     utc_offset_string ~zone now |> String.filter ~f:(fun c -> Base.Char.( <> ) c ':')
 20 |   in
 21 |   let now_string = Time.format now "%a, %d %b %Y %H:%M:%S" ~zone in
 22 |   sprintf "%s %s" now_string offset_string
 23 | ;;
 24 | 
 25 | open Angstrom
 26 | open Let_syntax
 27 | 
 28 | (* Folding whitespace and comments. See RFC2822#3.2.3 *)
 29 | let comment =
 30 |   let comment_text =
 31 |     skip (function
 32 |       (* slight simplification of the RFC *)
 33 |       | '(' | ')' | '\\' -> false
 34 |       | _ -> true)
 35 |   in
 36 |   let quoted_pair =
 37 |     let%map (_ : char) = char '\\' *> any_char in
 38 |     ()
 39 |   in
 40 |   fix (fun comment ->
 41 |     char '(' *> skip_many (comment_text <|> quoted_pair <|> comment) <* char ')')
 42 | ;;
 43 | 
 44 | let single_whitespace_or_comment = comment <|> skip Char.is_whitespace
 45 | let folding_whitespace = skip_many1 single_whitespace_or_comment <?> "FWS"
 46 | let optional_folding_whitespace = skip_many single_whitespace_or_comment <?> "?FWS"
 47 | 
 48 | let rec skip_min_max ~min ~max thing =
 49 |   assert (min <= max);
 50 |   if min > 0
 51 |   then thing *> skip_min_max ~min:(min - 1) ~max:(max - 1) thing
 52 |   else if max > 0
 53 |   then option () (skip_min_max ~min:1 ~max thing)
 54 |   else return ()
 55 | ;;
 56 | 
 57 | let parse_two_digit_int =
 58 |   consumed (skip_min_max (skip Char.is_digit) ~min:2 ~max:2) >>| Int.of_string
 59 | ;;
 60 | 
 61 | let parse_two_to_four_digit_int =
 62 |   consumed (skip_min_max (skip Char.is_digit) ~min:2 ~max:4) >>| Int.of_string
 63 | ;;
 64 | 
 65 | let parse_one_or_two_digit_int =
 66 |   consumed (skip_min_max (skip Char.is_digit) ~min:1 ~max:2) >>| Int.of_string
 67 | ;;
 68 | 
 69 | let parse_day_of_week =
 70 |   choice (List.map Day_of_week.all ~f:(Fn.compose string_ci Day_of_week.to_string))
 71 |   <?> "day of week"
 72 | ;;
 73 | 
 74 | let parse_month =
 75 |   choice
 76 |     (List.map Month.all ~f:(fun month ->
 77 |        const month <$> string_ci (Month.to_string month)))
 78 |   <?> "month"
 79 | ;;
 80 | 
 81 | let parse_time_zone =
 82 |   let utc_offset =
 83 |     (let%mapn sign =
 84 |        Angstrom.choice [ const Sign.Pos <$> char '+'; const Sign.Neg <$> char '-' ]
 85 |        <?> "sign"
 86 |      and hours = parse_two_digit_int <?> "hours"
 87 |      and minutes = parse_two_digit_int <?> "minutes" in
 88 |      let utc_offset = Time.Span.create ~sign ~hr:hours ~min:minutes () in
 89 |      if not
 90 |           (Time.Span.between
 91 |              utc_offset
 92 |              ~low:(Time.Span.neg Time.Span.day)
 93 |              ~high:Time.Span.day)
 94 |      then raise_s [%message "The supplied UTC offset is semantically invalid."];
 95 |      utc_offset)
 96 |     <?> "utc offset"
 97 |   in
 98 |   let military_time_zone =
 99 |     const Time.Span.zero
100 |     <$> (List.init 26 ~f:(fun i ->
101 |            Char.of_int_exn (Char.to_int 'A' + i) |> Char.to_string)
102 |          |> List.filter ~f:(String.( <> ) "J")
103 |          |> List.map ~f:string_ci
104 |          |> choice)
105 |     <?> "military zone"
106 |   in
107 |   let obsolete_zone =
108 |     [ "UT", Time.Span.zero
109 |     ; "GMT", Time.Span.zero
110 |     ; "EST", Time.Span.create ~sign:Sign.Neg ~hr:5 ()
111 |     ; "EDT", Time.Span.create ~sign:Sign.Neg ~hr:4 ()
112 |     ; "CST", Time.Span.create ~sign:Sign.Neg ~hr:6 ()
113 |     ; "CDT", Time.Span.create ~sign:Sign.Neg ~hr:5 ()
114 |     ; "MST", Time.Span.create ~sign:Sign.Neg ~hr:7 ()
115 |     ; "MDT", Time.Span.create ~sign:Sign.Neg ~hr:6 ()
116 |     ; "PST", Time.Span.create ~sign:Sign.Neg ~hr:8 ()
117 |     ; "PDT", Time.Span.create ~sign:Sign.Neg ~hr:7 ()
118 |     ]
119 |     |> List.map ~f:(fun (abbrev, offset) -> const offset <$> string_ci abbrev)
120 |     |> choice
121 |     <?> "obsolote zone"
122 |   in
123 |   choice [ obsolete_zone; military_time_zone; utc_offset ] <?> "time zone"
124 | ;;
125 | 
126 | let untruncate_year year =
127 |   (* As per https://tools.ietf.org/html/rfc5322#section-4.3 *)
128 |   if year <= 49 then 2000 + year else if year <= 999 then 1900 + year else year
129 | ;;
130 | 
131 | let parse_date =
132 |   (let%mapn () =
133 |      option () (parse_day_of_week *> option ' ' (char ',') *> folding_whitespace)
134 |    and day = parse_one_or_two_digit_int <?> "day" <* folding_whitespace
135 |    and month = parse_month
136 |    and year =
137 |      folding_whitespace *> (parse_two_to_four_digit_int >>| untruncate_year <?> "year")
138 |    in
139 |    Date.create_exn ~y:year ~m:month ~d:day)
140 |   <?> "date"
141 | ;;
142 | 
143 | let parse_time_of_day =
144 |   (let%mapn hour = parse_two_digit_int <?> "hour"
145 |    and minutes = char ':' *> parse_two_digit_int <?> "minute"
146 |    and seconds = option 0 (char ':' *> parse_two_digit_int <?> "second") in
147 |    Time.Ofday.create ~hr:hour ~min:minutes ~sec:seconds ())
148 |   <?> "time of day"
149 | ;;
150 | 
151 | let rfc2822_date_parser =
152 |   let%mapn date = parse_date <* folding_whitespace
153 |   and time_of_day = parse_time_of_day <* folding_whitespace
154 |   and utc_offset = parse_time_zone in
155 |   let time_no_zone = Time.of_date_ofday ~zone:Time.Zone.utc date time_of_day in
156 |   Time.sub time_no_zone utc_offset, utc_offset
157 | ;;
158 | 
159 | (* See https://tools.ietf.org/html/rfc5322#section-3.3 for the full spec. Also note
160 |    https://tools.ietf.org/html/rfc5322#appendix-A.5 on whitespace.
161 | 
162 |    https://github.com/moment/moment/blob/022dc038af5ebafafa375f4566fb23366f4e4aa8/src/lib/create/from-string.js#L189
163 |    (alongside the RFC), was used as a reference for this implementation. *)
164 | let of_string_exn_with_utc_offset date =
165 |   match
166 |     parse_string
167 |       ~consume:All
168 |       (optional_folding_whitespace *> rfc2822_date_parser
169 |        <* optional_folding_whitespace
170 |        <* end_of_input)
171 |       date
172 |   with
173 |   | Ok time_and_utc_offset -> time_and_utc_offset
174 |   | Error message -> failwith ("Failed to parse RFC822 " ^ message)
175 | ;;
176 | 
177 | let of_string_exn date =
178 |   let time, _utc_offset = of_string_exn_with_utc_offset date in
179 |   time
180 | ;;
181 | 
182 | let of_string_exn_with_time_zone date =
183 |   let time, utc_offset = of_string_exn_with_utc_offset date in
184 |   let utc_offset_parts = Time.Span.to_parts utc_offset in
185 |   let hours_to_seconds hours = hours * 60 * 60 in
186 |   let minutes_to_seconds minutes = minutes * 60 in
187 |   let utc_offset_in_seconds =
188 |     Time.Span.of_int_sec
189 |       (Sign.to_int utc_offset_parts.sign
190 |        * (hours_to_seconds utc_offset_parts.hr + minutes_to_seconds utc_offset_parts.min)
191 |       )
192 |   in
193 |   time, Time.Zone.of_utc_offset_in_seconds_round_down utc_offset_in_seconds
194 | ;;
195 | 


--------------------------------------------------------------------------------
/src/email_date.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | (** Generate an RFC822-style date *)
 4 | val rfc822_date : ?zone:Time_float.Zone.t -> Time_float.t -> string
 5 | 
 6 | (** Parse an RFC822-style string into a [Time.t].
 7 | 
 8 |     Note that if the weekday is provided, it will not be semantically validated. *)
 9 | val of_string_exn : string -> Time_float.t
10 | 
11 | (** Similar to [of_string_exn], but also return back the time zone that was parsed from
12 |     the provided string. *)
13 | val of_string_exn_with_time_zone : string -> Time_float.t * Time_float.Zone.t
14 | 


--------------------------------------------------------------------------------
/src/email_message.ml:
--------------------------------------------------------------------------------
 1 | module Bigstring_shared = Email_message_kernel.Bigstring_shared
 2 | module Email = Email
 3 | module Email_address = Email_address
 4 | module Email_headers = Email_message_kernel.Headers
 5 | module Email_selector = Selector
 6 | module Email_wrapper = Wrapper
 7 | module Email_date = Email_date
 8 | module Mimestring = Email_message_kernel.Mimestring
 9 | module Octet_stream = Email_message_kernel.Octet_stream
10 | module String_monoid = String_monoid
11 | module Lf_or_crlf = Email_message_kernel.Lf_or_crlf
12 | 
13 | module Email_message_stable = struct
14 |   module Email = struct
15 |     include Email.Stable
16 |     module Raw_content = Email_message_kernel.Raw_content.Stable
17 |     module Simple = Email_message_kernel.Simple.Stable
18 |   end
19 | 
20 |   module Email_address = Email_address.Stable
21 |   module Email_wrapper = Email_wrapper.Stable
22 |   module Email_headers = Email_message_kernel.Headers.Stable
23 | end
24 | 
25 | module Private = struct
26 |   include Email_message_kernel.Private
27 | end
28 | 


--------------------------------------------------------------------------------
/src/email_simple.mli:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | open! Async
  3 | open! Import
  4 | 
  5 | (*_ break dependency cycle *)
  6 | 
  7 | module Email := Email_message_kernel
  8 | 
  9 | (** @inline *)
 10 | include
 11 |   Email_message_kernel.Private.Email_simple_intf.Email_simple
 12 |   with module Content := Email_message_kernel.Simple.Content
 13 |    and module Expert := Email_message_kernel.Simple.Expert
 14 |    and module Stable := Email_message_kernel.Simple.Stable
 15 | 
 16 | type attachment_name = string [@@deriving sexp_of]
 17 | 
 18 | (** For parsing attachments. Use [create ~attachments] to add attachments. Convenience
 19 |     functions for email parts that have "Content-Disposition: attachment" *)
 20 | module Attachment : sig
 21 |   module Id : sig
 22 |     type t [@@deriving compare, sexp_of]
 23 |   end
 24 | 
 25 |   type t
 26 | 
 27 |   (** In a given email, each attachment has a unique [Id.t] that is determined by the
 28 |       email structure. *)
 29 |   val id : t -> Id.t
 30 | 
 31 |   (** The headers surrounding this attachment *)
 32 |   val headers : t -> Headers.t
 33 | 
 34 |   (** [Some email] if this is an attached message/rfc822 content *)
 35 |   val embedded_email : t -> Email.t option
 36 | 
 37 |   (** These are expensive operations *)
 38 |   val raw_data : t -> Bigstring_shared.t Or_error.t
 39 | 
 40 |   val md5 : t -> string Or_error.t
 41 |   val sha256 : t -> string Or_error.t
 42 |   val filename : t -> attachment_name
 43 | 
 44 |   (** [filename] decoded as per [Headers.Encoded_word.decode] *)
 45 |   val decoded_filename : t -> attachment_name
 46 | 
 47 |   (** This function is similar to [decode_filename], but it includes charset details.
 48 |       Right now, the email attachment downloader uses it to convert attachment names to
 49 |       UTF-8 using lib/iconv. In the future, we might want to add UTF-8 conversion directly
 50 |       into [Email_message] and swap this out with [decode_filename_with_utf_8_conversion].
 51 |       However, we can't do that at the moment because of a vulnerability in iconv *)
 52 |   type decode_error =
 53 |     [ `Multiple_distinct_charsets_in_attachment_name of
 54 |       [ `Encoded of Headers.Encoded_word.Charset.t * attachment_name
 55 |       | `Plain of attachment_name
 56 |       ]
 57 |         Nonempty_list.t
 58 |     | `Empty_attachment_name
 59 |     ]
 60 | 
 61 |   val decoded_filename_with_charset
 62 |     :  t
 63 |     -> (Headers.Encoded_word.Charset.t option * attachment_name, decode_error) result
 64 | 
 65 |   val to_file : t -> string -> unit Deferred.Or_error.t
 66 | end
 67 | 
 68 | module Content : sig
 69 |   include Email_message_kernel.Private.Email_simple_intf.Content
 70 | 
 71 |   val of_file
 72 |     :  ?content_type:Mimetype.t
 73 |     -> ?encoding:Octet_stream.Encoding.known
 74 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 75 |     -> string
 76 |     -> t Deferred.t
 77 | 
 78 |   (** Save content to disk *)
 79 |   val to_file : t -> string -> unit Deferred.Or_error.t
 80 | end
 81 | 
 82 | val create
 83 |   :  ?from:Email_address.t (** defaults to <user@host> *)
 84 |   -> to_:Email_address.t list
 85 |   -> ?cc:Email_address.t list
 86 |   -> ?reply_to:Email_address.t
 87 |   -> subject:string
 88 |   -> ?id:string
 89 |   -> ?in_reply_to:string
 90 |   -> ?date:Time_float.t
 91 |   -> ?auto_generated:unit
 92 |   -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
 93 |   -> ?attachments:(attachment_name * Content.t) list
 94 |   -> ?no_tracing_headers:[ `Because_not_using_standard_email_infra ]
 95 |   -> Content.t
 96 |   -> t
 97 | 
 98 | val create_utf8
 99 |   :  ?from:Email_address.t (** defaults to <user@host> *)
100 |   -> to_:Email_address.t list
101 |   -> ?cc:Email_address.t list
102 |   -> ?reply_to:Email_address.t
103 |   -> subject:string
104 |   -> ?id:string
105 |   -> ?in_reply_to:string
106 |   -> ?date:Time_float.t
107 |   -> ?auto_generated:unit
108 |   -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
109 |   -> ?attachments:(attachment_name * Content.t) list
110 |   -> ?no_tracing_headers:[ `Because_not_using_standard_email_infra ]
111 |   -> Content.t
112 |   -> t
113 | 
114 | (** A unique value to be used in a Message-Id header *)
115 | val make_id : unit -> Headers.Value.t
116 | 
117 | val local_address : unit -> Email_address.t
118 | 
119 | (** [all_attachments] looks recursively through the e-mail parts, looking for attachments.
120 | 
121 |     [~include_inline_parts] (default is `None) controls whether this function will attempt
122 |     to interpret inline parts as attachments. [`Named_or_has_content_id] most aggressively
123 |     classifies parts as attachments, including inline parts that are either named or have
124 |     a Content-Id header. [`Named] will include inline parts that are named.
125 | 
126 |     If [~look_through_attached_mails:true] (the default), it will separately include both
127 |     e-mail attachments as well as the attachments to those e-mails. Otherwise it will
128 |     include e-mail attachments but not (separately) any of the attached e-mails'
129 |     attachments.
130 | 
131 |     [decode_filename_using] defines which character sets should be used to decode
132 |     attachment filenames. This option is mainly there to keep legacy behaviour, where
133 |     KS-C-5601 encoded filenames aren't decoded by default. *)
134 | val all_attachments
135 |   :  ?decode_filename_using:Headers.Encoded_word.Charset.t list
136 |   -> ?include_inline_parts:[ `None | `Named | `Named_or_has_content_id ]
137 |   -> ?look_through_attached_mails:bool
138 |   -> t
139 |   -> Attachment.t list
140 | 
141 | val find_attachment : t -> attachment_name -> Attachment.t option
142 | 
143 | (** [map_attachments] recurses into message/rfc822 parts. However, if a message/rfc822
144 |     part is replaced, there is no further recursion.
145 | 
146 |     [decode_filename_using] defines which character sets should be used to decode
147 |     attachment filenames. This option is mainly there to keep legacy behaviour, where
148 |     KS-C-5601 encoded filenames aren't decoded by default. *)
149 | val map_attachments
150 |   :  ?include_inline_parts:[ `None | `Named | `Named_or_has_content_id ]
151 |   -> ?decode_filename_using:Headers.Encoded_word.Charset.t list
152 |   -> t
153 |   -> f:(Attachment.t -> [ `Keep | `Replace of t ])
154 |   -> t
155 | 
156 | module Expert : sig
157 |   include Email_message_kernel.Private.Email_simple_intf.Expert
158 | 
159 |   val create_raw
160 |     :  ?from:string (** defaults to <user> *)
161 |     -> to_:string list
162 |     -> ?cc:string list
163 |     -> ?reply_to:string
164 |     -> subject:string
165 |     -> ?id:string
166 |     -> ?in_reply_to:string
167 |     -> ?date:string
168 |     -> ?auto_generated:unit
169 |     -> ?extra_headers:(Headers.Name.t * Headers.Value.t) list
170 |     -> ?attachments:(attachment_name * Content.t) list
171 |     -> ?no_tracing_headers:[ `Because_not_using_standard_email_infra ]
172 |     -> Content.t
173 |     -> t
174 | end
175 | 
176 | module Stable : sig
177 |   include Email_message_kernel.Private.Email_simple_intf.Stable
178 | 
179 |   module Attachment : sig
180 |     module Id : sig
181 |       module V1 : Stable_without_comparator with type t = Attachment.Id.t
182 |     end
183 |   end
184 | end
185 | 


--------------------------------------------------------------------------------
/src/import.ml:
--------------------------------------------------------------------------------
1 | module Headers = Email_message_kernel.Headers
2 | module Bigstring_shared = Email_message_kernel.Bigstring_shared
3 | module Octet_stream = Email_message_kernel.Octet_stream
4 | module Email_content = Email_message_kernel.Content
5 | module Mimestring = Email_message_kernel.Mimestring
6 | module Lf_or_crlf = Email_message_kernel.Lf_or_crlf
7 | 


--------------------------------------------------------------------------------
/src/selector.ml:
--------------------------------------------------------------------------------
 1 | module Stable = struct
 2 |   open Core.Core_stable
 3 | 
 4 |   module Base = struct
 5 |     module V1 = struct
 6 |       type t =
 7 |         [ `exists_header of string * Re2.Stable.V1_no_options.t
 8 |         | `all_headers of string * Re2.Stable.V1_no_options.t
 9 |         ]
10 |       [@@deriving bin_shape, sexp]
11 | 
12 |       let%expect_test _ =
13 |         print_endline [%bin_digest: t];
14 |         [%expect {| cbce30b485bdbeb9e93285287a91e7f5 |}]
15 |       ;;
16 |     end
17 |   end
18 | 
19 |   module V1 = struct
20 |     type t = Base.V1.t Blang.V1.t [@@deriving bin_shape, sexp]
21 | 
22 |     let%expect_test _ =
23 |       print_endline [%bin_digest: t];
24 |       [%expect {| 96b50dce93691f115ea076e70421edf9 |}]
25 |     ;;
26 |   end
27 | end
28 | 
29 | open Core
30 | open! Import
31 | module Regex = Re2
32 | 
33 | module Base = struct
34 |   type t =
35 |     (* When adding to this type, don't forget to add to examples below. *)
36 |     [ `exists_header of string * Regex.t
37 |     | `all_headers of string * Regex.t
38 |     ]
39 |   [@@deriving sexp_of]
40 | 
41 |   let matches' t headers =
42 |     match t with
43 |     | `exists_header (header, regex) ->
44 |       let headers = Headers.find_all headers header in
45 |       List.exists headers ~f:(Regex.matches regex)
46 |     | `all_headers (header, regex) ->
47 |       let headers = Headers.find_all headers header in
48 |       List.for_all headers ~f:(Regex.matches regex)
49 |   ;;
50 | 
51 |   let matches t email = matches' t (Email.headers email)
52 | 
53 |   let examples =
54 |     [ `exists_header ("cc", Regex.of_string ".*@janestreet.com")
55 |     ; `all_headers ("cc", Regex.of_string ".*@janestreet.com")
56 |     ]
57 |   ;;
58 | end
59 | 
60 | type t = Base.t Blang.t [@@deriving sexp_of]
61 | 
62 | let matches' t headers = Blang.eval t (fun base -> Base.matches' base headers)
63 | let matches t email = matches' t (Email.headers email)
64 | let example : t = Blang.and_ (List.map Base.examples ~f:Blang.base)
65 | 


--------------------------------------------------------------------------------
/src/selector.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open! Import
 3 | 
 4 | module Base : sig
 5 |   (*_ The "exists" query can equally be accomplished using [sexp query], but
 6 |     not the "all" one. *)
 7 | 
 8 |   type t =
 9 |     [ `exists_header of string * Re2.t
10 |     | `all_headers of string * Re2.t
11 |     ]
12 |   [@@deriving sexp_of]
13 | 
14 |   val matches : t -> Email.t -> bool
15 |   val matches' : t -> Headers.t -> bool
16 |   val examples : t list
17 | end
18 | 
19 | type t = Base.t Blang.t [@@deriving sexp_of]
20 | 
21 | val matches : t -> Email.t -> bool
22 | val matches' : t -> Headers.t -> bool
23 | val example : t
24 | 
25 | module Stable : sig
26 |   module Base : sig
27 |     module V1 : sig
28 |       type t = [ | Base.t ] [@@deriving bin_shape, sexp]
29 |     end
30 |   end
31 | 
32 |   module V1 : sig
33 |     type nonrec t = t [@@deriving bin_shape, sexp]
34 |   end
35 | end
36 | 


--------------------------------------------------------------------------------
/src/string_monoid.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open! Import
 3 | include Email_message_kernel.String_monoid
 4 | 
 5 | module Underlying = struct
 6 |   include Underlying
 7 | 
 8 |   let output_channel ~channel = function
 9 |     | String str -> Out_channel.output_string channel str
10 |     | Bigstring bstr -> Bigstring_unix.really_output channel bstr
11 |     | Char c -> Out_channel.output_char channel c
12 |   ;;
13 | 
14 |   let output_unix ~writer = function
15 |     | String str -> Async.Writer.write writer str
16 |     | Bigstring bstr -> Async.Writer.write_bigstring writer bstr
17 |     | Char c -> Async.Writer.write_char writer c
18 |   ;;
19 | end
20 | 
21 | let output_channel t channel =
22 |   Private.output ~dst_output:(Underlying.output_channel ~channel) t
23 | ;;
24 | 
25 | let output_unix t writer = Private.output ~dst_output:(Underlying.output_unix ~writer) t
26 | 


--------------------------------------------------------------------------------
/src/string_monoid.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open! Async
 3 | 
 4 | include Email_message_kernel.Private.String_monoid_intf.String_monoid (** @inline *)
 5 | 
 6 | (*_
 7 |   For the library to fulfill it's purpose of minimal overhead
 8 |   string concatenation, the output functions must be tightly
 9 |   coupled with the low-level representation.
10 | 
11 |   Any new output channel should be implemented as new methods
12 |   of the library itself.
13 | *)
14 | 
15 | val output_unix : t -> Writer.t -> unit
16 | val output_channel : t -> Out_channel.t -> unit
17 | 


--------------------------------------------------------------------------------
/src/wrapper.ml:
--------------------------------------------------------------------------------
  1 | module Stable = struct
  2 |   open Core.Core_stable
  3 |   module Email_simple = Email_simple.Stable
  4 |   module Email_address = Email_address.Stable
  5 | 
  6 |   module V1 = struct
  7 |     type t =
  8 |       { header : Email_simple.Content.V1.t
  9 |       ; from : [ `Keep | `Change_to of Email_address.V1.t ]
 10 |       ; to_ : [ `Keep | `Change_to of Email_address.V1.t list ]
 11 |       ; cc : [ `Keep | `Change_to of Email_address.V1.t list ]
 12 |       ; subject : [ `Keep | `Prepend of string ]
 13 |       }
 14 |     [@@deriving sexp, bin_io]
 15 |   end
 16 | end
 17 | 
 18 | open! Core
 19 | open! Import
 20 | 
 21 | type t = Stable.V1.t =
 22 |   { header : Email_simple.Content.t
 23 |   ; from : [ `Keep | `Change_to of Email_address.t ]
 24 |   ; to_ : [ `Keep | `Change_to of Email_address.t list ]
 25 |   ; cc : [ `Keep | `Change_to of Email_address.t list ]
 26 |   ; subject : [ `Keep | `Prepend of string ]
 27 |   }
 28 | [@@deriving sexp_of]
 29 | 
 30 | let create ?(from = `Keep) ?(to_ = `Keep) ?(cc = `Keep) ?(subject = `Keep) header =
 31 |   { header; from; to_; cc; subject }
 32 | ;;
 33 | 
 34 | let create_from_email email =
 35 |   let get_header x = Headers.last ~normalize:`None (Email.headers email) x in
 36 |   let from =
 37 |     match get_header "From" with
 38 |     | None | Some "" -> `Keep
 39 |     | Some from -> `Change_to (Email_address.of_string_exn from)
 40 |   in
 41 |   let to_ =
 42 |     match get_header "To" with
 43 |     | None | Some "" -> `Keep
 44 |     | Some to_ -> `Change_to (Email_address.list_of_string_exn to_)
 45 |   in
 46 |   let cc =
 47 |     match get_header "Cc" with
 48 |     | None | Some "" -> `Keep
 49 |     | Some cc -> `Change_to (Email_address.list_of_string_exn cc)
 50 |   in
 51 |   let subject =
 52 |     match get_header "Subject" with
 53 |     | None | Some "" -> `Keep
 54 |     | Some subject -> `Prepend subject
 55 |   in
 56 |   let email =
 57 |     Email.modify_headers
 58 |       email
 59 |       ~f:
 60 |         (Headers.filter ~f:(fun ~name ~value:_ ->
 61 |            let open String.Caseless.Replace_polymorphic_compare in
 62 |            name <> "From" && name <> "To" && name <> "Cc" && name <> "Subject"))
 63 |   in
 64 |   create ~from ~to_ ~cc ~subject (Email_simple.Content.of_email email)
 65 | ;;
 66 | 
 67 | let content_of_email email =
 68 |   Email_simple.Content.create_custom
 69 |     ~content_type:(Email_simple.Mimetype.of_string "message/rfc822")
 70 |     (Email.to_string email)
 71 | ;;
 72 | 
 73 | (* We must be very careful with the email headers that we use in the new email. We use the
 74 |    following policies:
 75 | 
 76 |    (1) Add "From", "To", "Cc", "Subject" according to the supplied arguments to [create]
 77 |    (2) Copy over all other headers except:
 78 |    (i) DKIM-Signature - We break the signing by altering the email content
 79 |    (ii) Return-Path - We don't want the altered email to ever bounce back to the
 80 |    original sender
 81 |    (iii) Content-Transfer-Encoding, Content-Type, Content-Disposition - We structure
 82 |    the email differently. These wouldn't make sense anymore
 83 | *)
 84 | let add { header; from; to_; cc; subject } email =
 85 |   let content = Email_simple.Content.mixed [ header; content_of_email email ] in
 86 |   let headers = Email.headers email in
 87 |   let get_headers x = Headers.find_all ~normalize:`None headers x in
 88 |   let get_header x = Headers.last ~normalize:`None headers x in
 89 |   let from =
 90 |     match from with
 91 |     | `Keep -> get_header "From" |> Option.value ~default:""
 92 |     | `Change_to addr -> Email_address.to_string addr
 93 |   in
 94 |   let to_ =
 95 |     match to_ with
 96 |     | `Keep -> get_headers "To"
 97 |     | `Change_to addrs -> List.map addrs ~f:Email_address.to_string
 98 |   in
 99 |   let cc =
100 |     match cc with
101 |     | `Keep -> get_headers "Cc"
102 |     | `Change_to addrs -> List.map addrs ~f:Email_address.to_string
103 |   in
104 |   let subject =
105 |     let subj = get_header "Subject" |> Option.value ~default:"" in
106 |     match subject with
107 |     | `Keep -> subj
108 |     | `Prepend str -> sprintf "%s %s" str subj
109 |   in
110 |   let extra_headers =
111 |     Headers.filter headers ~f:(fun ~name ~value:_ ->
112 |       match name with
113 |       | "From"
114 |       | "To"
115 |       | "Cc"
116 |       | "Subject"
117 |       | "Message-Id"
118 |       | "Date"
119 |       | "DKIM-Signature"
120 |       | "Return-Path"
121 |       | "Content-Transfer-Encoding"
122 |       | "Content-Type"
123 |       | "Content-Disposition" -> false
124 |       | _ -> true)
125 |     |> Headers.to_list
126 |   in
127 |   let id = get_header "Message-Id" in
128 |   let date = get_header "Date" in
129 |   Email_simple.Expert.create_raw ?id ?date ~from ~to_ ~cc ~subject ~extra_headers content
130 | ;;
131 | 


--------------------------------------------------------------------------------
/src/wrapper.mli:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | 
 3 | (** {v
 4 |  This module exposes a function to add a custom wrapper around an existing email. Gmail
 5 |     will result in the following formatting:
 6 | 
 7 |     CUSTOM WRAPPER TEXT
 8 | 
 9 |     ---------- Forwarded message ----------
10 |     From: (1)
11 |     To: (2)
12 |     Cc: (3)
13 |     Date: (4)
14 |     Subject: (5)
15 |     (6)
16 | 
17 |     where (1)-(6) are taken from the original email.
18 |     v} *)
19 | 
20 | type t [@@deriving sexp_of]
21 | 
22 | (** Create a [Wrapper.t] that can be used to wrap emails *)
23 | 
24 | val create
25 |   :  ?from:[ `Keep | `Change_to of Email_address.t ] (** default: `Keep *)
26 |   -> ?to_:[ `Keep | `Change_to of Email_address.t list ] (** default: `Keep *)
27 |   -> ?cc:[ `Keep | `Change_to of Email_address.t list ] (** default: `Keep *)
28 |   -> ?subject:[ `Keep | `Prepend of string ] (** default: `Keep *)
29 |   -> Email_simple.Content.t
30 |   -> t
31 | 
32 | (** Like [create], but extract [?from], [?to_], [?cc] and [?subject] from the headers
33 |     "From:", "To:", "Cc:" and "Subject:" respectively. *)
34 | val create_from_email : Email.t -> t
35 | 
36 | (** Transform an email by wrapping it according to the [Wrapper.t] *)
37 | val add : t -> Email.t -> Email.t
38 | 
39 | module Stable : sig
40 |   module V1 : sig
41 |     type nonrec t = t [@@deriving sexp, bin_io]
42 |   end
43 | end
44 | 


--------------------------------------------------------------------------------
/test/dune:
--------------------------------------------------------------------------------
1 | (library
2 |  (name email_message_test)
3 |  (libraries async core email_message expect_test_helpers_async
4 |    expect_test_helpers_core core_unix.time_float_unix)
5 |  (preprocess
6 |   (pps ppx_jane)))
7 | 


--------------------------------------------------------------------------------
/test/email_message_test.ml:
--------------------------------------------------------------------------------
 1 | module Test_boundary = Test_boundary
 2 | module Test_email = Test_email
 3 | module Test_email_content = Test_email_content
 4 | module Test_email_date = Test_email_date
 5 | module Test_email_save = Test_email_save
 6 | module Test_email_simple = Test_email_simple
 7 | module Test_encoded_word = Test_encoded_word
 8 | module Test_headers = Test_headers
 9 | module Test_media_type = Test_media_type
10 | module Test_octet_stream = Test_octet_stream
11 | module Test_related_parts = Test_related_parts
12 | module Test_rfc = Test_rfc
13 | 


--------------------------------------------------------------------------------
/test/test_boundary.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | open Async
  3 | open Email_message
  4 | open Email_message.Private
  5 | open Expect_test_helpers_core
  6 | open Boundary
  7 | 
  8 | let boundary = of_string "BOUNDARY"
  9 | 
 10 | let split str =
 11 |   let bs = Bigstring_shared.to_string in
 12 |   let prologue, parts, epilogue = split boundary (Bigstring_shared.of_string str) in
 13 |   let prologue = Option.map prologue ~f:bs in
 14 |   let parts = List.map parts ~f:bs in
 15 |   let epilogue = Option.map epilogue ~f:bs in
 16 |   print_s
 17 |     [%message
 18 |       "" (prologue : string option) (parts : string list) (epilogue : string option)]
 19 | ;;
 20 | 
 21 | let join (prologue, parts, epilogue) =
 22 |   let prologue = Option.map prologue ~f:Bigstring_shared.of_string in
 23 |   let epilogue = Option.map epilogue ~f:Bigstring_shared.of_string in
 24 |   let parts = List.map parts ~f:String_monoid.of_string in
 25 |   let joined = join_without_checking_for_conflicts ?prologue ~parts ?epilogue boundary in
 26 |   (* Expect tests ignore leading and trailing whitespace (and common indentation)
 27 |      on both the 'expected' and 'actual' outputs. Wrapping in '######'s ensures
 28 |      that no whitespace is stripped. *)
 29 |   printf "######\n%s\n######" (String_monoid.to_string joined)
 30 | ;;
 31 | 
 32 | let%expect_test "split" =
 33 |   (* Simple tests with no prologue or epilogue *)
 34 |   split "--BOUNDARY\n\n--BOUNDARY--";
 35 |   [%expect {| ((prologue ()) (parts ("")) (epilogue ())) |}];
 36 |   split "--BOUNDARY\nP\n--BOUNDARY--";
 37 |   [%expect {| ((prologue ()) (parts (P)) (epilogue ())) |}];
 38 |   split "--BOUNDARY\r\nP\r\n--BOUNDARY--";
 39 |   [%expect {| ((prologue ()) (parts (P)) (epilogue ())) |}];
 40 |   split "--BOUNDARY\nP\n--BOUNDARY\nQ\n--BOUNDARY--";
 41 |   [%expect {| ((prologue ()) (parts (P Q)) (epilogue ())) |}];
 42 |   (* Prologue and epilogue *)
 43 |   split "A\n--BOUNDARY\nP\n--BOUNDARY--";
 44 |   [%expect
 45 |     {|
 46 |     ((prologue (A))
 47 |      (parts    (P))
 48 |      (epilogue ()))
 49 |     |}];
 50 |   split "--BOUNDARY\nP\n--BOUNDARY--\nB";
 51 |   [%expect
 52 |     {|
 53 |     ((prologue ())
 54 |      (parts    (P))
 55 |      (epilogue ("\nB")))
 56 |     |}];
 57 |   split "A\n--BOUNDARY\nP\n--BOUNDARY--\nB";
 58 |   [%expect
 59 |     {|
 60 |     ((prologue (A))
 61 |      (parts    (P))
 62 |      (epilogue ("\nB")))
 63 |     |}];
 64 |   split "A\r\n--BOUNDARY\r\nP\r\n--BOUNDARY--\r\nB";
 65 |   [%expect
 66 |     {|
 67 |     ((prologue (A))
 68 |      (parts    (P))
 69 |      (epilogue ("\r\nB")))
 70 |     |}];
 71 |   (* Preserve extra whitespace *)
 72 |   split "\nA\n\n--BOUNDARY\n\nP\n\n--BOUNDARY--\nB\n";
 73 |   [%expect
 74 |     {|
 75 |     ((prologue ("\nA\n"))
 76 |      (parts    ("\nP\n"))
 77 |      (epilogue ("\nB\n")))
 78 |     |}];
 79 |   (* Whitespace padding on boundary line *)
 80 |   split "--BOUNDARY \nB\n--BOUNDARY--";
 81 |   [%expect
 82 |     {|
 83 |     ((prologue ("--BOUNDARY \nB"))
 84 |      (parts    ())
 85 |      (epilogue ()))
 86 |     |}];
 87 |   (* Content with something that looks like a boundary *)
 88 |   split "--BOUNDARY\nnot a --BOUNDARY\nnot a =\n--BOUNDARY either\n--BOUNDARY--";
 89 |   [%expect
 90 |     {|
 91 |     ((prologue ())
 92 |      (parts ("not a --BOUNDARY\nnot a =\n--BOUNDARY either"))
 93 |      (epilogue ()))
 94 |     |}];
 95 |   return ()
 96 | ;;
 97 | 
 98 | let%expect_test "join" =
 99 |   (* Simple tests with no prologue or epilogue *)
100 |   join (None, [ "" ], None);
101 |   [%expect
102 |     {|
103 |     ######
104 |     --BOUNDARY
105 | 
106 |     --BOUNDARY--
107 |     ######
108 |     |}];
109 |   let%bind () =
110 |     join (None, [ "P" ], None);
111 |     [%expect
112 |       {|
113 |       ######
114 |       --BOUNDARY
115 |       P
116 |       --BOUNDARY--
117 |       ######
118 |       |}];
119 |     return ()
120 |   in
121 |   let%bind () =
122 |     join (None, [ "P"; "Q" ], None);
123 |     [%expect
124 |       {|
125 |       ######
126 |       --BOUNDARY
127 |       P
128 |       --BOUNDARY
129 |       Q
130 |       --BOUNDARY--
131 |       ######
132 |       |}];
133 |     return ()
134 |   in
135 |   (* Prologue and epilogue *)
136 |   join (Some "A", [ "P" ], None);
137 |   [%expect
138 |     {|
139 |     ######
140 |     A
141 |     --BOUNDARY
142 |     P
143 |     --BOUNDARY--
144 |     ######
145 |     |}];
146 |   join (None, [ "P" ], Some "\nB");
147 |   [%expect
148 |     {|
149 |     ######
150 |     --BOUNDARY
151 |     P
152 |     --BOUNDARY--
153 |     B
154 |     ######
155 |     |}];
156 |   join (Some "A", [ "P" ], Some "\nB");
157 |   [%expect
158 |     {|
159 |     ######
160 |     A
161 |     --BOUNDARY
162 |     P
163 |     --BOUNDARY--
164 |     B
165 |     ######
166 |     |}];
167 |   (* Preserve extra whitespace *)
168 |   join (Some "\nA\n", [ "\nP\n" ], Some "\nB\n");
169 |   [%expect
170 |     {|
171 |     ######
172 | 
173 |     A
174 | 
175 |     --BOUNDARY
176 | 
177 |     P
178 | 
179 |     --BOUNDARY--
180 |     B
181 | 
182 |     ######
183 |     |}];
184 |   return ()
185 | ;;
186 | 
187 | module%test [@name "non-compliant"] _ = struct
188 |   (* The following tests document undefined behavior. *)
189 | 
190 |   let%expect_test "non-compliant [split]" =
191 |     (* Parsing of weird and malformed data into a sensible form *)
192 |     split "--BOUNDARY\n--BOUNDARY--";
193 |     [%expect {| ((prologue ()) (parts ("")) (epilogue ())) |}];
194 |     split "";
195 |     [%expect
196 |       {|
197 |       ((prologue (""))
198 |        (parts    ())
199 |        (epilogue ()))
200 |       |}];
201 |     split "\n";
202 |     [%expect
203 |       {|
204 |       ((prologue ("\n"))
205 |        (parts    ())
206 |        (epilogue ()))
207 |       |}];
208 |     (* Missing boundary markers *)
209 |     split "A\n--BOUNDARY--\nB";
210 |     [%expect {| ((prologue (A)) (parts ()) (epilogue ("\nB"))) |}];
211 |     split "--BOUNDARY--\nB";
212 |     [%expect {| ((prologue ("")) (parts ()) (epilogue ("\nB"))) |}];
213 |     split "--BOUNDARY--\n";
214 |     [%expect {| ((prologue ("")) (parts ()) (epilogue ("\n"))) |}];
215 |     split "--BOUNDARY--";
216 |     [%expect
217 |       {|
218 |       ((prologue (""))
219 |        (parts    ())
220 |        (epilogue ()))
221 |       |}];
222 |     split "A\n--BOUNDARY--";
223 |     [%expect
224 |       {|
225 |       ((prologue (A))
226 |        (parts    ())
227 |        (epilogue ()))
228 |       |}];
229 |     return ()
230 |   ;;
231 | 
232 |   let%expect_test "non-compliant [join]" =
233 |     join (Some "A", [], Some "\nB");
234 |     [%expect
235 |       {|
236 |       ######
237 |       A
238 |       B
239 |       ######
240 |       |}];
241 |     join (None, [], Some "\nB");
242 |     [%expect
243 |       {|
244 |       ######
245 | 
246 |       B
247 |       ######
248 |       |}];
249 |     join (None, [], None);
250 |     [%expect
251 |       {|
252 |       ######
253 | 
254 | 
255 |       ######
256 |       |}];
257 |     join (Some "A", [], None);
258 |     [%expect
259 |       {|
260 |       ######
261 |       A
262 |       ######
263 |       |}];
264 |     return ()
265 |   ;;
266 | end
267 | 


--------------------------------------------------------------------------------
/test/test_boundary.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_email.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | open Async
  3 | open Email_message
  4 | open Email
  5 | 
  6 | let parse s =
  7 |   let parsed = of_string s in
  8 |   printf !"%{sexp:t}\n" parsed;
  9 |   let roundtripped = to_string parsed in
 10 |   printf !"Successfully roundtripped: %{sexp:bool}" (String.equal s roundtripped)
 11 | ;;
 12 | 
 13 | let%expect_test "simple" =
 14 |   parse "From: foo@bar.com\nTo: foo@bar.com\n\nhello world";
 15 |   [%expect
 16 |     {|
 17 |     ((headers ((From " foo@bar.com") (To " foo@bar.com")))
 18 |      (raw_content ("hello world")))
 19 |     Successfully roundtripped: true
 20 |     |}];
 21 |   return ()
 22 | ;;
 23 | 
 24 | let%expect_test "no newlines" =
 25 |   parse "";
 26 |   [%expect
 27 |     {|
 28 |     ((headers ()) (raw_content ()))
 29 |     Successfully roundtripped: true
 30 |     |}];
 31 |   (* Header lines should be terminated with "\n". We add the missing "\n" when we
 32 |      [to_string]. *)
 33 |   parse "Header: hello world";
 34 |   [%expect
 35 |     {|
 36 |     ((headers ((Header " hello world"))) (raw_content ()))
 37 |     Successfully roundtripped: false
 38 |     |}];
 39 |   return ()
 40 | ;;
 41 | 
 42 | let%expect_test "1 newline" =
 43 |   (* This is malformed. I could imagine having [None] for [raw_content] as well. *)
 44 |   parse "\n";
 45 |   [%expect
 46 |     {|
 47 |     ((headers ()) (raw_content ("")))
 48 |     Successfully roundtripped: true
 49 |     |}];
 50 |   (* This is malformed. I could imagine having [Some ""] for [raw_content] as well. *)
 51 |   parse "Header: hello world\n";
 52 |   [%expect
 53 |     {|
 54 |     ((headers ((Header " hello world"))) (raw_content ()))
 55 |     Successfully roundtripped: true
 56 |     |}];
 57 |   (* This case is weird, see below for an explanation *)
 58 |   parse "Header: hello world\nBody";
 59 |   [%expect
 60 |     {|
 61 |     ((headers ((Header " hello world"))) (raw_content (Body)))
 62 |     Successfully roundtripped: false
 63 |     |}];
 64 |   return ()
 65 | ;;
 66 | 
 67 | let%expect_test "2 newlines" =
 68 |   parse "\n\n";
 69 |   [%expect
 70 |     {|
 71 |     ((headers ()) (raw_content ("\n")))
 72 |     Successfully roundtripped: true
 73 |     |}];
 74 |   parse "Header: hello world\n\nBody";
 75 |   [%expect
 76 |     {|
 77 |     ((headers ((Header " hello world"))) (raw_content (Body)))
 78 |     Successfully roundtripped: true
 79 |     |}];
 80 |   return ()
 81 | ;;
 82 | 
 83 | let%expect_test "weird headers" =
 84 |   (* Google and Exim both change to "body mode" (and adds a blank line) on the first
 85 |      line that "doesn't look like a header" for some slightly different
 86 |      interpretation of that phrase:
 87 | 
 88 |      - Google allows ASCII 32-57,59-126 (printable characters minus colon) in a
 89 |        header name
 90 | 
 91 |      - Exim allows ASCII 33-57,59-126 (printable characters minus colon minus space)
 92 |        in a header name
 93 | 
 94 |      RFC 5322 does not specify how to handle an invalid header line.  The following
 95 |      scenarios are possible:
 96 | 
 97 |      1) An invalid header (according to the above rules) is written somewhere in the
 98 |      header block.  Any following headers are incorrectly treated as the start of
 99 |      the message body.
100 | 
101 |      2) An MUA forgets to put a blank line between the header block and the body.
102 |      As long as the first body line does not look like a header (according to the
103 |      above rules), the "right thing" happens. *)
104 |   (* Make sure we can handle the obsolete syntax of headers with whitespace before the
105 |      colon. This doesn't roundtrip because we remove the whitespace before the ":"*)
106 |   parse "From: foo@bar.com\nObsolete-header : hello world\n";
107 |   [%expect
108 |     {|
109 |     ((headers ((From " foo@bar.com") (Obsolete-header " hello world")))
110 |      (raw_content ()))
111 |     Successfully roundtripped: false
112 |     |}];
113 |   (* Whitespace should not be a part of a header field.  Google considers this a
114 |      valid header.  Exim treats this as the start of the body. *)
115 |   parse "From: foo@bar.com\nMalformed header: hello world\n";
116 |   [%expect
117 |     {|
118 |     ((headers ((From " foo@bar.com")))
119 |      (raw_content ("Malformed header: hello world\n")))
120 |     Successfully roundtripped: false
121 |     |}];
122 |   (* RFC 5322 says that field names must contain at least 1 character, however
123 |      Google and Exim both don't have this requirement. In addition, we get some
124 |      messages in the wild that have broken headers like this. *)
125 |   parse "From: foo@bar.com\n: hello world\n";
126 |   [%expect
127 |     {|
128 |     ((headers ((From " foo@bar.com") ("" " hello world"))) (raw_content ()))
129 |     Successfully roundtripped: true
130 |     |}];
131 |   return ()
132 | ;;
133 | 
134 | (* RFC 2822 Section 2.2.3:
135 | 
136 |    {v
137 | 
138 |  The general rule is that wherever this standard allows for folding white space (not
139 |  simply WSP characters), a CRLF may be inserted before any WSP. For example, the header
140 |  field:
141 | 
142 |  Subject: This is a test
143 | 
144 |  can be represented as:
145 | 
146 |  Subject: This
147 |   is a test
148 | 
149 |    v}
150 | *)
151 | let%expect_test "Folding whitespace" =
152 |   parse
153 |     ([ "Subject: This is a multiline"; " subject"; ""; "body" ] |> String.concat ~sep:"\n");
154 |   [%expect
155 |     {|
156 |     ((headers ((Subject  " This is a multiline\
157 |                         \n subject")))
158 |      (raw_content (body)))
159 |     Successfully roundtripped: true
160 |     |}];
161 |   return ()
162 | ;;
163 | 


--------------------------------------------------------------------------------
/test/test_email.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_email_content.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | open Async
  3 | open Email_message
  4 | open Expect_test_helpers_core
  5 | open Email.Content
  6 | 
  7 | let parse s =
  8 |   let email = Email.of_string s in
  9 |   let unparsed = Email.raw_content email |> Email.Raw_content.to_bigstring_shared in
 10 |   let parsed = parse email |> ok_exn in
 11 |   print_s [%message "" ~_:(parsed : t)];
 12 |   let round_tripped = to_bigstring_shared parsed in
 13 |   assert ([%compare.equal: Bigstring_shared.t] unparsed round_tripped)
 14 | ;;
 15 | 
 16 | let%expect_test "simple content" =
 17 |   parse "From: foo@bar.com\n\nhello world";
 18 |   [%expect
 19 |     {|
 20 |     (Data (
 21 |       (encoding Bit7)
 22 |       (content  "hello world")))
 23 |     |}];
 24 |   return ()
 25 | ;;
 26 | 
 27 | let%expect_test "simple multipart" =
 28 |   parse
 29 |     "Content-Type: multipart/alternative; boundary=BOUNDARY\n\n\
 30 |      --BOUNDARY\n\
 31 |      Content-Type: text/plain; charset=UTF-8\n\n\
 32 |      Simple body\n\n\
 33 |      --BOUNDARY\n\
 34 |      Content-Type: text/html; charset=UTF-8\n\
 35 |      Content-Transfer-Encoding: quoted-printable\n\n\
 36 |      <div>Simple body</div>\n\n\
 37 |      --BOUNDARY--";
 38 |   [%expect
 39 |     {|
 40 |     (Multipart (
 41 |       (boundary BOUNDARY)
 42 |       (prologue ())
 43 |       (epilogue ())
 44 |       (parts (
 45 |         ((headers ((Content-Type " text/plain; charset=UTF-8")))
 46 |          (raw_content ("Simple body\n")))
 47 |         ((headers (
 48 |            (Content-Type " text/html; charset=UTF-8")
 49 |            (Content-Transfer-Encoding " quoted-printable")))
 50 |          (raw_content ("<div>Simple body</div>\n")))))
 51 |       (container_headers ((
 52 |         Content-Type " multipart/alternative; boundary=BOUNDARY")))))
 53 |     |}];
 54 |   return ()
 55 | ;;
 56 | 
 57 | let%expect_test "nested multipart" =
 58 |   parse
 59 |     "Content-Type: multipart/mixed; boundary=BOUNDARY1\n\n\
 60 |      --BOUNDARY1\n\
 61 |      Content-Type: multipart/alternative; boundary=BOUNDARY2\n\n\
 62 |      --BOUNDARY2\n\
 63 |      Content-Type: text/plain; charset=UTF-8\n\n\
 64 |      Simple body\n\n\
 65 |      --BOUNDARY2\n\
 66 |      Content-Type: text/html; charset=UTF-8\n\n\
 67 |      <div>Simple body</div>\n\n\
 68 |      --BOUNDARY2--\n\
 69 |      --BOUNDARY1\n\
 70 |      Content-Type: text/plain; charset=US-ASCII; name=\"attachment.txt\"\n\
 71 |      Content-Disposition: attachment; filename=\"attachment.txt\"\n\
 72 |      Content-Transfer-Encoding: base64\n\n\
 73 |      Zm9v\n\
 74 |      --BOUNDARY1--";
 75 |   [%expect
 76 |     {|
 77 |     (Multipart (
 78 |       (boundary BOUNDARY1)
 79 |       (prologue ())
 80 |       (epilogue ())
 81 |       (parts (
 82 |         ((headers ((Content-Type " multipart/alternative; boundary=BOUNDARY2")))
 83 |          (raw_content (
 84 |            "--BOUNDARY2\nContent-Type: text/plain; charset=UTF-8\n\nSimple body\n\n--BOUNDARY2\nContent-Type: text/html; charset=UTF-8\n\n<div>Simple body</div>\n\n--BOUNDARY2--")))
 85 |         ((headers (
 86 |            (Content-Type " text/plain; charset=US-ASCII; name=\"attachment.txt\"")
 87 |            (Content-Disposition " attachment; filename=\"attachment.txt\"")
 88 |            (Content-Transfer-Encoding " base64")))
 89 |          (raw_content (Zm9v)))))
 90 |       (container_headers ((Content-Type " multipart/mixed; boundary=BOUNDARY1")))))
 91 |     |}];
 92 |   return ()
 93 | ;;
 94 | 
 95 | let%expect_test "message/rfc822" =
 96 |   parse "Content-Type: message/rfc822\n\nFrom: foo@bar.com\n\nSample body";
 97 |   [%expect
 98 |     {| (Message ((headers ((From " foo@bar.com"))) (raw_content ("Sample body")))) |}];
 99 |   return ()
100 | ;;
101 | 


--------------------------------------------------------------------------------
/test/test_email_content.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_email_date.ml:
--------------------------------------------------------------------------------
  1 | open! Core
  2 | open Email_message
  3 | open Expect_test_helpers_core
  4 | open Email_date
  5 | module Time = Time_float_unix
  6 | 
  7 | let parse_and_print time =
  8 |   print_string (Time.to_string_iso8601_basic (of_string_exn time) ~zone:Time.Zone.utc)
  9 | ;;
 10 | 
 11 | let parse_and_print_with_time_zone time =
 12 |   let time, time_zone = of_string_exn_with_time_zone time in
 13 |   let time = Time.to_string_iso8601_basic time ~zone:Time.Zone.utc in
 14 |   print_string [%string "%{time}, %{Time.Zone.to_string time_zone}"]
 15 | ;;
 16 | 
 17 | let%expect_test "of_rfc822_date neg offset" =
 18 |   parse_and_print "Fri, 03 Dec 2010 16:02:30 -0600 (CST)";
 19 |   [%expect {| 2010-12-03T22:02:30.000000Z |}];
 20 |   parse_and_print_with_time_zone "Fri, 03 Dec 2010 16:02:30 -0600 (CST)";
 21 |   [%expect {| 2010-12-03T22:02:30.000000Z, UTC-6 |}]
 22 | ;;
 23 | 
 24 | let%expect_test "of_rfc822_date offset with non zero minutes" =
 25 |   parse_and_print "Fri, 3 Dec 2010 16:02:30 +0530 (IST)";
 26 |   [%expect {| 2010-12-03T10:32:30.000000Z |}];
 27 |   parse_and_print_with_time_zone "Fri, 3 Dec 2010 16:02:30 +0530 (IST)";
 28 |   [%expect {| 2010-12-03T10:32:30.000000Z, UTC+5:30 |}];
 29 |   parse_and_print "Fri, 03 Dec 2010 16:02:30 -0330 (NT)";
 30 |   [%expect {| 2010-12-03T19:32:30.000000Z |}];
 31 |   parse_and_print_with_time_zone "Fri, 03 Dec 2010 22:02:30 -0330 (NT)";
 32 |   [%expect {| 2010-12-04T01:32:30.000000Z, UTC-3:30 |}]
 33 | ;;
 34 | 
 35 | let%expect_test "of_rfc822_date GMT" =
 36 |   parse_and_print "Fri, 3 Dec 2010 16:02:30 +0000 (GMT)";
 37 |   [%expect {| 2010-12-03T16:02:30.000000Z |}];
 38 |   parse_and_print_with_time_zone "Fri, 3 Dec 2010 16:02:30 +0000 (GMT)";
 39 |   [%expect {| 2010-12-03T16:02:30.000000Z, UTC |}]
 40 | ;;
 41 | 
 42 | let%expect_test "of_rfc822_date pos offset" =
 43 |   parse_and_print "Fri, 03 Dec 2010 16:02:30 +0600 (BST)";
 44 |   [%expect {| 2010-12-03T10:02:30.000000Z |}];
 45 |   parse_and_print_with_time_zone "Fri, 03 Dec 2010 16:02:30 +0600 (BST)";
 46 |   [%expect {| 2010-12-03T10:02:30.000000Z, UTC+6 |}]
 47 | ;;
 48 | 
 49 | let%expect_test "of_rfc822_date seconds are optional" =
 50 |   parse_and_print "Fri, 03 Dec 2010 16:02 +0600 (BST)";
 51 |   [%expect {| 2010-12-03T10:02:00.000000Z |}]
 52 | ;;
 53 | 
 54 | let%expect_test "of_rfc822_date 2 digit year meaning 20th century (obsolete format)" =
 55 |   parse_and_print "01 Jan 98 11:00 +0000";
 56 |   [%expect {| 1998-01-01T11:00:00.000000Z |}]
 57 | ;;
 58 | 
 59 | let%expect_test "of_rfc822_date 2 digit year meaning 21st century (obsolete format)" =
 60 |   parse_and_print "01 Jan 20 11:00 +0000";
 61 |   [%expect {| 2020-01-01T11:00:00.000000Z |}]
 62 | ;;
 63 | 
 64 | let%expect_test "of_rfc822_date embedded comments and extra whitespace" =
 65 |   parse_and_print "  Fri,   (hello) 03 Dec 2010 16:02:30 (BST)\n+0600";
 66 |   [%expect {| 2010-12-03T10:02:30.000000Z |}]
 67 | ;;
 68 | 
 69 | let%expect_test "of_rfc822_date no day-of-week" =
 70 |   parse_and_print "03 Dec 2010 16:02:30 +0600 (BST)";
 71 |   [%expect {| 2010-12-03T10:02:30.000000Z |}]
 72 | ;;
 73 | 
 74 | let%expect_test "of_rfc822_date obsolete timezones" =
 75 |   List.iter
 76 |     [ "UT"; "GMT"; "EST"; "EDT"; "CST"; "CDT"; "MST"; "MDT"; "PST"; "PDT" ]
 77 |     ~f:(fun tz ->
 78 |       parse_and_print ("03 Dec 2010 16:02:30 " ^ tz);
 79 |       Out_channel.newline stdout);
 80 |   [%expect
 81 |     {|
 82 |     2010-12-03T16:02:30.000000Z
 83 |     2010-12-03T16:02:30.000000Z
 84 |     2010-12-03T21:02:30.000000Z
 85 |     2010-12-03T20:02:30.000000Z
 86 |     2010-12-03T22:02:30.000000Z
 87 |     2010-12-03T21:02:30.000000Z
 88 |     2010-12-03T23:02:30.000000Z
 89 |     2010-12-03T22:02:30.000000Z
 90 |     2010-12-04T00:02:30.000000Z
 91 |     2010-12-03T23:02:30.000000Z
 92 |     |}]
 93 | ;;
 94 | 
 95 | (* As per https://tools.ietf.org/html/rfc5322#section-4.3 all these military timezones are
 96 |    unpredictable and hence should be parsed as '-0000' *)
 97 | let%expect_test "of_rfc822_date military timezones" =
 98 |   let single_letter_tzs =
 99 |     List.init 26 ~f:(fun i -> Char.of_int_exn (Char.to_int 'A' + i) |> Char.to_string)
100 |     |> List.filter ~f:(String.( <> ) "J")
101 |   in
102 |   let parsed =
103 |     List.map single_letter_tzs ~f:(fun tz ->
104 |       of_string_exn ("Fri, 03 Dec 2010 16:02:30 " ^ tz))
105 |   in
106 |   print_string (Time.to_string_iso8601_basic (List.hd_exn parsed) ~zone:Time.Zone.utc);
107 |   List.iter parsed ~f:(fun time -> require_equal (module Time) (List.hd_exn parsed) time);
108 |   [%expect {| 2010-12-03T16:02:30.000000Z |}]
109 | ;;
110 | 
111 | let%expect_test "of_rfc822_date fail semantically incorrect date" =
112 |   show_raise (fun () -> of_string_exn "Fri, 39 Dec 2010 16:02:30 +0000 (GMT)");
113 |   [%expect
114 |     {|
115 |     (raised (
116 |       Invalid_argument
117 |       "Date.create_exn ~y:2010 ~m:Dec ~d:39 error: 31 day month violation"))
118 |     |}]
119 | ;;
120 | 
121 | let%expect_test "of_rfc822_date fail bad format" =
122 |   show_raise (fun () -> of_string_exn "bad format");
123 |   [%expect {| (raised (Failure "Failed to parse RFC822 date > day: skip")) |}]
124 | ;;
125 | 
126 | let%expect_test "of_rfc822_date fail no TZinfo" =
127 |   show_raise (fun () -> of_string_exn "Fri, 03 Dec 2010 16:02:30 (BST)");
128 |   [%expect {| (raised (Failure "Failed to parse RFC822 time zone: no more choices")) |}]
129 | ;;
130 | 
131 | let%expect_test "of_rfc822_date semantically invalid UTC offset" =
132 |   show_raise (fun () -> of_string_exn "Fri, 03 Dec 2010 16:02:30 -3600 (CST)");
133 |   [%expect {| (raised "The supplied UTC offset is semantically invalid.") |}]
134 | ;;
135 | 
136 | let%expect_test "rfc822_date" =
137 |   print_string (Email_date.rfc822_date Time.epoch);
138 |   [%expect {| Wed, 31 Dec 1969 19:00:00 -0500 |}]
139 | ;;
140 | 
141 | let%test_unit ("rfc822_date round-trip" [@tags "64-bits-only"]) =
142 |   let open! Quickcheck.Let_syntax in
143 |   Quickcheck.test
144 |     (* Unfortunately we cannot use Time.quickcheck_generator since that generates times
145 |        with sub-second precision and a RFC822 date is only precise to the second.
146 | 
147 |        unix time 3_000_000_000 is in the year 2065.
148 |     *)
149 |     (let%map seconds_since_epoch = Int64.gen_incl (-100L) 3_000_000_000L in
150 |      Float.of_int64 seconds_since_epoch |> Time.Span.of_sec |> Time.of_span_since_epoch)
151 |     ~sexp_of:[%sexp_of: Time.t]
152 |     ~f:(fun time ->
153 |       [%test_eq: Time.t] time (Email_date.rfc822_date time |> of_string_exn))
154 | ;;
155 | 


--------------------------------------------------------------------------------
/test/test_email_date.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_email_save.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open Async
 3 | open Email_message
 4 | open Expect_test_helpers_async
 5 | 
 6 | let save ?eol_except_raw_content str =
 7 |   let email = Email.of_string str in
 8 |   with_temp_dir (fun tmpdir ->
 9 |     let path = tmpdir ^/ "email" in
10 |     let%bind () = Email.save ?eol_except_raw_content email path in
11 |     let%bind on_disk = Reader.file_contents path in
12 |     String.concat_map on_disk ~f:(fun c ->
13 |       if Char.equal c '\r' then "\\r" else String.of_char c)
14 |     |> printf "%s";
15 |     Deferred.unit)
16 | ;;
17 | 
18 | let%expect_test "save" =
19 |   let email_str = "A: B\nC: D\n\nLine 1\nLine 2" in
20 |   let%bind () = save email_str in
21 |   [%expect
22 |     {|
23 |     A: B
24 |     C: D
25 | 
26 |     Line 1
27 |     Line 2
28 |     |}];
29 |   let%bind () = save ~eol_except_raw_content:`CRLF email_str in
30 |   [%expect
31 |     {|
32 |     A: B\r
33 |     C: D\r
34 |     \r
35 |     Line 1
36 |     Line 2
37 |     |}];
38 |   Deferred.unit
39 | ;;
40 | 


--------------------------------------------------------------------------------
/test/test_email_save.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_email_simple.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_encoded_word.ml:
--------------------------------------------------------------------------------
  1 | open Core
  2 | open Email_message.Email_headers.Encoded_word
  3 | 
  4 | let%expect_test "decode" =
  5 |   let test str = printf "%S\n" (decode str |> Or_error.ok_exn) in
  6 |   test "hello = there?!?";
  7 |   [%expect {| "hello = there?!?" |}];
  8 |   test "hello\n there\n\tagain\n   my\nfriend";
  9 |   [%expect {| "hello there\tagain   my\nfriend" |}];
 10 |   (* Some test vectors from: https://tools.ietf.org/html/rfc2047 *)
 11 |   test "=?ISO-8859-1?Q?a?=";
 12 |   [%expect {| "a" |}];
 13 |   test "=?ISO-8859-1?Q?a?= b";
 14 |   [%expect {| "a b" |}];
 15 |   test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=";
 16 |   [%expect {| "ab" |}];
 17 |   test "=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=";
 18 |   [%expect {| "ab" |}];
 19 |   test "=?ISO-8859-1?Q?a?=\n       =?ISO-8859-1?Q?b?=";
 20 |   [%expect {| "ab" |}];
 21 |   test "=?ISO-8859-1?Q?a_b?=";
 22 |   [%expect {| "a b" |}];
 23 |   test "=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?_b?=";
 24 |   [%expect {| "a b" |}];
 25 |   test " =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>";
 26 |   [%expect {| " Keith Moore <moore@cs.utk.edu>" |}];
 27 |   test " =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>";
 28 |   [%expect {| " Keld J\248rn Simonsen <keld@dkuug.dk>" |}];
 29 |   test " =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>";
 30 |   [%expect {| " Andr\233 Pirard <PIRARD@vm1.ulg.ac.be>" |}];
 31 |   test
 32 |     "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\
 33 |      =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=";
 34 |   [%expect {| "If you can read this you understand the example." |}];
 35 |   (* Big5 example *)
 36 |   test "=?Big5?B?pl7C0DogW0V4dGVybmFsXSBSZTogPDxKYW5lc3RyZWV0Pj4gVW5tYXRjaGVk?=";
 37 |   [%expect {| "\166^\194\208: [External] Re: <<Janestreet>> Unmatched" |}];
 38 |   (* a UTF8 Example *)
 39 |   test "=?UTF-8?B?SGkgVGhlcmVcIQo=?=";
 40 |   [%expect {| "Hi There\\!\n" |}];
 41 |   (* mixed encodings *)
 42 |   test "Hello =?US-ASCII?Q?ascii?= =?ISO-8859-1?B?YmluYXJ5?= =?UTF-8?Q?world?=";
 43 |   [%expect {| "Hello asciibinaryworld" |}];
 44 |   test "Ocaml =?UTF-8?B?8J+Qqg==?= Code";
 45 |   [%expect {| "Ocaml \240\159\144\170 Code" |}]
 46 | ;;
 47 | 
 48 | let%expect_test "decode-with-charset" =
 49 |   let test str =
 50 |     let decoded = decode_with_charset str |> Or_error.ok_exn in
 51 |     print_s
 52 |       [%sexp
 53 |         (decoded
 54 |          : [ `Encoded of
 55 |              [ `Ascii
 56 |              | `Big5
 57 |              | `GB2312
 58 |              | `Latin1
 59 |              | `Latin2
 60 |              | `Utf8
 61 |              | `Windows1252
 62 |              | `KS_C_5601_1987
 63 |              ]
 64 |              * string
 65 |            | `Plain of string
 66 |            ]
 67 |              list)]
 68 |   in
 69 |   test "hello = there?!?";
 70 |   [%expect {| ((Plain hello) (Plain " ") (Plain =) (Plain " ") (Plain there?!?)) |}];
 71 |   test "hello\n there\n\tagain\n   my\nfriend";
 72 |   [%expect
 73 |     {|
 74 |     ((Plain hello) (Plain " ") (Plain there) (Plain "\t") (Plain again)
 75 |      (Plain "   ") (Plain my) (Plain "\n") (Plain friend))
 76 |     |}];
 77 |   (* Some test vectors from: https://tools.ietf.org/html/rfc2047 *)
 78 |   test "=?ISO-8859-1?Q?a?=";
 79 |   [%expect {| ((Encoded (Latin1 a))) |}];
 80 |   test "=?ISO-8859-1?Q?a?= b";
 81 |   [%expect {| ((Encoded (Latin1 a)) (Plain " ") (Plain b)) |}];
 82 |   test "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=";
 83 |   [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 b))) |}];
 84 |   test "=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=";
 85 |   [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 b))) |}];
 86 |   test "=?ISO-8859-1?Q?a?=\n       =?ISO-8859-1?Q?b?=";
 87 |   [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 b))) |}];
 88 |   test "=?ISO-8859-1?Q?a_b?=";
 89 |   [%expect {| ((Encoded (Latin1 "a b"))) |}];
 90 |   test "=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?_b?=";
 91 |   [%expect {| ((Encoded (Latin1 a)) (Encoded (Latin1 " b"))) |}];
 92 |   test " =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>";
 93 |   [%expect
 94 |     {|
 95 |     ((Plain " ") (Encoded (Ascii "Keith Moore")) (Plain " ")
 96 |      (Plain <moore@cs.utk.edu>))
 97 |     |}];
 98 |   test " =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>";
 99 |   [%expect
100 |     {|
101 |     ((Plain " ") (Encoded (Latin1 "Keld J\248rn Simonsen")) (Plain " ")
102 |      (Plain <keld@dkuug.dk>))
103 |     |}];
104 |   test " =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>";
105 |   [%expect
106 |     {|
107 |     ((Plain " ") (Encoded (Latin1 "Andr\233")) (Plain " ") (Plain Pirard)
108 |      (Plain " ") (Plain <PIRARD@vm1.ulg.ac.be>))
109 |     |}];
110 |   test
111 |     "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n\
112 |      =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=";
113 |   [%expect
114 |     {|
115 |     ((Encoded (Latin1 "If you can read this yo"))
116 |      (Encoded (Latin2 "u understand the example.")))
117 |     |}];
118 |   (* Big5 example *)
119 |   test "=?Big5?B?pl7C0DogW0V4dGVybmFsXSBSZTogPDxKYW5lc3RyZWV0Pj4gVW5tYXRjaGVk?=";
120 |   [%expect
121 |     {| ((Encoded (Big5 "\166^\194\208: [External] Re: <<Janestreet>> Unmatched"))) |}];
122 |   (* a UTF8 Example *)
123 |   test "=?UTF-8?B?SGkgVGhlcmVcIQo=?=";
124 |   [%expect {| ((Encoded (Utf8 "Hi There\\!\n"))) |}];
125 |   (* mixed encodings *)
126 |   test "Hello =?US-ASCII?Q?ascii?= =?ISO-8859-1?B?YmluYXJ5?= =?UTF-8?Q?world?=";
127 |   [%expect
128 |     {|
129 |     ((Plain Hello) (Plain " ") (Encoded (Ascii ascii)) (Encoded (Latin1 binary))
130 |      (Encoded (Utf8 world)))
131 |     |}];
132 |   test "Ocaml =?UTF-8?B?8J+Qqg==?= Code";
133 |   [%expect
134 |     {|
135 |     ((Plain Ocaml) (Plain " ") (Encoded (Utf8 "\240\159\144\170")) (Plain " ")
136 |      (Plain Code))
137 |     |}]
138 | ;;
139 | 
140 | let%expect_test _ =
141 |   let test str = printf "%S\n" (decode str |> Or_error.ok_exn) in
142 |   (* The RFC is pretty clear in saying that spaces inside the encode word are not supported.
143 |      Yet we've seen some incorrectly encoded headers in the wild.
144 | 
145 |      Adding this test to document that the current behaviour is correct.
146 |   *)
147 |   test
148 |     "=?UTF-8?Q?encoded word with spaces ?= =?UTF-8?Q?and new\n\
149 |      lines?= =?UTF-8?Q?should=20not=20be=20decoded?=";
150 |   [%expect
151 |     {| "=?UTF-8?Q?encoded word with spaces ?= =?UTF-8?Q?and new\nlines?= should not be decoded" |}]
152 | ;;
153 | 


--------------------------------------------------------------------------------
/test/test_encoded_word.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_headers.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open Email_message
 3 | module Expect_test_config = Core.Expect_test_config
 4 | 
 5 | let%expect_test "whitespace" =
 6 |   let headers =
 7 |     Email_headers.of_list
 8 |       ~normalize:`None
 9 |       [ "header1", "1"
10 |       ; "header2", " 2"
11 |       ; "header3", "=?ISO-8859-1?Q?a?="
12 |       ; "header4", " =?ISO-8859-1?Q?a?="
13 |       ; "header5", " =?ISO-8859-1?Q?a?= \n  =?ISO-8859-1?Q?b?=  \n c"
14 |       ]
15 |   in
16 |   let print_headers ~normalize =
17 |     Email_headers.to_list ~normalize headers
18 |     |> List.iter ~f:(fun (name, value) -> printf "|%s|%s|\n" name value)
19 |   in
20 |   print_headers ~normalize:`None;
21 |   [%expect
22 |     {|
23 |     |header1|1|
24 |     |header2| 2|
25 |     |header3|=?ISO-8859-1?Q?a?=|
26 |     |header4| =?ISO-8859-1?Q?a?=|
27 |     |header5| =?ISO-8859-1?Q?a?=
28 |       =?ISO-8859-1?Q?b?=
29 |      c|
30 |     |}];
31 |   print_headers ~normalize:`Whitespace;
32 |   [%expect
33 |     {|
34 |     |header1|1|
35 |     |header2|2|
36 |     |header3|=?ISO-8859-1?Q?a?=|
37 |     |header4|=?ISO-8859-1?Q?a?=|
38 |     |header5|=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?= c|
39 |     |}];
40 |   print_headers
41 |     ~normalize:(`Whitespace_and_encoding (`Any_charset, `Pretend_all_charsets_are_same));
42 |   [%expect
43 |     {|
44 |     |header1|1|
45 |     |header2|2|
46 |     |header3|a|
47 |     |header4|a|
48 |     |header5|ab c|
49 |     |}]
50 | ;;
51 | 


--------------------------------------------------------------------------------
/test/test_headers.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_media_type.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open Async
 3 | open Email_message
 4 | open Email_message.Private
 5 | open Expect_test_helpers_core
 6 | open Media_type
 7 | 
 8 | let from_headers headers =
 9 |   let result = Option.value_exn (from_headers headers) in
10 |   print_s [%message "" ~_:(result : t)]
11 | ;;
12 | 
13 | let%expect_test "[Media_type.last]" =
14 |   let headers =
15 |     [ "Content-Type", "multipart/mixed;\nboundary=\"BOUNDARY\"" ]
16 |     |> Email_headers.of_list ~normalize:`Whitespace
17 |   in
18 |   from_headers headers;
19 |   [%expect
20 |     {|
21 |     ((mime_type    multipart)
22 |      (mime_subtype mixed)
23 |      (params ((boundary BOUNDARY))))
24 |     |}];
25 |   return ()
26 | ;;
27 | 


--------------------------------------------------------------------------------
/test/test_media_type.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_octet_stream.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open Async
 3 | open Email_message
 4 | open Octet_stream
 5 | 
 6 | let test ~encoding plaintext =
 7 |   let coded = encode ~encoding (Bigstring_shared.of_string plaintext) in
 8 |   printf "%s" (Octet_stream.encoded_contents_string coded);
 9 |   let decoded = decode coded |> Option.value_exn |> Bigstring_shared.to_string in
10 |   [%test_result: string] decoded ~expect:plaintext
11 | ;;
12 | 
13 | let%expect_test "base64" =
14 |   let test = test ~encoding:`Base64 in
15 |   test "any carnal pleasure.";
16 |   [%expect {| YW55IGNhcm5hbCBwbGVhc3VyZS4= |}];
17 |   test "any carnal pleasure";
18 |   [%expect {| YW55IGNhcm5hbCBwbGVhc3VyZQ== |}];
19 |   test "any carnal pleasur";
20 |   [%expect {| YW55IGNhcm5hbCBwbGVhc3Vy |}];
21 |   test "any carnal pleasu";
22 |   [%expect {| YW55IGNhcm5hbCBwbGVhc3U= |}];
23 |   test "any carnal pleas";
24 |   [%expect {| YW55IGNhcm5hbCBwbGVhcw== |}];
25 |   test
26 |     "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor \
27 |      incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud \
28 |      exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute \
29 |      irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
30 |      pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia \
31 |      deserunt mollit anim id est laborum.";
32 |   [%expect
33 |     {|
34 |     TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg
35 |     c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
36 |     YSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0
37 |     aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1
38 |     YXQuIER1aXMgYXV0ZSBpcnVyZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2
39 |     ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRl
40 |     dXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1
41 |     aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg==
42 |     |}];
43 |   return ()
44 | ;;
45 | 
46 | let%expect_test "quoted printable" =
47 |   let test = test ~encoding:`Quoted_printable in
48 |   test
49 |     "If you believe that truth=beauty, then surely mathematics is the most beautiful \
50 |      branch of philosophy.";
51 |   [%expect
52 |     {|
53 |     If you believe that truth=3Dbeauty, then surely mathematics is the most =
54 |     beautiful branch of philosophy.
55 |     |}];
56 |   test "\000\001\002a\003    \n";
57 |   [%expect {| =00=01=02a=03   =20 |}];
58 |   test "\000\001\002a\003    \n";
59 |   [%expect {| =00=01=02a=03   =20 |}];
60 |   test "\000\001\002a\003    \n";
61 |   [%expect {| =00=01=02a=03   =20 |}];
62 |   test "\000\001\002a\003    \n";
63 |   [%expect {| =00=01=02a=03   =20 |}];
64 |   test "This text is fairly long and should be wrapped by a conforming implementation.";
65 |   [%expect
66 |     {|
67 |     This text is fairly long and should be wrapped by a conforming =
68 |     implementation.
69 |     |}];
70 |   test
71 |     "123456789A123456789B123456789C123456789D123456789E123456789F123456789G123456789H123456789I";
72 |   [%expect
73 |     {|
74 |     123456789A123456789B123456789C123456789D123456789E123456789F123456789G12345=
75 |     6789H123456789I
76 |     |}];
77 |   test
78 |     "123456789A123456789B123456789C123456789D123456789E123456789F123456789G=23456789H123456789I";
79 |   [%expect
80 |     {|
81 |     123456789A123456789B123456789C123456789D123456789E123456789F123456789G=3D23=
82 |     456789H123456789I
83 |     |}];
84 |   return ()
85 | ;;
86 | 


--------------------------------------------------------------------------------
/test/test_octet_stream.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_related_parts.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open Async
 3 | open Email_message
 4 | 
 5 | let test_email =
 6 |   (* This is the test email from https://tools.ietf.org/html/rfc2392 except I had to
 7 |      remove the "type=Text/HTML" from the top-most "Content-Type" header. *)
 8 |   String.strip
 9 |     {|
10 | From: foo1@bar.net
11 | To: foo2@bar.net
12 | Subject: A simple example
13 | Mime-Version: 1.0
14 | Content-Type: multipart/related; boundary="boundary-example-1"
15 | --boundary-example 1
16 | Content-Type: Text/HTML; charset=US-ASCII
17 | 
18 | to the other body part, for example through a statement such as:
19 | <IMG SRC="cid:foo4*foo1@bar.net" ALT="IETF logo">
20 | 
21 | --boundary-example-1
22 | Content-ID: <foo4*foo1@bar.net>
23 | Content-Type: IMAGE/GIF
24 | Content-Transfer-Encoding: BASE64
25 | 
26 | R0lGODlhGAGgAPEAAP/////ZRaCgoAAAACH+PUNvcHlyaWdodCAoQykgMTk5
27 | NSBJRVRGLiBVbmF1dGhvcml6ZWQgZHVwbGljYXRpb24gcHJvaGliaXRlZC4A
28 | etc...
29 | 
30 | --boundary-example-1--
31 | |}
32 | ;;
33 | 
34 | let%expect_test _ =
35 |   let email = Email.of_string test_email in
36 |   Email.Simple.all_related_parts email
37 |   |> [%sexp_of: (string * Email.Simple.Content.t) list]
38 |   |> print_s;
39 |   [%expect
40 |     {|
41 |     ((foo4*foo1@bar.net
42 |       ((headers
43 |         ((Content-ID " <foo4*foo1@bar.net>") (Content-Type " IMAGE/GIF")
44 |          (Content-Transfer-Encoding " BASE64")))
45 |        (raw_content
46 |         ( "R0lGODlhGAGgAPEAAP/////ZRaCgoAAAACH+PUNvcHlyaWdodCAoQykgMTk5\
47 |          \nNSBJRVRGLiBVbmF1dGhvcml6ZWQgZHVwbGljYXRpb24gcHJvaGliaXRlZC4A\
48 |          \netc...\
49 |          \n")))))
50 |     |}];
51 |   return ()
52 | ;;
53 | 


--------------------------------------------------------------------------------
/test/test_related_parts.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------
/test/test_rfc.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open Async
 3 | open Email_message.Private
 4 | open Expect_test_helpers_core
 5 | open Rfc.RFC2045.Token
 6 | 
 7 | let%expect_test "RFC2045.Token" =
 8 |   let is_valid_or_quote strs =
 9 |     let results = List.map strs ~f:(fun str -> str, is_valid_or_quote str) in
10 |     print_s [%message "" ~_:(results : (string * string) list)]
11 |   in
12 |   is_valid_or_quote [ "abcdefghijkl"; "abc=dka"; ""; "\"" ];
13 |   [%expect
14 |     {|
15 |     ((abcdefghijkl abcdefghijkl)
16 |      (abc=dka      "\"abc=dka\"")
17 |      (""           "\"\"")
18 |      ("\""         "\"\\\"\""))
19 |     |}];
20 |   let is_valid strs =
21 |     let results = List.map strs ~f:(fun str -> str, `Is_valid (is_valid str)) in
22 |     print_s [%message "" ~_:(results : (string * [ `Is_valid of bool ]) list)]
23 |   in
24 |   is_valid
25 |     [ "abcdefghijkl"
26 |     ; "LoremIpsum"
27 |     ; "3.141"
28 |     ; "---"
29 |     ; "a"
30 |     ; "abc=dka"
31 |     ; ""
32 |     ; "\""
33 |     ; "@"
34 |     ; ","
35 |     ; ":"
36 |     ; ";"
37 |     ; "\x00"
38 |     ; "\x7F"
39 |     ; "\x80"
40 |     ];
41 |   [%expect
42 |     {|
43 |     ((abcdefghijkl (Is_valid true))
44 |      (LoremIpsum   (Is_valid true))
45 |      (3.141        (Is_valid true))
46 |      (---          (Is_valid true))
47 |      (a            (Is_valid true))
48 |      (abc=dka      (Is_valid false))
49 |      (""           (Is_valid false))
50 |      ("\""         (Is_valid false))
51 |      (@            (Is_valid false))
52 |      (,            (Is_valid false))
53 |      (:            (Is_valid false))
54 |      (";"          (Is_valid false))
55 |      ("\000"       (Is_valid false))
56 |      ("\127"       (Is_valid false))
57 |      ("\128"       (Is_valid false)))
58 |     |}];
59 |   return ()
60 | ;;
61 | 


--------------------------------------------------------------------------------
/test/test_rfc.mli:
--------------------------------------------------------------------------------
1 | (*_ This signature is deliberately empty. *)
2 | 


--------------------------------------------------------------------------------