├── .gitattributes
├── .github
    └── workflows
    │   ├── lint.yml
    │   └── test.yml
├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── examples
    ├── dump_mail.rs
    └── owned.rs
├── src
    ├── addrparse.rs
    ├── body.rs
    ├── dateparse.rs
    ├── header.rs
    ├── headers.rs
    ├── lib.rs
    └── msgidparse.rs
└── tests
    └── files
        ├── test_email_01.txt
        └── test_email_01_sample.pdf


/.gitattributes:
--------------------------------------------------------------------------------
1 | tests/files/test_email_01_sample.pdf -text -diff
2 | 


--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | name: Lints
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   formatting:
11 |     name: Check formatting
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - name: Get latest stable Rust
15 |         run: rustup toolchain install stable --profile default
16 |       - name: Check out source
17 |         uses: actions/checkout@v2
18 |       - name: Check formatting
19 |         run: cargo fmt -v -- --check
20 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 | 
 9 | 
10 | jobs:
11 |   stable:
12 |     name: Run tests on stable rust
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - name: Get latest stable Rust
16 |         run: rustup toolchain install stable --profile minimal
17 |       - name: Check out source
18 |         uses: actions/checkout@v2
19 |       - name: Run tests
20 |         run: cargo test --all
21 |   beta:
22 |     name: Run tests on beta rust
23 |     runs-on: ubuntu-latest
24 |     steps:
25 |       - name: Get latest beta Rust
26 |         run: rustup toolchain install beta --profile minimal
27 |       - name: Check out source
28 |         uses: actions/checkout@v2
29 |       - name: Run tests
30 |         run: cargo test --all
31 |   nightly:
32 |     name: Run tests on nightly rust
33 |     runs-on: ubuntu-latest
34 |     steps:
35 |       - name: Get latest nightly Rust
36 |         run: rustup toolchain install nightly --profile minimal
37 |       - name: Check out source
38 |         uses: actions/checkout@v2
39 |       - name: Run tests
40 |         run: cargo test --all
41 |   msrv:
42 |     name: Run tests on minimum supported rust version
43 |     runs-on: ubuntu-latest
44 |     steps:
45 |       - name: Get minimum supported Rust
46 |         run: rustup toolchain install 1.51.0 --profile minimal
47 |       - name: Check out source
48 |         uses: actions/checkout@v2
49 |       - name: Run tests
50 |         run: cargo test --all
51 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | Cargo.lock
3 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "mailparse"
 3 | version = "0.16.1"
 4 | authors = ["Kartikaya Gupta"]
 5 | edition = "2018"
 6 | license = "0BSD"
 7 | 
 8 | description = "A simple parser for MIME e-mail messages"
 9 | homepage = "https://github.com/staktrace/mailparse/blob/master/README.md"
10 | repository = "https://github.com/staktrace/mailparse"
11 | readme = "README.md"
12 | keywords = ["parser", "email", "rfc822", "mime", "maildir"]
13 | categories = ["email", "parsing"]
14 | exclude = [".gitattributes", ".gitignore", ".github/**", "examples/**"]
15 | 
16 | [badges]
17 | maintenance = { status = "passively-maintained" }
18 | 
19 | [dependencies]
20 | data-encoding = "2.6.0"
21 | quoted_printable = "0.5.0"
22 | charset = "0.1.3"
23 | 
24 | [dev-dependencies]
25 | ouroboros = "0.17.0"
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2019 by Kartikaya Gupta
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software for any
 4 | purpose with or without fee is hereby granted.
 5 | 
 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 7 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 8 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 9 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
10 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
11 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
12 | PERFORMANCE OF THIS SOFTWARE.
13 | 
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | mailparse
 2 | ===
 3 | ![Build Status](https://github.com/staktrace/mailparse/actions/workflows/test.yml/badge.svg)
 4 | [![Crate](https://img.shields.io/crates/v/mailparse.svg)](https://crates.io/crates/mailparse)
 5 | 
 6 | A simple parser for MIME email messages.
 7 | 
 8 | API
 9 | ---
10 | The primary entry point for this library is the following function:
11 | 
12 | ```rust
13 |     parse_mail(&[u8]) -> Result<ParsedMail, MailParseError>
14 | ```
15 | 
16 | This function takes the raw message data, including headers and body, and returns a structured object to more easily access pieces of the email message.
17 | There are other public functions that allow parsing smaller parts of the message as well; refer to the [full documentation](https://docs.rs/mailparse/).
18 | 
19 | The library is designed to process real-world email data such as might be obtained by using the FETCH command on an IMAP server, or in a Maildir.
20 | As such, this library should successfully handle any valid MIME-formatted message, although it may not follow all the strict requirements in the various specifications that cover the format (predominantly IETF RFCs 822, 2045, 2047, 2822, and 5322).
21 | As an example, this library accepts raw message data which uses \n (ASCII LF) as line delimiters rather than the RFC-mandated \r\n (ASCII CRLF) line delimiters.
22 | 
23 | Example usage
24 | ---
25 | 
26 | ```rust
27 |     use mailparse::*;
28 |     let parsed = parse_mail(concat!(
29 |             "Subject: This is a test email\n",
30 |             "Content-Type: multipart/alternative; boundary=foobar\n",
31 |             "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
32 |             "\n",
33 |             "--foobar\n",
34 |             "Content-Type: text/plain; charset=utf-8\n",
35 |             "Content-Transfer-Encoding: quoted-printable\n",
36 |             "\n",
37 |             "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
38 |             "--foobar\n",
39 |             "Content-Type: text/html\n",
40 |             "Content-Transfer-Encoding: base64\n",
41 |             "\n",
42 |             "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
43 |             "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
44 |             "--foobar--\n",
45 |             "After the final boundary stuff gets ignored.\n").as_bytes())
46 |         .unwrap();
47 |     assert_eq!(parsed.headers.get_first_value("Subject"),
48 |         Some("This is a test email".to_string()));
49 |     assert_eq!(parsed.subparts.len(), 2);
50 |     assert_eq!(parsed.subparts[0].get_body().unwrap(),
51 |         "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}");
52 |     assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64");
53 |     assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html");
54 |     assert!(parsed.subparts[1].get_body().unwrap().starts_with("<html>"));
55 |     assert_eq!(dateparse(parsed.headers.get_first_value("Date").unwrap().as_str()).unwrap(), 1475417182);
56 | ```
57 | 
58 | Documentation
59 | ---
60 | See the rustdoc at [docs.rs](https://docs.rs/mailparse/).
61 | 
62 | MSRV policy
63 | ---
64 | Currently the minimum supported Rust version (MSRV) is 1.51.0.
65 | MSRV increases will be kept to a minimum, and will always be accompanied with a minor version bump.
66 | 
67 | Support mailparse
68 | ---
69 | If you want to support development of `mailparse`, please do so by donating your money, time, and/or energy to fighting climate change.
70 | A quick and easy way is to send a donation to [Replant.ca Environmental](http://www.replant-environmental.ca/donate.html), where every dollar gets a tree planted!
71 | 


--------------------------------------------------------------------------------
/examples/dump_mail.rs:
--------------------------------------------------------------------------------
 1 | extern crate mailparse;
 2 | 
 3 | use std::env;
 4 | use std::fs::File;
 5 | use std::io::prelude::*;
 6 | 
 7 | use mailparse::MailHeaderMap;
 8 | 
 9 | /// Print the headers, the From/To/Cc/Bcc addresses and the body of `pm` to
10 | /// stdout, then recurse into every subpart. `pfx` labels the section banners.
11 | ///
12 | /// Panics if an address header or a body cannot be parsed or decoded.
13 | fn dump(pfx: &str, pm: &mailparse::ParsedMail) {
14 |     println!(">> Headers from {} <<", pfx);
15 |     for h in &pm.headers {
16 |         println!("  [{}] => [{}]", h.get_key(), h.get_value());
17 |     }
18 |     println!(">> Addresses from {} <<", pfx);
19 |     // `if let` instead of `Option::map`: the closure was run purely for its
20 |     // side effect and the resulting `Option<()>` was silently thrown away.
21 |     for &key in &["From", "To", "Cc", "Bcc"] {
22 |         if let Some(a) = pm.headers.get_first_value(key) {
23 |             println!("{:?}", mailparse::addrparse(&a).unwrap());
24 |         }
25 |     }
26 |     println!(">> Body from {} <<", pfx);
27 |     if pm.ctype.mimetype.starts_with("text/") {
28 |         println!("  [{}]", pm.get_body().unwrap());
29 |     } else {
30 |         // NOTE(review): this is the length of whatever `get_body()` returns,
31 |         // which may differ from the raw byte count -- confirm.
32 |         println!(
33 |             "   (Body is binary type {}, {} bytes in length)",
34 |             pm.ctype.mimetype,
35 |             pm.get_body().unwrap().len()
36 |         );
37 |     }
38 |     // Subparts are numbered from 1 in the output.
39 |     for (i, s) in pm.subparts.iter().enumerate() {
40 |         println!(">> Subpart {} <<", i + 1);
41 |         dump("subpart", s);
42 |     }
43 | }
44 | 
45 | // Provide mail files as arguments
46 | //
47 | // Every argument is read fully into memory, parsed as a mail message and
48 | // dumped to stdout. Any I/O or parse failure panics.
49 | fn main() {
50 |     // The first argument is the program name, so it is skipped.
51 |     for path in env::args().skip(1) {
52 |         let mut file = File::open(&path).unwrap();
53 |         let mut raw = Vec::<u8>::new();
54 |         file.read_to_end(&mut raw).unwrap();
55 |         let mail = mailparse::parse_mail(&raw).unwrap();
56 |         dump(&path, &mail);
57 |     }
58 | }
62 | 


--------------------------------------------------------------------------------
/examples/owned.rs:
--------------------------------------------------------------------------------
 1 | extern crate mailparse;
 2 | extern crate ouroboros;
 3 | 
 4 | use mailparse::{parse_mail, ParsedMail};
 5 | use ouroboros::self_referencing;
 6 | 
 7 | #[self_referencing] // ouroboros: lets `parsed` borrow from `raw_bytes` in the same struct
 8 | struct OwnedMail {
 9 |     raw_bytes: Vec<u8>, // owned copy of the raw message bytes
10 |     #[borrows(raw_bytes)]
11 |     #[covariant]
12 |     parsed: ParsedMail<'this>, // parse result that borrows from `raw_bytes`
13 | }
14 | 
15 | /// Parse `mail_bytes` and bundle the parse result with the bytes it borrows from.
16 | /// Panics if the bytes cannot be parsed.
17 | fn make_owned_mail(mail_bytes: Vec<u8>) -> OwnedMail {
18 |     let builder = OwnedMailBuilder {
19 |         raw_bytes: mail_bytes,
20 |         parsed_builder: |bytes: &Vec<u8>| parse_mail(bytes).unwrap(),
21 |     };
22 |     builder.build()
23 | }
22 | 
23 | /// Build an `OwnedMail` from a small hard-coded message and print its body.
24 | fn main() {
25 |     let raw = b"Key: value\r\n\r\nSome body stuffs".to_vec();
26 |     let owned = make_owned_mail(raw);
27 |     let body = owned.borrow_parsed().get_body().unwrap();
28 |     println!("Mail body is: {}", body);
29 | }
30 | 


--------------------------------------------------------------------------------
/src/addrparse.rs:
--------------------------------------------------------------------------------
   1 | use std::fmt;
   2 | 
   3 | use crate::header::HeaderToken;
   4 | use crate::{MailHeader, MailParseError};
   5 | 
   6 | /// A representation of a single mailbox. Each mailbox has
   7 | /// a routing address `addr` and an optional display name.
   8 | #[derive(Clone, Debug, PartialEq, Eq, Hash)]
   9 | pub struct SingleInfo {
  10 |     pub display_name: Option<String>, // human-readable name, if one was given
  11 |     pub addr: String, // routing address; `SingleInfo::new` rejects values without '@'
  12 | }
  13 | 
  14 | impl SingleInfo {
  15 |     /// Build a `SingleInfo`, rejecting any `addr` that lacks an '@' symbol.
  16 |     fn new(name: Option<String>, addr: String) -> Result<Self, MailParseError> {
  17 |         if !addr.contains('@') {
  18 |             return Err(MailParseError::Generic(
  19 |                 "Invalid address found: must contain a '@' symbol",
  20 |             ));
  21 |         }
  22 |         Ok(SingleInfo {
  23 |             display_name: name,
  24 |             addr,
  25 |         })
  26 |     }
  27 | }
  28 | 
  29 | impl fmt::Display for SingleInfo {
  30 |     /// Writes `"name" <addr>` when a display name is present, else just `addr`.
  31 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  32 |         match &self.display_name {
  33 |             Some(name) => {
  34 |                 // Escape `\` before `"` so the backslashes added for quotes are
  35 |                 // not doubled; the parser treats `\` inside quotes as an escape.
  36 |                 let escaped = name.replace('\\', r"\\").replace('"', r#"\""#);
  37 |                 write!(f, r#""{}" <{}>"#, escaped, self.addr)
  38 |             }
  39 |             None => write!(f, "{}", self.addr),
  40 |         }
  41 |     }
  42 | }
  38 | 
  39 | /// A representation of a group address. It has a name and
  40 | /// a list of mailboxes.
  41 | #[derive(Clone, Debug, PartialEq, Eq, Hash)]
  42 | pub struct GroupInfo {
  43 |     pub group_name: String, // name of the group (the part before the ':')
  44 |     pub addrs: Vec<SingleInfo>, // member mailboxes, in the order they were listed
  45 | }
  46 | 
  47 | impl GroupInfo {
  48 |     /// Bundle a group name with the mailboxes that belong to it.
  49 |     fn new(name: String, addrs: Vec<SingleInfo>) -> Self {
  50 |         let group_name = name;
  51 |         Self { group_name, addrs }
  52 |     }
  53 | }
  55 | 
  56 | impl fmt::Display for GroupInfo {
  57 |     /// Writes `"name": addr1, addr2;` (just `"name":;` for an empty group).
  58 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  59 |         // Escape `\` before `"` so the backslashes added for quotes are not
  60 |         // doubled; the parser treats `\` inside a quoted name as an escape.
  61 |         let escaped = self.group_name.replace('\\', r"\\").replace('"', r#"\""#);
  62 |         write!(f, r#""{}":"#, escaped)?;
  63 |         for (i, addr) in self.addrs.iter().enumerate() {
  64 |             // A single space follows the colon; later members are comma-separated.
  65 |             f.write_str(if i == 0 { " " } else { ", " })?;
  66 |             addr.fmt(f)?;
  67 |         }
  68 |         f.write_str(";")
  69 |     }
  70 | }
  70 | 
  71 | /// An abstraction over the two different kinds of top-level addresses allowed
  72 | /// in email headers. Group addresses have a name and a list of mailboxes. Single
  73 | /// addresses are just a mailbox. Each mailbox consists of what you would consider
  74 | /// an email address (e.g. foo@bar.com) and optionally a display name ("Foo Bar").
  75 | /// Groups are represented in email headers with colons and semicolons, e.g.
  76 | ///    To: my-peeps: foo@peeps.org, bar@peeps.org;
  77 | #[derive(Clone, Debug, PartialEq, Eq, Hash)]
  78 | pub enum MailAddr {
  79 |     Group(GroupInfo), // a named group together with its member mailboxes
  80 |     Single(SingleInfo), // a single mailbox
  81 | }
  82 | 
  83 | #[derive(Debug)] // states of the state machine driven by `addrparse_inner`
  84 | enum AddrParseState {
  85 |     Initial, // at the start of an address; nothing collected for it yet
  86 |     QuotedName, // inside a `"`-quoted name
  87 |     EscapedChar, // just after a `\` inside a quoted name
  88 |     AfterQuotedName, // after the closing `"` of a quoted name
  89 |     BracketedAddr, // inside `<...>`, collecting the address
  90 |     AfterBracketedAddr, // after the closing `>` of a bracketed address
  91 |     Unquoted, // collecting text that began with a plain (non-special) character
  92 |     NameWithEncodedWord, // entered when an address begins with a decoded encoded-word
  93 |     Comment, // NOTE(review): presumably inside a `(...)` comment; handling not visible here
  94 | }
  95 | 
  96 | /// A simple wrapper around `Vec<MailAddr>`. This is primarily here so we can
  97 | /// implement the `Display` trait on it, and allow user code to easily convert
  98 | /// the return value from `addrparse` back into a string. The wrapper derefs to
  99 | /// the inner vector and also offers a few additional utility functions.
 100 | #[derive(Clone, Debug, PartialEq, Eq, Hash)]
 101 | pub struct MailAddrList(Vec<MailAddr>);
 102 | 
 103 | impl std::ops::Deref for MailAddrList {
 104 |     type Target = Vec<MailAddr>;
 105 | 
 106 |     /// Borrow the wrapped vector of addresses.
 107 |     fn deref(&self) -> &Self::Target {
 108 |         let MailAddrList(inner) = self;
 109 |         inner
 110 |     }
 111 | }
 110 | 
 111 | impl std::ops::DerefMut for MailAddrList {
 112 |     /// Mutably borrow the wrapped vector of addresses.
 113 |     fn deref_mut(&mut self) -> &mut Self::Target {
 114 |         let MailAddrList(inner) = self;
 115 |         inner
 116 |     }
 117 | }
 116 | 
 117 | impl fmt::Display for MailAddrList {
 118 |     /// Writes every address in order. An address that follows a group is
 119 |     /// separated from it by a single space (the group already ends in `;`);
 120 |     /// an address that follows a single mailbox is separated by ", ".
 121 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 122 |         // Separator owed before the next address; nothing before the first one.
 123 |         let mut separator: Option<&str> = None;
 124 |         for addr in self.iter() {
 125 |             if let Some(sep) = separator {
 126 |                 f.write_str(sep)?;
 127 |             }
 128 |             separator = Some(match addr {
 129 |                 MailAddr::Group(g) => {
 130 |                     g.fmt(f)?;
 131 |                     " "
 132 |                 }
 133 |                 MailAddr::Single(s) => {
 134 |                     s.fmt(f)?;
 135 |                     ", "
 136 |                 }
 137 |             });
 138 |         }
 139 |         Ok(())
 140 |     }
 141 | }
 142 | 
 143 | impl From<Vec<MailAddr>> for MailAddrList {
 144 |     /// Wrap a plain vector of addresses.
 145 |     fn from(addrs: Vec<MailAddr>) -> Self {
 146 |         Self(addrs)
 147 |     }
 148 | }
 148 | 
 149 | impl MailAddrList {
 150 |     /// Count the number of `SingleInfo` instances in this list of addresses,
 151 |     /// including the members of every group.
 152 |     pub fn count_addrs(&self) -> usize {
 153 |         self.iter()
 154 |             .map(|elem| match elem {
 155 |                 MailAddr::Single(_) => 1,
 156 |                 MailAddr::Group(g) => g.addrs.len(),
 157 |             })
 158 |             .sum()
 159 |     }
 160 | 
 161 |     /// Convenience function to check if this list of addresses contains exactly
 162 |     /// one entry that is a `SingleInfo`, and if it does, to return it. A group
 163 |     /// entry (even one with a single member) or any other length yields `None`.
 164 |     pub fn extract_single_info(self) -> Option<SingleInfo> {
 165 |         let mut addrs = self.0;
 166 |         if addrs.len() != 1 {
 167 |             return None;
 168 |         }
 169 |         // `self` is consumed, so move the entry out instead of cloning it.
 170 |         match addrs.pop() {
 171 |             Some(MailAddr::Single(s)) => Some(s),
 172 |             _ => None,
 173 |         }
 174 |     }
 175 | 
 176 |     /// Consumes the `MailAddrList`, returning the wrapped value.
 177 |     pub fn into_inner(self) -> Vec<MailAddr> {
 178 |         self.0
 179 |     }
 180 | }
 177 | 
 178 | enum HeaderTokenItem<'a> {
 179 |     Char(char), // one character taken from a `HeaderToken::Text` token
 180 |     Whitespace(&'a str), // payload of a `HeaderToken::Whitespace` token
 181 |     Newline(String), // owned copy of the payload of a `HeaderToken::Newline(Some(_))` token
 182 |     DecodedWord(String), // owned copy of the payload of a `HeaderToken::DecodedWord` token
 183 | }
 184 | 
 185 | struct HeaderTokenWalker<'a> {
 186 |     tokens: Vec<HeaderToken<'a>>, // the tokens being walked, in order
 187 |     cur_token: usize, // index into `tokens` of the token currently being consumed
 188 |     cur_char_offset: usize, // byte offset of the next char within the current `Text` token
 189 | }
 190 | 
 191 | impl<'a> Iterator for HeaderTokenWalker<'a> {
 192 |     type Item = HeaderTokenItem<'a>;
 193 | 
 194 |     /// Yield the next item: `Text` tokens are handed out one `char` at a time,
 195 |     /// every other token is yielded whole. Returns `None` once all tokens are
 196 |     /// consumed and panics on a `Newline(None)` token.
 197 |     fn next(&mut self) -> Option<Self::Item> {
 198 |         while let Some(token) = self.tokens.get(self.cur_token) {
 199 |             match token {
 200 |                 HeaderToken::Text(s) => {
 201 |                     let rest = &s[self.cur_char_offset..];
 202 |                     match rest.chars().next() {
 203 |                         Some(c) => {
 204 |                             // Step past this char; the offset is counted in bytes.
 205 |                             self.cur_char_offset += c.len_utf8();
 206 |                             return Some(HeaderTokenItem::Char(c));
 207 |                         }
 208 |                         None => {
 209 |                             // Text exhausted: move on to the next token.
 210 |                             self.cur_char_offset = 0;
 211 |                             self.cur_token += 1;
 212 |                         }
 213 |                     }
 214 |                 }
 215 |                 HeaderToken::Whitespace(ws) => {
 216 |                     self.cur_token += 1;
 217 |                     return Some(HeaderTokenItem::Whitespace(ws));
 218 |                 }
 219 |                 HeaderToken::Newline(Some(ws)) => {
 220 |                     self.cur_token += 1;
 221 |                     return Some(HeaderTokenItem::Newline(String::from(ws)));
 222 |                 }
 223 |                 HeaderToken::Newline(None) => {
 224 |                     panic!("Should never reach here");
 225 |                 }
 226 |                 HeaderToken::DecodedWord(word) => {
 227 |                     self.cur_token += 1;
 228 |                     return Some(HeaderTokenItem::DecodedWord(String::from(word)));
 229 |                 }
 230 |             }
 231 |         }
 232 |         None
 233 |     }
 234 | }
 232 | 
 233 | impl<'a> HeaderTokenWalker<'a> {
 234 |     /// Create a walker positioned at the first character of the first token.
 235 |     fn new(tokens: Vec<HeaderToken<'a>>) -> Self {
 236 |         HeaderTokenWalker {
 237 |             cur_char_offset: 0,
 238 |             cur_token: 0,
 239 |             tokens,
 240 |         }
 241 |     }
 242 | }
 242 | 
 243 | /// Convert an address field from an email header into a structured type.
 244 | /// This function handles the most common formatting of to/from/cc/bcc fields
 245 | /// found in email headers. Note that if you are attempting to parse the
 246 | /// value of a `MailHeader`, it is better (both for correctness and performance)
 247 | /// to use the `addrparse_header` function instead of this one. Correctness
 248 | /// is impacted because of the way encoded words within the header are
 249 | /// processed: `MailHeader::get_value()` decodes encoded words, and the decoded
 250 | /// text may contain characters like commas that affect how `addrparse`
 251 | /// parses the value. This can produce incorrect results in some cases; using
 252 | /// `addrparse_header` will avoid this problem.
 253 | ///
 254 | /// # Examples
 255 | /// ```
 256 | ///     use mailparse::{addrparse, MailAddr, SingleInfo};
 257 | ///     match &addrparse("John Doe <john@doe.com>").unwrap()[0] {
 258 | ///         MailAddr::Single(info) => {
 259 | ///             assert_eq!(info.display_name, Some("John Doe".to_string()));
 260 | ///             assert_eq!(info.addr, "john@doe.com".to_string());
 261 | ///         }
 262 | ///         _ => panic!()
 263 | ///     };
 264 | /// ```
 265 | pub fn addrparse(addrs: &str) -> Result<MailAddrList, MailParseError> {
 266 |     // The whole input is handed to the walker as a single `Text` token.
 267 |     let mut walker = HeaderTokenWalker::new(vec![HeaderToken::Text(addrs)]);
 268 |     addrparse_inner(&mut walker, false)
 269 | }
 270 | 
 271 | /// Parse the value of an address-bearing `MailHeader` (e.g. from/to/cc/bcc)
 272 | /// into a structured list of addresses.
 273 | ///
 274 | /// # Examples
 275 | /// ```
 276 | ///     use mailparse::{addrparse_header, parse_mail, MailAddr, MailHeaderMap, SingleInfo};
 277 | ///     let mail = parse_mail(b"From: John Doe <john@doe.com>\n\nBlah Blah").unwrap();
 278 | ///     match &addrparse_header(mail.headers.get_first_header("From").unwrap()).unwrap()[0] {
 279 | ///         MailAddr::Single(info) => {
 280 | ///             assert_eq!(info.display_name, Some("John Doe".to_string()));
 281 | ///             assert_eq!(info.addr, "john@doe.com".to_string());
 282 | ///         }
 283 | ///         _ => panic!()
 284 | ///     };
 285 | /// ```
 286 | pub fn addrparse_header(header: &MailHeader) -> Result<MailAddrList, MailParseError> {
 287 |     let decoded = header.decode_utf8_or_latin1();
 288 |     let tokens = crate::header::normalized_tokens(&decoded);
 289 |     let mut walker = HeaderTokenWalker::new(tokens);
 290 |     addrparse_inner(&mut walker, false)
 291 | }
 292 | 
 293 | fn addrparse_inner(
 294 |     it: &mut HeaderTokenWalker,
 295 |     in_group: bool,
 296 | ) -> Result<MailAddrList, MailParseError> {
 297 |     let mut result = vec![];
 298 |     let mut state = AddrParseState::Initial;
 299 | 
 300 |     let mut hti = match it.next() {
 301 |         None => return Ok(MailAddrList(vec![])),
 302 |         Some(v) => v,
 303 |     };
 304 | 
 305 |     let mut name = None;
 306 |     let mut addr = None;
 307 |     let mut post_quote_ws = None;
 308 |     let mut comment_return = None;
 309 | 
 310 |     loop {
 311 |         match state {
 312 |             AddrParseState::Initial => {
 313 |                 match hti {
 314 |                     HeaderTokenItem::Char(c) => {
 315 |                         if c.is_whitespace() {
 316 |                             // continue in same state
 317 |                         } else if c == '"' {
 318 |                             state = AddrParseState::QuotedName;
 319 |                             name = Some(String::new());
 320 |                         } else if c == '<' {
 321 |                             state = AddrParseState::BracketedAddr;
 322 |                             addr = Some(String::new());
 323 |                         } else if c == ';' {
 324 |                             if !in_group {
 325 |                                 return Err(MailParseError::Generic(
 326 |                                     "Unexpected group terminator found in initial list",
 327 |                                 ));
 328 |                             }
 329 |                             return Ok(MailAddrList(result));
 330 |                         } else {
 331 |                             state = AddrParseState::Unquoted;
 332 |                             addr = Some(String::new());
 333 |                             addr.as_mut().unwrap().push(c);
 334 |                         }
 335 |                     }
 336 |                     HeaderTokenItem::Whitespace(_) => {
 337 |                         // continue in same state
 338 |                     }
 339 |                     HeaderTokenItem::Newline(_) => {
 340 |                         // continue in same state
 341 |                     }
 342 |                     HeaderTokenItem::DecodedWord(word) => {
 343 |                         state = AddrParseState::NameWithEncodedWord;
 344 |                         addr = Some(String::new());
 345 |                         addr.as_mut().unwrap().push_str(&word);
 346 |                     }
 347 |                 }
 348 |             }
 349 |             AddrParseState::QuotedName => match hti {
 350 |                 HeaderTokenItem::Char(c) => {
 351 |                     if c == '\\' {
 352 |                         state = AddrParseState::EscapedChar;
 353 |                     } else if c == '"' {
 354 |                         state = AddrParseState::AfterQuotedName;
 355 |                     } else {
 356 |                         name.as_mut().unwrap().push(c);
 357 |                     }
 358 |                 }
 359 |                 HeaderTokenItem::Whitespace(ws) => {
 360 |                     name.as_mut().unwrap().push_str(ws);
 361 |                 }
 362 |                 HeaderTokenItem::Newline(ws) => {
 363 |                     name.as_mut().unwrap().push_str(&ws);
 364 |                 }
 365 |                 HeaderTokenItem::DecodedWord(word) => {
 366 |                     name.as_mut().unwrap().push_str(&word);
 367 |                 }
 368 |             },
 369 |             AddrParseState::EscapedChar => match hti {
 370 |                 HeaderTokenItem::Char(c) => {
 371 |                     state = AddrParseState::QuotedName;
 372 |                     name.as_mut().unwrap().push(c);
 373 |                 }
 374 |                 HeaderTokenItem::Whitespace(ws) => {
 375 |                     state = AddrParseState::QuotedName;
 376 |                     name.as_mut().unwrap().push_str(ws);
 377 |                 }
 378 |                 HeaderTokenItem::Newline(ws) => {
 379 |                     state = AddrParseState::QuotedName;
 380 |                     name.as_mut().unwrap().push_str(&ws);
 381 |                 }
 382 |                 HeaderTokenItem::DecodedWord(_) => {
 383 |                     return Err(MailParseError::Generic(
 384 |                         "Unexpected encoded word found inside a quoted name",
 385 |                     ));
 386 |                 }
 387 |             },
 388 |             AddrParseState::AfterQuotedName => {
 389 |                 match hti {
 390 |                     HeaderTokenItem::Char(c) => {
 391 |                         if c.is_whitespace() {
 392 |                             if post_quote_ws.is_none() {
 393 |                                 post_quote_ws = Some(String::new());
 394 |                             }
 395 |                             post_quote_ws.as_mut().unwrap().push(c);
 396 |                         } else if c == '<' {
 397 |                             state = AddrParseState::BracketedAddr;
 398 |                             addr = Some(String::new());
 399 |                         } else if c == ':' {
 400 |                             if in_group {
 401 |                                 return Err(MailParseError::Generic(
 402 |                                     "Found unexpected nested group",
 403 |                                 ));
 404 |                             }
 405 |                             let group_addrs = addrparse_inner(it, true)?;
 406 |                             state = AddrParseState::Initial;
 407 |                             result.push(MailAddr::Group(GroupInfo::new(
 408 |                                 name.unwrap(),
 409 |                                 group_addrs
 410 |                                     .0
 411 |                                     .into_iter()
 412 |                                     .map(|addr| match addr {
 413 |                                         MailAddr::Single(s) => s,
 414 |                                         MailAddr::Group(_) => {
 415 |                                             panic!("Unexpected nested group encountered")
 416 |                                         }
 417 |                                     })
 418 |                                     .collect(),
 419 |                             )));
 420 |                             name = None;
 421 |                         } else {
 422 |                             // I think technically not valid, but this occurs in real-world corpus, so
 423 |                             // handle gracefully
 424 |                             if c == '"' {
 425 |                                 if let Some(ws) = post_quote_ws {
 426 |                                     name.as_mut().unwrap().push_str(&ws)
 427 |                                 }
 428 |                                 state = AddrParseState::QuotedName;
 429 |                             } else {
 430 |                                 if let Some(ws) = post_quote_ws {
 431 |                                     name.as_mut().unwrap().push_str(&ws)
 432 |                                 }
 433 |                                 name.as_mut().unwrap().push(c);
 434 |                             }
 435 |                             post_quote_ws = None;
 436 |                         }
 437 |                     }
 438 |                     HeaderTokenItem::Whitespace(ws) => {
 439 |                         if post_quote_ws.is_none() {
 440 |                             post_quote_ws = Some(String::new());
 441 |                         }
 442 |                         post_quote_ws.as_mut().unwrap().push_str(ws);
 443 |                     }
 444 |                     HeaderTokenItem::Newline(ws) => {
 445 |                         if post_quote_ws.is_none() {
 446 |                             post_quote_ws = Some(String::new());
 447 |                         }
 448 |                         post_quote_ws.as_mut().unwrap().push_str(&ws);
 449 |                     }
 450 |                     HeaderTokenItem::DecodedWord(word) => {
 451 |                         if let Some(ws) = post_quote_ws {
 452 |                             name.as_mut().unwrap().push_str(&ws)
 453 |                         }
 454 |                         name.as_mut().unwrap().push_str(&word);
 455 |                         post_quote_ws = None;
 456 |                     }
 457 |                 }
 458 |             }
 459 |             AddrParseState::BracketedAddr => match hti {
 460 |                 HeaderTokenItem::Char(c) => {
 461 |                     if c == '>' {
 462 |                         state = AddrParseState::AfterBracketedAddr;
 463 |                         result.push(MailAddr::Single(SingleInfo::new(name, addr.unwrap())?));
 464 |                         name = None;
 465 |                         addr = None;
 466 |                     } else {
 467 |                         addr.as_mut().unwrap().push(c);
 468 |                     }
 469 |                 }
 470 |                 HeaderTokenItem::Whitespace(ws) => {
 471 |                     addr.as_mut().unwrap().push_str(ws);
 472 |                 }
 473 |                 HeaderTokenItem::Newline(ws) => {
 474 |                     addr.as_mut().unwrap().push_str(&ws);
 475 |                 }
 476 |                 HeaderTokenItem::DecodedWord(_) => {
 477 |                     return Err(MailParseError::Generic(
 478 |                         "Unexpected encoded word found inside bracketed address",
 479 |                     ));
 480 |                 }
 481 |             },
 482 |             AddrParseState::AfterBracketedAddr => {
 483 |                 match hti {
 484 |                     HeaderTokenItem::Char(c) => {
 485 |                         if c.is_whitespace() {
 486 |                             // continue in same state
 487 |                         } else if c == ',' {
 488 |                             state = AddrParseState::Initial;
 489 |                         } else if c == ';' {
 490 |                             if in_group {
 491 |                                 return Ok(MailAddrList(result));
 492 |                             }
 493 |                             // Technically not valid, but a similar case occurs in real-world corpus, so handle it gracefully
 494 |                             state = AddrParseState::Initial;
 495 |                         } else if c == '(' {
 496 |                             comment_return = Some(AddrParseState::AfterBracketedAddr);
 497 |                             state = AddrParseState::Comment;
 498 |                         } else {
 499 |                             return Err(MailParseError::Generic(
 500 |                                 "Unexpected char found after bracketed address",
 501 |                             ));
 502 |                         }
 503 |                     }
 504 |                     HeaderTokenItem::Whitespace(_) => {
 505 |                         // continue in same state
 506 |                     }
 507 |                     HeaderTokenItem::Newline(_) => {
 508 |                         // continue in same state
 509 |                     }
 510 |                     HeaderTokenItem::DecodedWord(_) => {
 511 |                         return Err(MailParseError::Generic(
 512 |                             "Unexpected encoded word found after bracketed address",
 513 |                         ));
 514 |                     }
 515 |                 }
 516 |             }
 517 |             AddrParseState::NameWithEncodedWord => match hti {
 518 |                 HeaderTokenItem::Char(c) => {
 519 |                     if c == '<' {
 520 |                         state = AddrParseState::BracketedAddr;
 521 |                         name = addr.map(|s| s.trim_end().to_owned());
 522 |                         addr = Some(String::new());
 523 |                     } else if c == ':' {
 524 |                         if in_group {
 525 |                             return Err(MailParseError::Generic("Found unexpected nested group"));
 526 |                         }
 527 |                         let group_addrs = addrparse_inner(it, true)?;
 528 |                         state = AddrParseState::Initial;
 529 |                         result.push(MailAddr::Group(GroupInfo::new(
 530 |                             addr.unwrap().trim_end().to_owned(),
 531 |                             group_addrs
 532 |                                 .0
 533 |                                 .into_iter()
 534 |                                 .map(|addr| match addr {
 535 |                                     MailAddr::Single(s) => s,
 536 |                                     MailAddr::Group(_) => {
 537 |                                         panic!("Unexpected nested group encountered")
 538 |                                     }
 539 |                                 })
 540 |                                 .collect(),
 541 |                         )));
 542 |                         addr = None;
 543 |                     } else {
 544 |                         addr.as_mut().unwrap().push(c);
 545 |                     }
 546 |                 }
 547 |                 HeaderTokenItem::Whitespace(ws) => {
 548 |                     addr.as_mut().unwrap().push_str(ws);
 549 |                 }
 550 |                 HeaderTokenItem::Newline(ws) => {
 551 |                     addr.as_mut().unwrap().push_str(&ws);
 552 |                 }
 553 |                 HeaderTokenItem::DecodedWord(word) => {
 554 |                     addr.as_mut().unwrap().push_str(&word);
 555 |                 }
 556 |             },
 557 |             AddrParseState::Unquoted => {
 558 |                 match hti {
 559 |                     HeaderTokenItem::Char(c) => {
 560 |                         if c == '<' {
 561 |                             state = AddrParseState::BracketedAddr;
 562 |                             name = addr.map(|s| s.trim_end().to_owned());
 563 |                             addr = Some(String::new());
 564 |                         } else if c == ',' {
 565 |                             state = AddrParseState::Initial;
 566 |                             result.push(MailAddr::Single(SingleInfo::new(
 567 |                                 None,
 568 |                                 addr.unwrap().trim_end().to_owned(),
 569 |                             )?));
 570 |                             addr = None;
 571 |                         } else if c == ';' {
 572 |                             result.push(MailAddr::Single(SingleInfo::new(
 573 |                                 None,
 574 |                                 addr.unwrap().trim_end().to_owned(),
 575 |                             )?));
 576 |                             if in_group {
 577 |                                 return Ok(MailAddrList(result));
 578 |                             }
 579 |                             // Technically not valid, but occurs in real-world corpus, so handle it gracefully
 580 |                             state = AddrParseState::Initial;
 581 |                             addr = None;
 582 |                         } else if c == ':' {
 583 |                             if in_group {
 584 |                                 return Err(MailParseError::Generic(
 585 |                                     "Found unexpected nested group",
 586 |                                 ));
 587 |                             }
 588 |                             let group_addrs = addrparse_inner(it, true)?;
 589 |                             state = AddrParseState::Initial;
 590 |                             result.push(MailAddr::Group(GroupInfo::new(
 591 |                                 addr.unwrap().trim_end().to_owned(),
 592 |                                 group_addrs
 593 |                                     .0
 594 |                                     .into_iter()
 595 |                                     .map(|addr| match addr {
 596 |                                         MailAddr::Single(s) => s,
 597 |                                         MailAddr::Group(_) => {
 598 |                                             panic!("Unexpected nested group encountered")
 599 |                                         }
 600 |                                     })
 601 |                                     .collect(),
 602 |                             )));
 603 |                             addr = None;
 604 |                         } else if c == '(' {
 605 |                             comment_return = Some(AddrParseState::Unquoted);
 606 |                             state = AddrParseState::Comment;
 607 |                         } else {
 608 |                             addr.as_mut().unwrap().push(c);
 609 |                         }
 610 |                     }
 611 |                     HeaderTokenItem::Whitespace(ws) => {
 612 |                         addr.as_mut().unwrap().push_str(ws);
 613 |                     }
 614 |                     HeaderTokenItem::Newline(ws) => {
 615 |                         addr.as_mut().unwrap().push_str(&ws);
 616 |                     }
 617 |                     HeaderTokenItem::DecodedWord(word) => {
 618 |                         state = AddrParseState::NameWithEncodedWord;
 619 |                         addr.as_mut().unwrap().push_str(&word);
 620 |                     }
 621 |                 }
 622 |             }
 623 |             AddrParseState::Comment => {
 624 |                 match hti {
 625 |                     HeaderTokenItem::Char(c) => {
 626 |                         if c == ')' {
 627 |                             state = comment_return.take().unwrap();
 628 |                         }
 629 |                     }
 630 |                     HeaderTokenItem::Whitespace(_) => {
 631 |                         // ignore and stay in same state
 632 |                     }
 633 |                     HeaderTokenItem::Newline(_) => {
 634 |                         // ignore and stay in same state
 635 |                     }
 636 |                     HeaderTokenItem::DecodedWord(_) => {
 637 |                         // ignore and stay in same state
 638 |                     }
 639 |                 }
 640 |             }
 641 |         }
 642 | 
 643 |         hti = match it.next() {
 644 |             None => break,
 645 |             Some(v) => v,
 646 |         };
 647 |     }
 648 | 
 649 |     if in_group {
 650 |         return Err(MailParseError::Generic("Found unterminated group address"));
 651 |     }
 652 | 
 653 |     match state {
 654 |         AddrParseState::QuotedName
 655 |         | AddrParseState::EscapedChar
 656 |         | AddrParseState::AfterQuotedName
 657 |         | AddrParseState::BracketedAddr
 658 |         | AddrParseState::Comment
 659 |         | AddrParseState::NameWithEncodedWord => Err(MailParseError::Generic(
 660 |             "Address string unexpectedly terminated",
 661 |         )),
 662 |         AddrParseState::Unquoted => {
 663 |             result.push(MailAddr::Single(SingleInfo::new(
 664 |                 None,
 665 |                 addr.unwrap().trim_end().to_owned(),
 666 |             )?));
 667 |             Ok(MailAddrList(result))
 668 |         }
 669 |         _ => Ok(MailAddrList(result)),
 670 |     }
 671 | }
 672 | 
 673 | #[cfg(test)]
 674 | mod tests {
 675 |     use super::*;
 676 | 
 677 |     #[test]
 678 |     fn parse_basic() {
 679 |         assert_eq!(
 680 |             addrparse("foo bar <foo@bar.com>").unwrap(),
 681 |             MailAddrList(vec![MailAddr::Single(
 682 |                 SingleInfo::new(Some("foo bar".to_string()), "foo@bar.com".to_string()).unwrap()
 683 |             )])
 684 |         );
 685 |         assert_eq!(
 686 |             addrparse("\"foo bar\" <foo@bar.com>").unwrap(),
 687 |             MailAddrList(vec![MailAddr::Single(
 688 |                 SingleInfo::new(Some("foo bar".to_string()), "foo@bar.com".to_string()).unwrap()
 689 |             )])
 690 |         );
 691 |         assert_eq!(
 692 |             addrparse("foo@bar.com ").unwrap(),
 693 |             MailAddrList(vec![MailAddr::Single(
 694 |                 SingleInfo::new(None, "foo@bar.com".to_string()).unwrap()
 695 |             )])
 696 |         );
 697 |         assert_eq!(
 698 |             addrparse("foo <bar@baz.com>").unwrap(),
 699 |             MailAddrList(vec![MailAddr::Single(
 700 |                 SingleInfo::new(Some("foo".to_string()), "bar@baz.com".to_string()).unwrap()
 701 |             )])
 702 |         );
 703 |         assert_eq!(
 704 |             addrparse("\"foo\" <bar@baz.com>").unwrap(),
 705 |             MailAddrList(vec![MailAddr::Single(
 706 |                 SingleInfo::new(Some("foo".to_string()), "bar@baz.com".to_string()).unwrap()
 707 |             )])
 708 |         );
 709 |         assert_eq!(
 710 |             addrparse("\"foo \" <bar@baz.com>").unwrap(),
 711 |             MailAddrList(vec![MailAddr::Single(
 712 |                 SingleInfo::new(Some("foo ".to_string()), "bar@baz.com".to_string()).unwrap()
 713 |             )])
 714 |         );
 715 |     }
 716 | 
 717 |     #[test]
 718 |     fn parse_backslashes() {
 719 |         assert_eq!(
 720 |             addrparse(r#" "First \"nick\" Last" <user@host.tld> "#).unwrap(),
 721 |             MailAddrList(vec![MailAddr::Single(
 722 |                 SingleInfo::new(
 723 |                     Some("First \"nick\" Last".to_string()),
 724 |                     "user@host.tld".to_string()
 725 |                 )
 726 |                 .unwrap()
 727 |             )])
 728 |         );
 729 |         assert_eq!(
 730 |             addrparse(r#" First \"nick\" Last <user@host.tld> "#).unwrap(),
 731 |             MailAddrList(vec![MailAddr::Single(
 732 |                 SingleInfo::new(
 733 |                     Some("First \\\"nick\\\" Last".to_string()),
 734 |                     "user@host.tld".to_string()
 735 |                 )
 736 |                 .unwrap()
 737 |             )])
 738 |         );
 739 |     }
 740 | 
 741 |     #[test]
 742 |     fn parse_multi() {
 743 |         assert_eq!(
 744 |             addrparse("foo <ba@r>, jo@e, baz <qu@ux>").unwrap(),
 745 |             MailAddrList(vec![
 746 |                 MailAddr::Single(
 747 |                     SingleInfo::new(Some("foo".to_string()), "ba@r".to_string()).unwrap()
 748 |                 ),
 749 |                 MailAddr::Single(SingleInfo::new(None, "jo@e".to_string()).unwrap()),
 750 |                 MailAddr::Single(
 751 |                     SingleInfo::new(Some("baz".to_string()), "qu@ux".to_string()).unwrap()
 752 |                 ),
 753 |             ])
 754 |         );
 755 |     }
 756 | 
 757 |     #[test]
 758 |     fn parse_empty_group() {
 759 |         assert_eq!(
 760 |             addrparse("empty-group:;").unwrap(),
 761 |             MailAddrList(vec![MailAddr::Group(GroupInfo::new(
 762 |                 "empty-group".to_string(),
 763 |                 vec![]
 764 |             ))])
 765 |         );
 766 |         assert_eq!(
 767 |             addrparse(" empty-group : ; ").unwrap(),
 768 |             MailAddrList(vec![MailAddr::Group(GroupInfo::new(
 769 |                 "empty-group".to_string(),
 770 |                 vec![]
 771 |             ))])
 772 |         );
 773 |     }
 774 | 
 775 |     #[test]
 776 |     fn parse_simple_group() {
 777 |         assert_eq!(
 778 |             addrparse("bar-group: foo <foo@bar.com>;").unwrap(),
 779 |             MailAddrList(vec![MailAddr::Group(GroupInfo::new(
 780 |                 "bar-group".to_string(),
 781 |                 vec![SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()).unwrap(),]
 782 |             ))])
 783 |         );
 784 |         assert_eq!(
 785 |             addrparse("bar-group: foo <foo@bar.com>, baz@bar.com;").unwrap(),
 786 |             MailAddrList(vec![MailAddr::Group(GroupInfo::new(
 787 |                 "bar-group".to_string(),
 788 |                 vec![
 789 |                     SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()).unwrap(),
 790 |                     SingleInfo::new(None, "baz@bar.com".to_string()).unwrap(),
 791 |                 ]
 792 |             ))])
 793 |         );
 794 |     }
 795 | 
 796 |     #[test]
 797 |     fn parse_mixed() {
 798 |         assert_eq!(
 799 |             addrparse("joe@bloe.com, bar-group: foo <foo@bar.com>;").unwrap(),
 800 |             MailAddrList(vec![
 801 |                 MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()),
 802 |                 MailAddr::Group(GroupInfo::new(
 803 |                     "bar-group".to_string(),
 804 |                     vec![
 805 |                         SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string())
 806 |                             .unwrap(),
 807 |                     ]
 808 |                 )),
 809 |             ])
 810 |         );
 811 |         assert_eq!(
 812 |             addrparse("bar-group: foo <foo@bar.com>; joe@bloe.com").unwrap(),
 813 |             MailAddrList(vec![
 814 |                 MailAddr::Group(GroupInfo::new(
 815 |                     "bar-group".to_string(),
 816 |                     vec![
 817 |                         SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string())
 818 |                             .unwrap(),
 819 |                     ]
 820 |                 )),
 821 |                 MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()),
 822 |             ])
 823 |         );
 824 |         assert_eq!(
 825 |             addrparse("flim@flam.com, bar-group: foo <foo@bar.com>; joe@bloe.com").unwrap(),
 826 |             MailAddrList(vec![
 827 |                 MailAddr::Single(SingleInfo::new(None, "flim@flam.com".to_string()).unwrap()),
 828 |                 MailAddr::Group(GroupInfo::new(
 829 |                     "bar-group".to_string(),
 830 |                     vec![
 831 |                         SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string())
 832 |                             .unwrap(),
 833 |                     ]
 834 |                 )),
 835 |                 MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()),
 836 |             ])
 837 |         );
 838 |         assert_eq!(
 839 |             addrparse("first-group:; flim@flam.com, bar-group: foo <foo@bar.com>; joe@bloe.com, final-group: zi@p, za@p, \"Zaphod\" <zaphod@beeblebrox>;").unwrap(),
 840 |             MailAddrList(vec![
 841 |                 MailAddr::Group(GroupInfo::new("first-group".to_string(), vec![])),
 842 |                 MailAddr::Single(SingleInfo::new(None, "flim@flam.com".to_string()).unwrap()),
 843 |                 MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![
 844 |                     SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()).unwrap(),
 845 |                 ])),
 846 |                 MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()),
 847 |                 MailAddr::Group(GroupInfo::new("final-group".to_string(), vec![
 848 |                     SingleInfo::new(None, "zi@p".to_string()).unwrap(),
 849 |                     SingleInfo::new(None, "za@p".to_string()).unwrap(),
 850 |                     SingleInfo::new(Some("Zaphod".to_string()), "zaphod@beeblebrox".to_string()).unwrap(),
 851 |                 ])),
 852 |             ])
 853 |         );
 854 |     }
 855 | 
 856 |     #[test]
 857 |     fn real_world_examples() {
 858 |         // taken from a real "From" header. This might not be valid according to the RFC
 859 |         // but obviously made it through the internet so we should at least not crash.
 860 |         assert_eq!(
 861 |             addrparse("\"The Foo of Bar\" Course Staff <foo-no-reply@bar.edx.org>").unwrap(),
 862 |             MailAddrList(vec![MailAddr::Single(
 863 |                 SingleInfo::new(
 864 |                     Some("The Foo of Bar Course Staff".to_string()),
 865 |                     "foo-no-reply@bar.edx.org".to_string()
 866 |                 )
 867 |                 .unwrap()
 868 |             )])
 869 |         );
 870 | 
 871 |         // This one has a comment tacked on to the end. Adding proper support for comments seems
 872 |         // complicated so I just added trailer comment support.
 873 |         assert_eq!(
 874 |             addrparse("John Doe <support@github.com> (GitHub Staff)").unwrap(),
 875 |             MailAddrList(vec![MailAddr::Single(
 876 |                 SingleInfo::new(
 877 |                     Some("John Doe".to_string()),
 878 |                     "support@github.com".to_string()
 879 |                 )
 880 |                 .unwrap()
 881 |             )])
 882 |         );
 883 | 
 884 |         // Taken from a real world "To" header. It was spam, but still...
 885 |         assert_eq!(
 886 |             addrparse("foo@bar.com;").unwrap(),
 887 |             MailAddrList(vec![MailAddr::Single(
 888 |                 SingleInfo::new(None, "foo@bar.com".to_string()).unwrap()
 889 |             )])
 890 |         );
 891 | 
 892 |         // From https://github.com/deltachat/deltachat-core-rust/pull/1476#issuecomment-629681157
 893 |         assert_eq!(
 894 |             addrparse("mailer-daemon@hq5.merlinux.eu (mail delivery system)").unwrap(),
 895 |             MailAddrList(vec![MailAddr::Single(
 896 |                 SingleInfo::new(None, "mailer-daemon@hq5.merlinux.eu".to_string()).unwrap()
 897 |             )])
 898 |         );
 899 |     }
 900 | 
 901 |     #[test]
 902 |     fn stringify_single() {
 903 |         let tc = SingleInfo::new(Some("John Doe".to_string()), "john@doe.com".to_string()).unwrap();
 904 |         assert_eq!(tc.to_string(), r#""John Doe" <john@doe.com>"#);
 905 |         assert_eq!(
 906 |             addrparse(&tc.to_string()).unwrap(),
 907 |             MailAddrList(vec![MailAddr::Single(tc)])
 908 |         );
 909 | 
 910 |         let tc = SingleInfo::new(
 911 |             Some(r#"John "Jack" Doe"#.to_string()),
 912 |             "john@doe.com".to_string(),
 913 |         )
 914 |         .unwrap();
 915 |         assert_eq!(tc.to_string(), r#""John \"Jack\" Doe" <john@doe.com>"#);
 916 |         assert_eq!(
 917 |             addrparse(&tc.to_string()).unwrap(),
 918 |             MailAddrList(vec![MailAddr::Single(tc)])
 919 |         );
 920 | 
 921 |         let tc = SingleInfo::new(None, "foo@bar.com".to_string()).unwrap();
 922 |         assert_eq!(tc.to_string(), r#"foo@bar.com"#);
 923 |         assert_eq!(
 924 |             addrparse(&tc.to_string()).unwrap(),
 925 |             MailAddrList(vec![MailAddr::Single(tc)])
 926 |         );
 927 |     }
 928 | 
 929 |     #[test]
 930 |     fn stringify_group() {
 931 |         let tc = GroupInfo::new(
 932 |             "group-name".to_string(),
 933 |             vec![
 934 |                 SingleInfo::new(None, "foo@bar.com".to_string()).unwrap(),
 935 |                 SingleInfo::new(Some("A".to_string()), "a@b".to_string()).unwrap(),
 936 |             ],
 937 |         );
 938 |         assert_eq!(tc.to_string(), r#""group-name": foo@bar.com, "A" <a@b>;"#);
 939 |         assert_eq!(
 940 |             addrparse(&tc.to_string()).unwrap(),
 941 |             MailAddrList(vec![MailAddr::Group(tc)])
 942 |         );
 943 | 
 944 |         let tc = GroupInfo::new("empty-group".to_string(), vec![]);
 945 |         assert_eq!(tc.to_string(), r#""empty-group":;"#);
 946 |         assert_eq!(
 947 |             addrparse(&tc.to_string()).unwrap(),
 948 |             MailAddrList(vec![MailAddr::Group(tc)])
 949 |         );
 950 | 
 951 |         let tc = GroupInfo::new(r#"group-with"quote"#.to_string(), vec![]);
 952 |         assert_eq!(tc.to_string(), r#""group-with\"quote":;"#);
 953 |         assert_eq!(
 954 |             addrparse(&tc.to_string()).unwrap(),
 955 |             MailAddrList(vec![MailAddr::Group(tc)])
 956 |         );
 957 |     }
 958 | 
 959 |     #[test]
 960 |     fn stringify_list() {
 961 |         let tc = MailAddrList(vec![
 962 |             MailAddr::Group(GroupInfo::new(
 963 |                 "marvel".to_string(),
 964 |                 vec![
 965 |                     SingleInfo::new(None, "ironman@marvel.com".to_string()).unwrap(),
 966 |                     SingleInfo::new(None, "spiderman@marvel.com".to_string()).unwrap(),
 967 |                 ],
 968 |             )),
 969 |             MailAddr::Single(
 970 |                 SingleInfo::new(Some("b-man".to_string()), "b@man.com".to_string()).unwrap(),
 971 |             ),
 972 |             MailAddr::Group(GroupInfo::new(
 973 |                 "dc".to_string(),
 974 |                 vec![
 975 |                     SingleInfo::new(None, "batman@dc.com".to_string()).unwrap(),
 976 |                     SingleInfo::new(None, "superman@dc.com".to_string()).unwrap(),
 977 |                 ],
 978 |             )),
 979 |             MailAddr::Single(
 980 |                 SingleInfo::new(Some("d-woman".to_string()), "d@woman.com".to_string()).unwrap(),
 981 |             ),
 982 |         ]);
 983 |         assert_eq!(
 984 |             tc.to_string(),
 985 |             r#""marvel": ironman@marvel.com, spiderman@marvel.com; "b-man" <b@man.com>, "dc": batman@dc.com, superman@dc.com; "d-woman" <d@woman.com>"#
 986 |         );
 987 |     }
 988 | 
 989 |     #[test]
 990 |     fn count_addrs() {
 991 |         let tc = MailAddrList(vec![
 992 |             MailAddr::Group(GroupInfo::new(
 993 |                 "marvel".to_string(),
 994 |                 vec![
 995 |                     SingleInfo::new(None, "ironman@marvel.com".to_string()).unwrap(),
 996 |                     SingleInfo::new(None, "spiderman@marvel.com".to_string()).unwrap(),
 997 |                 ],
 998 |             )),
 999 |             MailAddr::Single(
1000 |                 SingleInfo::new(Some("b-man".to_string()), "b@man.com".to_string()).unwrap(),
1001 |             ),
1002 |             MailAddr::Group(GroupInfo::new(
1003 |                 "dc".to_string(),
1004 |                 vec![
1005 |                     SingleInfo::new(None, "batman@dc.com".to_string()).unwrap(),
1006 |                     SingleInfo::new(None, "superman@dc.com".to_string()).unwrap(),
1007 |                 ],
1008 |             )),
1009 |             MailAddr::Single(
1010 |                 SingleInfo::new(Some("d-woman".to_string()), "d@woman.com".to_string()).unwrap(),
1011 |             ),
1012 |         ]);
1013 |         assert_eq!(tc.count_addrs(), 6);
1014 |         assert_eq!(tc.extract_single_info(), None);
1015 | 
1016 |         let tc = MailAddrList(vec![]);
1017 |         assert_eq!(tc.count_addrs(), 0);
1018 |         assert_eq!(tc.extract_single_info(), None);
1019 | 
1020 |         let tc = MailAddrList(vec![MailAddr::Group(GroupInfo::new(
1021 |             "group".to_string(),
1022 |             vec![SingleInfo::new(None, "foo@bar.com".to_string()).unwrap()],
1023 |         ))]);
1024 |         assert_eq!(tc.count_addrs(), 1);
1025 |         assert_eq!(tc.extract_single_info(), None);
1026 | 
1027 |         let tc = MailAddrList(vec![MailAddr::Single(
1028 |             SingleInfo::new(None, "foo@bar.com".to_string()).unwrap(),
1029 |         )]);
1030 |         assert_eq!(tc.count_addrs(), 1);
1031 |         assert_eq!(
1032 |             tc.extract_single_info(),
1033 |             Some(SingleInfo::new(None, "foo@bar.com".to_string()).unwrap())
1034 |         );
1035 | 
1036 |         let tc = MailAddrList(vec![
1037 |             MailAddr::Group(GroupInfo::new("group".to_string(), vec![])),
1038 |             MailAddr::Group(GroupInfo::new("group".to_string(), vec![])),
1039 |         ]);
1040 |         assert_eq!(tc.count_addrs(), 0);
1041 |         assert_eq!(tc.extract_single_info(), None);
1042 |     }
1043 | 
1044 |     #[test]
1045 |     fn parse_invalid() {
1046 |         assert!(addrparse("foo").is_err());
1047 |         assert!(addrparse("foo <bar>").is_err());
1048 |         assert!(addrparse("group: foo <bar>;").is_err());
1049 |     }
1050 | 
1051 |     #[test]
1052 |     fn parse_with_encoded() {
1053 |         let (parsed, _) = crate::parse_header(
1054 |             b"From: =?UTF-8?B?0JjQvNGPLCDQpNCw0LzQuNC70LjRjw==?= <foobar@example.com>",
1055 |         )
1056 |         .unwrap();
1057 |         assert_eq!(
1058 |             addrparse_header(&parsed).unwrap(),
1059 |             MailAddrList(vec![MailAddr::Single(
1060 |                 SingleInfo::new(
1061 |                     Some("Имя, Фамилия".to_string()),
1062 |                     "foobar@example.com".to_string()
1063 |                 )
1064 |                 .unwrap()
1065 |             )])
1066 |         );
1067 |     }
1068 | 
1069 |     #[test]
1070 |     fn parse_quoted_encoded() {
1071 |         let (parsed, _) =
1072 |             crate::parse_header(b"From: \"=?utf-8?q?G=C3=B6tz?= C\" <g@c.de>").unwrap();
1073 |         assert_eq!(
1074 |             addrparse_header(&parsed).unwrap(),
1075 |             MailAddrList(vec![MailAddr::Single(
1076 |                 SingleInfo::new(Some("Götz C".to_string()), "g@c.de".to_string()).unwrap()
1077 |             )])
1078 |         );
1079 |     }
1080 | 
1081 |     #[test]
1082 |     fn parse_second_encoded() {
1083 |         let (parsed, _) = crate::parse_header(
1084 |             b"To: foo <foo@example.org>,=?UTF-8?B?Zm9v8J+Qm2Jhcg==?= <bar@example.org>",
1085 |         )
1086 |         .unwrap();
1087 |         assert_eq!(
1088 |             addrparse_header(&parsed).unwrap(),
1089 |             MailAddrList(vec![
1090 |                 MailAddr::Single(
1091 |                     SingleInfo::new(Some("foo".to_string()), "foo@example.org".to_string())
1092 |                         .unwrap()
1093 |                 ),
1094 |                 MailAddr::Single(
1095 |                     SingleInfo::new(
1096 |                         Some("foo\u{1f41b}bar".to_string()),
1097 |                         "bar@example.org".to_string()
1098 |                     )
1099 |                     .unwrap()
1100 |                 )
1101 |             ])
1102 |         );
1103 |     }
1104 | }
1105 | 


--------------------------------------------------------------------------------
/src/body.rs:
--------------------------------------------------------------------------------
  1 | use charset::{decode_ascii, Charset};
  2 | 
  3 | use crate::{MailParseError, ParsedContentType};
  4 | 
/// Represents the body of an email (or mail subpart).
///
/// Each variant corresponds to one Content-Transfer-Encoding value and wraps a struct that
/// borrows the body bytes and the parsed Content-Type. `Body::new` selects the variant;
/// a missing or unrecognized encoding is treated as `SevenBit`.
pub enum Body<'a> {
    /// A body with 'base64' Content-Transfer-Encoding.
    Base64(EncodedBody<'a>),
    /// A body with 'quoted-printable' Content-Transfer-Encoding.
    QuotedPrintable(EncodedBody<'a>),
    /// A body with '7bit' Content-Transfer-Encoding.
    /// This is also the variant used when no known encoding was given.
    SevenBit(TextBody<'a>),
    /// A body with '8bit' Content-Transfer-Encoding.
    EightBit(TextBody<'a>),
    /// A body with 'binary' Content-Transfer-Encoding.
    Binary(BinaryBody<'a>),
}
 18 | 
 19 | impl<'a> Body<'a> {
 20 |     pub fn new(
 21 |         body: &'a [u8],
 22 |         ctype: &'a ParsedContentType,
 23 |         transfer_encoding: &Option<String>,
 24 |     ) -> Body<'a> {
 25 |         transfer_encoding
 26 |             .as_ref()
 27 |             .map(|encoding| match encoding.as_ref() {
 28 |                 "base64" => Body::Base64(EncodedBody {
 29 |                     decoder: decode_base64,
 30 |                     body,
 31 |                     ctype,
 32 |                 }),
 33 |                 "quoted-printable" => Body::QuotedPrintable(EncodedBody {
 34 |                     decoder: decode_quoted_printable,
 35 |                     body,
 36 |                     ctype,
 37 |                 }),
 38 |                 "7bit" => Body::SevenBit(TextBody { body, ctype }),
 39 |                 "8bit" => Body::EightBit(TextBody { body, ctype }),
 40 |                 "binary" => Body::Binary(BinaryBody { body, ctype }),
 41 |                 _ => Body::get_default(body, ctype),
 42 |             })
 43 |             .unwrap_or_else(|| Body::get_default(body, ctype))
 44 |     }
 45 | 
 46 |     fn get_default(body: &'a [u8], ctype: &'a ParsedContentType) -> Body<'a> {
 47 |         Body::SevenBit(TextBody { body, ctype })
 48 |     }
 49 | }
 50 | 
/// Struct that holds the encoded body representation of the message (or message subpart).
///
/// Used for the `Base64` and `QuotedPrintable` variants of `Body`; the
/// `decoder` field is what distinguishes the two.
pub struct EncodedBody<'a> {
    /// Function that turns the raw (still encoded) bytes into decoded bytes.
    decoder: fn(&[u8]) -> Result<Vec<u8>, MailParseError>,
    /// Parsed Content-Type that applies to this body.
    ctype: &'a ParsedContentType,
    /// Raw body bytes, exactly as they appear in the message.
    body: &'a [u8],
}
 57 | 
 58 | impl<'a> EncodedBody<'a> {
 59 |     /// Get the body Content-Type
 60 |     pub fn get_content_type(&self) -> &'a ParsedContentType {
 61 |         self.ctype
 62 |     }
 63 | 
 64 |     /// Get the raw body of the message exactly as it is written in the message (or message subpart).
 65 |     pub fn get_raw(&self) -> &'a [u8] {
 66 |         self.body
 67 |     }
 68 | 
 69 |     /// Get the decoded body of the message (or message subpart).
 70 |     pub fn get_decoded(&self) -> Result<Vec<u8>, MailParseError> {
 71 |         (self.decoder)(self.body)
 72 |     }
 73 | 
 74 |     /// Get the body of the message as a Rust string.
 75 |     /// This function tries to decode the body and then converts
 76 |     /// the result into a Rust UTF-8 string using the charset in the Content-Type
 77 |     /// (or "us-ascii" if the charset was missing or not recognized).
 78 |     /// This operation returns a valid result only if the decoded body
 79 |     /// has a text format.
 80 |     pub fn get_decoded_as_string(&self) -> Result<String, MailParseError> {
 81 |         get_body_as_string(&self.get_decoded()?, self.ctype)
 82 |     }
 83 | }
 84 | 
/// Struct that holds the textual body representation of the message (or message subpart).
///
/// Used for the `SevenBit` and `EightBit` variants of `Body`.
pub struct TextBody<'a> {
    /// Parsed Content-Type that applies to this body.
    ctype: &'a ParsedContentType,
    /// Raw body bytes, exactly as they appear in the message.
    body: &'a [u8],
}
 90 | 
 91 | impl<'a> TextBody<'a> {
 92 |     /// Get the body Content-Type
 93 |     pub fn get_content_type(&self) -> &'a ParsedContentType {
 94 |         self.ctype
 95 |     }
 96 | 
 97 |     /// Get the raw body of the message exactly as it is written in the message (or message subpart).
 98 |     pub fn get_raw(&self) -> &'a [u8] {
 99 |         self.body
100 |     }
101 | 
102 |     /// Get the body of the message as a Rust string.
103 |     /// This function converts the body into a Rust UTF-8 string using the charset
104 |     /// in the Content-Type
105 |     /// (or "us-ascii" if the charset was missing or not recognized).
106 |     pub fn get_as_string(&self) -> Result<String, MailParseError> {
107 |         get_body_as_string(self.body, self.ctype)
108 |     }
109 | }
110 | 
/// Struct that holds a binary body representation of the message (or message subpart).
///
/// Used for the `Binary` variant of `Body`.
pub struct BinaryBody<'a> {
    /// Parsed Content-Type that applies to this body.
    ctype: &'a ParsedContentType,
    /// Raw body bytes, exactly as they appear in the message.
    body: &'a [u8],
}
116 | 
117 | impl<'a> BinaryBody<'a> {
118 |     /// Get the body Content-Type
119 |     pub fn get_content_type(&self) -> &'a ParsedContentType {
120 |         self.ctype
121 |     }
122 | 
123 |     /// Get the raw body of the message exactly as it is written in the message (or message subpart).
124 |     pub fn get_raw(&self) -> &'a [u8] {
125 |         self.body
126 |     }
127 | 
128 |     /// Get the body of the message as a Rust string. This function attempts
129 |     /// to convert the body into a Rust UTF-8 string using the charset in the
130 |     /// Content-Type header (or "us-ascii" as default). However, this may not
131 |     /// always work for "binary" data. The API is provided anyway for
132 |     /// convenient handling of real-world emails that may provide textual data
133 |     /// with a binary transfer encoding, but use this at your own risk!
134 |     pub fn get_as_string(&self) -> Result<String, MailParseError> {
135 |         get_body_as_string(self.body, self.ctype)
136 |     }
137 | }
138 | 
139 | fn decode_base64(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
140 |     let cleaned = body
141 |         .iter()
142 |         .filter(|c| !c.is_ascii_whitespace())
143 |         .cloned()
144 |         .collect::<Vec<u8>>();
145 |     Ok(data_encoding::BASE64_MIME_PERMISSIVE.decode(&cleaned)?)
146 | }
147 | 
148 | fn decode_quoted_printable(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
149 |     Ok(quoted_printable::decode(
150 |         body,
151 |         quoted_printable::ParseMode::Robust,
152 |     )?)
153 | }
154 | 
155 | fn get_body_as_string(body: &[u8], ctype: &ParsedContentType) -> Result<String, MailParseError> {
156 |     let cow = if let Some(charset) = Charset::for_label(ctype.charset.as_bytes()) {
157 |         let (cow, _, _) = charset.decode(body);
158 |         cow
159 |     } else {
160 |         decode_ascii(body)
161 |     };
162 |     Ok(cow.into_owned())
163 | }
164 | 


--------------------------------------------------------------------------------
/src/dateparse.rs:
--------------------------------------------------------------------------------
  1 | use crate::MailParseError;
  2 | 
/// The field that `dateparse` expects to read next. The states are visited
/// in declaration order, one token per state.
enum DateParseState {
    /// Expecting the day of the month (non-numeric tokens are skipped).
    Date,
    /// Expecting a month name.
    Month,
    /// Expecting the year.
    Year,
    /// Expecting the hour.
    Hour,
    /// Expecting the minute.
    Minute,
    /// Expecting the second.
    Second,
    /// Expecting a numeric or named timezone; parsing stops after it.
    Timezone,
}
 12 | 
 13 | fn days_in_month(month: i64, year: i64) -> i64 {
 14 |     match month {
 15 |         0 | 2 | 4 | 6 | 7 | 9 | 11 => 31,
 16 |         3 | 5 | 8 | 10 => 30,
 17 |         1 => {
 18 |             if (year % 400) == 0 {
 19 |                 29
 20 |             } else if (year % 100) == 0 {
 21 |                 28
 22 |             } else if (year % 4) == 0 {
 23 |                 29
 24 |             } else {
 25 |                 28
 26 |             }
 27 |         }
 28 |         _ => 0,
 29 |     }
 30 | }
 31 | 
 32 | fn seconds_to_date(year: i64, month: i64, day: i64) -> i64 {
 33 |     let mut result: i64 = 0;
 34 |     for y in 1970..2001 {
 35 |         if y == year {
 36 |             break;
 37 |         }
 38 |         result += 86400 * 365;
 39 |         if (y % 4) == 0 {
 40 |             result += 86400;
 41 |         }
 42 |     }
 43 |     let mut y = 2001;
 44 |     while y < year {
 45 |         if year - y >= 400 {
 46 |             result += (86400 * 365 * 400) + (86400 * 97);
 47 |             y += 400;
 48 |             continue;
 49 |         }
 50 |         if year - y >= 100 {
 51 |             result += (86400 * 365 * 100) + (86400 * 24);
 52 |             y += 100;
 53 |             continue;
 54 |         }
 55 |         if year - y >= 4 {
 56 |             result += (86400 * 365 * 4) + (86400);
 57 |             y += 4;
 58 |             continue;
 59 |         }
 60 |         result += 86400 * 365;
 61 |         y += 1;
 62 |     }
 63 |     for m in 0..month {
 64 |         result += 86400 * days_in_month(m, year)
 65 |     }
 66 |     result + 86400 * (day - 1)
 67 | }
 68 | 
/// Convert a date field from an email header into a UNIX epoch timestamp.
/// This function handles the most common formatting of date fields found in
/// email headers. It may fail to parse some of the more creative formattings.
///
/// The input is split on spaces and colons and the resulting tokens are read
/// in the fixed order day, month, year, hour, minute, second, timezone.
/// Parsing stops after the timezone token, so anything following it (such as
/// a trailing comment) is ignored. If the input ends before all fields have
/// been seen, the value accumulated so far is returned.
///
/// # Errors
/// Returns `MailParseError::Generic` when the day is outside 1..=31, the
/// month name is not recognized, the year is not a number or is before 1970,
/// the hour/minute/second is not a number, or the timezone is neither a
/// number in -2400..=2400 nor one of the recognized zone names.
///
/// # Examples
/// ```
///     use mailparse::dateparse;
///     assert_eq!(dateparse("Sun, 02 Oct 2016 07:06:22 -0700 (PDT)").unwrap(), 1475417182);
/// ```
pub fn dateparse(date: &str) -> Result<i64, MailParseError> {
    let mut result = 0;
    let mut month = 0;
    let mut day_of_month = 0;
    let mut state = DateParseState::Date;
    for tok in date.split(|c| c == ' ' || c == ':') {
        // Consecutive separators produce empty tokens; skip them.
        if tok.is_empty() {
            continue;
        }
        match state {
            DateParseState::Date => {
                // Tokens that do not parse as a number (for example a
                // leading day name such as "Sun,") are silently skipped
                // until a day of the month is found.
                if let Ok(v) = tok.parse::<u8>() {
                    if !(1..=31).contains(&v) {
                        return Err(MailParseError::Generic("Invalid day"));
                    }
                    day_of_month = v;
                    state = DateParseState::Month;
                };
                continue;
            }
            DateParseState::Month => {
                // Month names are matched case-insensitively and mapped to a
                // zero-based month index.
                month = match tok.to_uppercase().as_str() {
                    "JAN" | "JANUARY" => 0,
                    "FEB" | "FEBRUARY" => 1,
                    "MAR" | "MARCH" => 2,
                    "APR" | "APRIL" => 3,
                    "MAY" => 4,
                    "JUN" | "JUNE" => 5,
                    "JUL" | "JULY" => 6,
                    "AUG" | "AUGUST" => 7,
                    "SEP" | "SEPTEMBER" => 8,
                    "OCT" | "OCTOBER" => 9,
                    "NOV" | "NOVEMBER" => 10,
                    "DEC" | "DECEMBER" => 11,
                    _ => return Err(MailParseError::Generic("Unrecognized month")),
                };
                state = DateParseState::Year;
                continue;
            }
            DateParseState::Year => {
                // Two-digit years: 0..=69 map to 2000..=2069 and 70..=99 map
                // to 1970..=1999. Anything else below 1970 is rejected.
                let year = match tok.parse::<u32>() {
                    Ok(v) if v < 70 => 2000 + v,
                    Ok(v) if v < 100 => 1900 + v,
                    Ok(v) if v < 1970 => return Err(MailParseError::Generic("Disallowed year")),
                    Ok(v) => v,
                    Err(_) => return Err(MailParseError::Generic("Invalid year")),
                };
                // From here on `result` holds the timestamp of midnight on
                // the parsed day; the time-of-day fields are added to it.
                result =
                    seconds_to_date(i64::from(year), i64::from(month), i64::from(day_of_month));
                state = DateParseState::Hour;
                continue;
            }
            DateParseState::Hour => {
                // Note: only checked to be a number that fits in a u8; no
                // range validation is performed.
                let hour = match tok.parse::<u8>() {
                    Ok(v) => v,
                    Err(_) => return Err(MailParseError::Generic("Invalid hour")),
                };
                result += 3600 * i64::from(hour);
                state = DateParseState::Minute;
                continue;
            }
            DateParseState::Minute => {
                // Same as the hour: numeric, but not range-checked.
                let minute = match tok.parse::<u8>() {
                    Ok(v) => v,
                    Err(_) => return Err(MailParseError::Generic("Invalid minute")),
                };
                result += 60 * i64::from(minute);
                state = DateParseState::Second;
                continue;
            }
            DateParseState::Second => {
                // Same as the hour: numeric, but not range-checked.
                let second = match tok.parse::<u8>() {
                    Ok(v) => v,
                    Err(_) => return Err(MailParseError::Generic("Invalid second")),
                };
                result += i64::from(second);
                state = DateParseState::Timezone;
                continue;
            }
            DateParseState::Timezone => {
                // `tz` is the unsigned offset in HHMM form and `tz_sign` is
                // +1 or -1. The range guard comes first so that negating
                // the value below cannot overflow.
                let (tz, tz_sign) = match tok.parse::<i32>() {
                    Ok(v) if !(-2400..=2400).contains(&v) => {
                        return Err(MailParseError::Generic("Invalid timezone"))
                    }
                    Ok(v) if v < 0 => (-v, -1),
                    Ok(v) => (v, 1),
                    Err(_) => {
                        match tok.to_uppercase().as_str() {
                            // This list taken from IETF RFC 822
                            "UTC" | "UT" | "GMT" | "Z" => (0, 1),
                            "EDT" => (400, -1),
                            "EST" | "CDT" => (500, -1),
                            "CST" | "MDT" => (600, -1),
                            "MST" | "PDT" => (700, -1),
                            "PST" => (800, -1),
                            // Military time zones (RFC 822, RFC 5322).  A-M (except J) are negative offsets, N-Y are positive.  J is not used.
                            "A" => (100, -1),
                            "B" => (200, -1),
                            "C" => (300, -1),
                            "D" => (400, -1),
                            "E" => (500, -1),
                            "F" => (600, -1),
                            "G" => (700, -1),
                            "H" => (800, -1),
                            "I" => (900, -1),
                            // "J" is not used
                            "K" => (1000, -1),
                            "L" => (1100, -1),
                            "M" => (1200, -1),
                            "N" => (100, 1),
                            "O" => (200, 1),
                            "P" => (300, 1),
                            "Q" => (400, 1),
                            "R" => (500, 1),
                            "S" => (600, 1),
                            "T" => (700, 1),
                            "U" => (800, 1),
                            "V" => (900, 1),
                            "W" => (1000, 1),
                            "X" => (1100, 1),
                            "Y" => (1200, 1),
                            _ => return Err(MailParseError::Generic("Invalid timezone")),
                        }
                    }
                };
                // Split HHMM into hours and minutes and convert to seconds.
                let tz_hours = tz / 100;
                let tz_mins = tz % 100;
                let tz_delta = (tz_hours * 3600) + (tz_mins * 60);
                // A negative offset means local time is behind UTC, so the
                // delta is added to get UTC; a positive offset is subtracted.
                if tz_sign < 0 {
                    result += i64::from(tz_delta);
                } else {
                    result -= i64::from(tz_delta);
                }
                // The timezone is the last field; ignore any further tokens.
                break;
            }
        }
    }
    Ok(result)
}
217 | 
// Unit tests for `dateparse`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks a selection of well-formed dates against their expected UNIX
    /// timestamps, followed by inputs that must be rejected.
    #[test]
    fn parse_dates() {
        assert_eq!(
            dateparse("Sun, 25 Sep 2016 18:36:33 -0400").unwrap(),
            1474842993
        );
        // Dates around century boundaries exercise the leap-year handling.
        assert_eq!(
            dateparse("Fri, 01 Jan 2100 11:12:13 +0000").unwrap(),
            4102485133
        );
        assert_eq!(
            dateparse("Fri, 31 Dec 2100 00:00:00 +0000").unwrap(),
            4133894400
        );
        assert_eq!(
            dateparse("Fri, 31 Dec 2399 00:00:00 +0000").unwrap(),
            13569379200
        );
        assert_eq!(
            dateparse("Fri, 31 Dec 2400 00:00:00 +0000").unwrap(),
            13601001600
        );
        // No leading day name.
        assert_eq!(dateparse("17 Sep 2016 16:05:38 -1000").unwrap(), 1474164338);
        // Named and single-letter (military) timezones.
        assert_eq!(
            dateparse("Fri, 30 Nov 2012 20:57:23 GMT").unwrap(),
            1354309043
        );
        assert_eq!(
            dateparse("Fri, 30 Nov 2012 20:57:23 Q").unwrap(),
            1354294643
        );
        assert_eq!(
            dateparse("Fri, 30 Nov 2012 20:57:23 D").unwrap(),
            1354323443
        );

        // Day cannot be zero.
        assert!(dateparse("Wed, 0 Jan 1970 00:00:00 +0000").is_err());

        // Regression test for integer overflow on invalid timezone.
        assert!(dateparse("Thu, 1 Jan 1970 00:00:00 +2147483647").is_err());
    }
}
265 | 


--------------------------------------------------------------------------------
/src/header.rs:
--------------------------------------------------------------------------------
  1 | use charset::Charset;
  2 | 
  3 | use crate::find_from;
  4 | 
/// Some types of tokens that might be present in a MIME header. This
/// list is incomplete relative to the types of tokens defined in the RFC,
/// but can be expanded as needed. Currently the list of tokens is
/// sufficient to properly handle encoded words and line unfolding.
pub enum HeaderToken<'a> {
    /// A bunch of not-encoded text. This can include whitespace and
    /// non-whitespace chars.
    Text(&'a str),
    /// A bunch of text that is purely whitespace.
    Whitespace(&'a str),
    /// An end-of-line marker. If it contains None, then it represents
    /// a raw CRLF that has not yet been line-unfolded. If it contains
    /// a string, that represents the whitespace that was produced
    /// around that CRLF during line unfolding. This may include whitespace
    /// from the end of the previous line.
    Newline(Option<String>),
    /// The decoded value of an encoded word found in the header.
    DecodedWord(String),
}
 24 | 
 25 | fn is_boundary(line: &str, ix: Option<usize>) -> bool {
 26 |     ix.and_then(|v| line.chars().nth(v))
 27 |         .map(|c| {
 28 |             c.is_whitespace()
 29 |                 || c == '"'
 30 |                 || c == '('
 31 |                 || c == ')'
 32 |                 || c == '<'
 33 |                 || c == '>'
 34 |                 || c == ','
 35 |         })
 36 |         .unwrap_or(true)
 37 | }
 38 | 
 39 | fn decode_word(encoded: &str) -> Option<String> {
 40 |     let ix_delim1 = encoded.find('?')?;
 41 |     let ix_delim2 = find_from(encoded, ix_delim1 + 1, "?")?;
 42 | 
 43 |     let charset = &encoded[0..ix_delim1];
 44 |     let transfer_coding = &encoded[ix_delim1 + 1..ix_delim2];
 45 |     let input = &encoded[ix_delim2 + 1..];
 46 | 
 47 |     let decoded = match transfer_coding {
 48 |         "B" | "b" => data_encoding::BASE64_MIME_PERMISSIVE
 49 |             .decode(input.as_bytes())
 50 |             .ok()?,
 51 |         "Q" | "q" => {
 52 |             // The quoted_printable module does a trim_end on the input, so if
 53 |             // that affects the output we should save and restore the trailing
 54 |             // whitespace
 55 |             let to_decode = input.replace('_', " ");
 56 |             let trimmed = to_decode.trim_end();
 57 |             let mut d = quoted_printable::decode(trimmed, quoted_printable::ParseMode::Robust);
 58 |             if d.is_ok() && to_decode.len() != trimmed.len() {
 59 |                 d.as_mut()
 60 |                     .unwrap()
 61 |                     .extend_from_slice(to_decode[trimmed.len()..].as_bytes());
 62 |             }
 63 |             d.ok()?
 64 |         }
 65 |         _ => return None,
 66 |     };
 67 |     let charset = Charset::for_label_no_replacement(charset.as_bytes())?;
 68 |     let (cow, _) = charset.decode_without_bom_handling(&decoded);
 69 |     Some(cow.into_owned())
 70 | }
 71 | 
/// Tokenizes a single line of the header and produces a vector of
/// tokens. Because this only processes a single line, it will never
/// generate `HeaderToken::Newline` tokens.
///
/// The line is scanned for `=?` ... `?=` pairs. A pair is only treated as an
/// encoded word when `is_boundary` accepts the position just before the
/// `=?` and the position just after the `?=`. Encoded words that decode
/// successfully become `DecodedWord` tokens; everything else is emitted as
/// `Text` or `Whitespace` slices of the input line.
fn tokenize_header_line(line: &str) -> Vec<HeaderToken> {
    // Classifies a slice as `Whitespace` when it contains nothing but
    // whitespace (this includes the empty string), otherwise as `Text`.
    fn maybe_whitespace(text: &str) -> HeaderToken {
        if text.trim_end().is_empty() {
            HeaderToken::Whitespace(text)
        } else {
            HeaderToken::Text(text)
        }
    }

    let mut result = Vec::new();
    // Offset in `line` from which the next search starts; everything before
    // it has already been turned into tokens.
    // NOTE(review): `find_from` is defined elsewhere; it is assumed to return
    // the byte offset of the next match at or after the given offset.
    let mut ix_search = 0;
    loop {
        match find_from(line, ix_search, "=?") {
            Some(v) => {
                // `ix_begin` is the offset just past the "=?" marker.
                let ix_begin = v + 2;
                // `ix_begin - 3` is the position right before the "=?". When
                // the marker is at the very start of the line the
                // subtraction underflows and `None` is passed instead.
                if !is_boundary(line, ix_begin.checked_sub(3)) {
                    // Not preceded by a boundary: keep everything up to and
                    // including the "=?" as plain text and search on.
                    result.push(HeaderToken::Text(&line[ix_search..ix_begin]));
                    ix_search = ix_begin;
                    continue;
                }
                // Emit whatever precedes the "=?" marker.
                result.push(maybe_whitespace(&line[ix_search..ix_begin - 2]));
                let mut ix_end_search = ix_begin;
                loop {
                    match find_from(line, ix_end_search, "?=") {
                        Some(ix_end) => {
                            // A "?=" that is not followed by a boundary is
                            // not the terminator; keep looking further on.
                            if !is_boundary(line, ix_end.checked_add(2)) {
                                ix_end_search = ix_end + 2;
                                continue;
                            }
                            match decode_word(&line[ix_begin..ix_end]) {
                                Some(v) => result.push(HeaderToken::DecodedWord(v)),
                                None => {
                                    // Could not decode: keep the whole
                                    // "=?...?=" sequence verbatim.
                                    result.push(HeaderToken::Text(&line[ix_begin - 2..ix_end + 2]));
                                }
                            };
                            // Points at the "?="; the `+= 2` below moves
                            // past it.
                            ix_search = ix_end;
                        }
                        None => {
                            // No terminator found: emit the "=?" as text.
                            // Combined with the `+= 2` below, scanning
                            // resumes right after the "=?".
                            result.push(HeaderToken::Text("=?"));
                            ix_search = ix_begin - 2;
                        }
                    };
                    break;
                }
                ix_search += 2;
                continue;
            }
            None => {
                // No further "=?" marker: the remainder is one final token.
                result.push(maybe_whitespace(&line[ix_search..]));
                break;
            }
        };
    }
    result
}
130 | 
131 | /// Tokenize an entire header, including newlines. This includes
132 | /// decoded words, but doesn't do line unfolding, so any `HeaderToken::Newline`
133 | /// tokens will always have a `None` inner value. Whitespace preceding
134 | /// the newline will be in a separate `HeaderToken::Whitespace` or
135 | /// `HeaderToken::Text` token. Semantically the `HeaderToken::Newline`
136 | /// tokens that come out of this still represent the CRLF newline.
137 | fn tokenize_header(value: &str) -> Vec<HeaderToken> {
138 |     let mut tokens = Vec::new();
139 |     let mut lines = value.lines();
140 |     let mut first = true;
141 |     while let Some(line) = lines.next().map(str::trim_start) {
142 |         if first {
143 |             first = false;
144 |         } else {
145 |             tokens.push(HeaderToken::Newline(None));
146 |         }
147 |         let mut line_tokens = tokenize_header_line(line);
148 |         tokens.append(&mut line_tokens);
149 |     }
150 |     tokens
151 | }
152 | 
/// Takes in a list of tokens and processes them to normalize the whitespace
/// per the RFC. This includes dropping any whitespace between two adjacent
/// encoded words, and also doing line unfolding. As a result, the `HeaderToken::Newline`
/// tokens that come out of this no longer represent the CRLF newline, but instead
/// their contained `Option<String>` will be populated with whatever whitespace gets
/// generated from unfolding the line. This might include end-of-line whitespace from
/// the previous line.
///
/// Whitespace that follows a decoded word is held back in `saved_token` until
/// the next token shows whether it must be kept (followed by text) or dropped
/// (followed by another decoded word). Whitespace still held back when the
/// input ends is not emitted.
fn normalize_header_whitespace(tokens: Vec<HeaderToken>) -> Vec<HeaderToken> {
    let mut result = Vec::<HeaderToken>::new();

    // Either a marker that the previous token was a decoded word (an empty
    // `DecodedWord`), or the whitespace/newline seen since that decoded word
    // which has not been emitted yet.
    let mut saved_token = None;
    // See RFC 2047 section 6.2 for what's going on here. Basically whitespace
    // that's between two adjacent encoded words should be thrown away.
    for tok in tokens {
        match &tok {
            HeaderToken::Text(_) => {
                // If we saved some whitespace, put it in since we encountered
                // non-whitespace chars that weren't part of an encoded word.
                if let Some(HeaderToken::Whitespace(_)) = &saved_token {
                    result.push(saved_token.unwrap());
                } else if let Some(HeaderToken::Newline(Some(_))) = &saved_token {
                    result.push(saved_token.unwrap());
                }
                // Also put the actual non-whitespace chars.
                result.push(tok);
                saved_token = None;
            }
            HeaderToken::Whitespace(_) => {
                // If the previous token was an encoded word, save the whitespace
                // as whitespace that's between two encoded words should be dropped.
                // We only know if this whitespace goes into `result` after parsing
                // the next token.
                if let Some(HeaderToken::DecodedWord(_)) = saved_token {
                    saved_token = Some(tok);
                } else {
                    result.push(tok);
                    saved_token = None;
                }
            }
            HeaderToken::Newline(_) => {
                // If we saved whitespace at the end of the line, add an extra space
                // to it from the line unfolding.
                if let Some(HeaderToken::Whitespace(ws)) = saved_token {
                    let new_ws = ws.to_owned() + " ";
                    saved_token = Some(HeaderToken::Newline(Some(new_ws)));
                // If the end of the line had an encoded word, save the space from
                // line unfolding.
                } else if let Some(HeaderToken::DecodedWord(_)) = saved_token {
                    saved_token = Some(HeaderToken::Newline(Some(" ".to_string())));
                } else {
                    // Nothing relevant is pending: the line break unfolds to
                    // a single space that is emitted right away.
                    result.push(HeaderToken::Newline(Some(" ".to_string())));
                    saved_token = None;
                }
            }
            HeaderToken::DecodedWord(_) => {
                // Note that saved_token might be a whitespace thing here. But we
                // throw it away because that means it fell between two adjacent
                // encoded words.
                // The empty `DecodedWord` stored here is only a marker; the
                // real token is pushed to `result`.
                saved_token = Some(HeaderToken::DecodedWord(String::new()));
                result.push(tok);
            }
        }
    }
    result
}
218 | 
219 | pub fn normalized_tokens(raw_value: &str) -> Vec<HeaderToken> {
220 |     normalize_header_whitespace(tokenize_header(raw_value))
221 | }
222 | 
// Unit tests for the header tokenizer helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_boundary` must not panic when the index is smaller than the byte
    /// length of the line but does not address a character.
    #[test]
    fn test_is_boundary_multibyte() {
        // Bug #26, Incorrect unwrap() guard in is_boundary()
        // 6x'REPLACEMENT CHARACTER', but 18 bytes of data:
        let test = "\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}";
        assert!(is_boundary(test, Some(8)));
    }
}
235 | 


--------------------------------------------------------------------------------
/src/headers.rs:
--------------------------------------------------------------------------------
  1 | use crate::{MailHeader, MailHeaderMap};
  2 | use std::fmt;
  3 | use std::slice;
  4 | 
/// A struct that wraps the header portion of a message and provides
/// utility functions to look up specific headers.
pub struct Headers<'a> {
    /// Raw, unparsed bytes of the header block.
    raw_bytes: &'a [u8],
    /// The individual parsed headers.
    headers: &'a [MailHeader<'a>],
}
 11 | 
impl<'a> Headers<'a> {
    /// Creates a `Headers` view over the given raw header bytes and the
    /// headers parsed from them.
    pub(crate) fn new(raw_bytes: &'a [u8], headers: &'a [MailHeader<'a>]) -> Headers<'a> {
        Headers { raw_bytes, headers }
    }

    /// Returns the raw, unparsed bytes that make up the header block of
    /// the message. This includes everything up to and including the empty
    /// line at the end of the header block.
    ///
    /// # Examples
    /// ```
    ///     use mailparse::{parse_mail, headers::Headers};
    ///     let mail = parse_mail(concat!(
    ///             "SubJECT : foo\n",
    ///             "\n",
    ///             "Body starts here").as_bytes())
    ///         .unwrap();
    ///     assert_eq!(mail.get_headers().get_raw_bytes(), b"SubJECT : foo\n\n");
    /// ```
    pub fn get_raw_bytes(&self) -> &'a [u8] {
        self.raw_bytes
    }
}
 34 | 
 35 | /// Allows iterating over the individual `MailHeader` items in this block of
 36 | /// headers.
 37 | ///
 38 | /// # Examples
 39 | /// ```
 40 | ///     use mailparse::{parse_mail, headers::Headers};
 41 | ///     let mail = parse_mail(concat!(
 42 | ///             "Subject: foo\n",
 43 | ///             "Another header: bar\n",
 44 | ///             "\n",
 45 | ///             "Body starts here").as_bytes())
 46 | ///         .unwrap();
 47 | ///     let mut iter = mail.get_headers().into_iter();
 48 | ///     assert_eq!(iter.next().unwrap().get_key(), "Subject");
 49 | ///     assert_eq!(iter.next().unwrap().get_key(), "Another header");
 50 | /// ```
 51 | impl<'a> IntoIterator for Headers<'a> {
 52 |     type Item = &'a MailHeader<'a>;
 53 |     type IntoIter = slice::Iter<'a, MailHeader<'a>>;
 54 | 
 55 |     fn into_iter(self) -> Self::IntoIter {
 56 |         self.headers.into_iter()
 57 |     }
 58 | }
 59 | 
 60 | /// Allows formatting and printing the `Headers` struct items.
 61 | ///
 62 | /// # Examples
 63 | /// ```
 64 | ///     use mailparse::parse_mail;
 65 | ///     let mail = parse_mail(concat!(
 66 | ///             "Subject: foo\n",
 67 | ///             "Another header: bar\n",
 68 | ///             "\n",
 69 | ///             "Body starts here").as_bytes())
 70 | ///         .unwrap();
 71 | ///     let mut headers = mail.get_headers();
 72 | ///     assert_eq!(format!("{:?}", headers), "Headers { \
 73 | ///                headers: [MailHeader { key: \"Subject\", value: \"foo\" }, \
 74 | ///                MailHeader { key: \"Another header\", value: \"bar\" }] }");
 75 | /// ```
 76 | impl<'a> fmt::Debug for Headers<'a> {
 77 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 78 |         f.debug_struct("Headers")
 79 |             .field("headers", &self.headers)
 80 |             .finish()
 81 |     }
 82 | }
 83 | 
// Every method forwards to the `MailHeaderMap` method of the same name on
// the wrapped slice of headers.
impl<'a> MailHeaderMap for Headers<'a> {
    /// Forwards to `get_first_value` on the wrapped headers.
    ///
    /// # Examples
    /// ```
    ///     use mailparse::{parse_mail, MailHeaderMap, headers::Headers};
    ///     let mail = parse_mail(concat!(
    ///             "Subject: Test\n",
    ///             "\n",
    ///             "This is a test message").as_bytes())
    ///         .unwrap();
    ///     assert_eq!(mail.get_headers().get_first_value("Subject"), Some("Test".to_string()));
    /// ```
    fn get_first_value(&self, key: &str) -> Option<String> {
        self.headers.get_first_value(key)
    }

    /// Forwards to `get_first_header` on the wrapped headers.
    fn get_first_header(&self, key: &str) -> Option<&MailHeader> {
        self.headers.get_first_header(key)
    }

    /// Forwards to `get_all_values` on the wrapped headers.
    ///
    /// # Examples
    /// ```
    ///     use mailparse::{parse_mail, MailHeaderMap, headers::Headers};
    ///     let mail = parse_mail(concat!(
    ///             "Key: Value1\n",
    ///             "Key: Value2").as_bytes())
    ///         .unwrap();
    ///     assert_eq!(mail.get_headers().get_all_values("Key"),
    ///         vec!["Value1".to_string(), "Value2".to_string()]);
    /// ```
    fn get_all_values(&self, key: &str) -> Vec<String> {
        self.headers.get_all_values(key)
    }

    /// Forwards to `get_all_headers` on the wrapped headers.
    fn get_all_headers(&self, key: &str) -> Vec<&MailHeader> {
        self.headers.get_all_headers(key)
    }
}
121 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
   1 | #![forbid(unsafe_code)]
   2 | 
   3 | extern crate charset;
   4 | extern crate data_encoding;
   5 | extern crate quoted_printable;
   6 | 
   7 | use std::borrow::Cow;
   8 | use std::collections::{BTreeMap, HashMap};
   9 | use std::error;
  10 | use std::fmt;
  11 | 
  12 | use charset::{decode_latin1, Charset};
  13 | 
  14 | mod addrparse;
  15 | pub mod body;
  16 | mod dateparse;
  17 | mod header;
  18 | pub mod headers;
  19 | mod msgidparse;
  20 | 
  21 | pub use crate::addrparse::{
  22 |     addrparse, addrparse_header, GroupInfo, MailAddr, MailAddrList, SingleInfo,
  23 | };
  24 | use crate::body::Body;
  25 | pub use crate::dateparse::dateparse;
  26 | use crate::header::HeaderToken;
  27 | use crate::headers::Headers;
  28 | pub use crate::msgidparse::{msgidparse, MessageIdList};
  29 | 
  30 | /// An error type that represents the different kinds of errors that may be
  31 | /// encountered during message parsing.
  32 | #[derive(Debug)]
  33 | pub enum MailParseError {
  34 |     /// Data that was specified as being in the quoted-printable transfer-encoding
  35 |     /// could not be successfully decoded as quoted-printable data.
  36 |     QuotedPrintableDecodeError(quoted_printable::QuotedPrintableError),
  37 |     /// Data that was specified as being in the base64 transfer-encoding could
  38 |     /// not be successfully decoded as base64 data.
  39 |     Base64DecodeError(data_encoding::DecodeError),
  40 |     /// An error occurred when converting the raw byte data to Rust UTF-8 string
  41 |     /// format using the charset specified in the message.
  42 |     EncodingError(std::borrow::Cow<'static, str>),
  43 |     /// Some other error occurred while parsing the message; the description string
  44 |     /// provides additional details.
  45 |     Generic(&'static str),
  46 | }
  47 | 
  48 | impl fmt::Display for MailParseError {
  49 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  50 |         match *self {
  51 |             MailParseError::QuotedPrintableDecodeError(ref err) => {
  52 |                 write!(f, "QuotedPrintable decode error: {}", err)
  53 |             }
  54 |             MailParseError::Base64DecodeError(ref err) => write!(f, "Base64 decode error: {}", err),
  55 |             MailParseError::EncodingError(ref err) => write!(f, "Encoding error: {}", err),
  56 |             MailParseError::Generic(ref description) => write!(f, "{}", description),
  57 |         }
  58 |     }
  59 | }
  60 | 
  61 | impl error::Error for MailParseError {
  62 |     fn cause(&self) -> Option<&dyn error::Error> {
  63 |         match *self {
  64 |             MailParseError::QuotedPrintableDecodeError(ref err) => Some(err),
  65 |             MailParseError::Base64DecodeError(ref err) => Some(err),
  66 |             _ => None,
  67 |         }
  68 |     }
  69 | 
  70 |     fn source(&self) -> Option<&(dyn error::Error + 'static)> {
  71 |         match *self {
  72 |             MailParseError::QuotedPrintableDecodeError(ref err) => Some(err),
  73 |             MailParseError::Base64DecodeError(ref err) => Some(err),
  74 |             _ => None,
  75 |         }
  76 |     }
  77 | }
  78 | 
  79 | impl From<quoted_printable::QuotedPrintableError> for MailParseError {
  80 |     fn from(err: quoted_printable::QuotedPrintableError) -> MailParseError {
  81 |         MailParseError::QuotedPrintableDecodeError(err)
  82 |     }
  83 | }
  84 | 
  85 | impl From<data_encoding::DecodeError> for MailParseError {
  86 |     fn from(err: data_encoding::DecodeError) -> MailParseError {
  87 |         MailParseError::Base64DecodeError(err)
  88 |     }
  89 | }
  90 | 
  91 | impl From<std::borrow::Cow<'static, str>> for MailParseError {
  92 |     fn from(err: std::borrow::Cow<'static, str>) -> MailParseError {
  93 |         MailParseError::EncodingError(err)
  94 |     }
  95 | }
  96 | 
/// A struct that represents a single header in the message.
/// It holds slices into the raw byte array passed to `parse_mail`, and so the
/// lifetime of this struct must be contained within the lifetime of the raw
/// input. There are additional accessor functions on this struct to extract
/// the data as Rust strings.
pub struct MailHeader<'a> {
    // Raw bytes of the header name, exactly as they appear in the input.
    key: &'a [u8],
    // Raw bytes of the header value; not yet unfolded or decoded.
    value: &'a [u8],
}
 106 | 
 107 | /// Custom Debug trait for better formatting and printing of MailHeader items.
 108 | impl<'a> fmt::Debug for MailHeader<'a> {
 109 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 110 |         f.debug_struct("MailHeader")
 111 |             .field("key", &String::from_utf8_lossy(self.key))
 112 |             .field("value", &String::from_utf8_lossy(self.value))
 113 |             .finish()
 114 |     }
 115 | }
 116 | 
 117 | pub(crate) fn find_from(line: &str, ix_start: usize, key: &str) -> Option<usize> {
 118 |     line[ix_start..].find(key).map(|v| ix_start + v)
 119 | }
 120 | 
 121 | fn find_from_u8(line: &[u8], ix_start: usize, key: &[u8]) -> Option<usize> {
 122 |     assert!(!key.is_empty());
 123 |     assert!(ix_start <= line.len());
 124 |     if line.len() < key.len() {
 125 |         return None;
 126 |     }
 127 |     let ix_end = line.len() - key.len();
 128 |     if ix_start <= ix_end {
 129 |         for i in ix_start..=ix_end {
 130 |             if line[i] == key[0] {
 131 |                 let mut success = true;
 132 |                 for j in 1..key.len() {
 133 |                     if line[i + j] != key[j] {
 134 |                         success = false;
 135 |                         break;
 136 |                     }
 137 |                 }
 138 |                 if success {
 139 |                     return Some(i);
 140 |                 }
 141 |             }
 142 |         }
 143 |     }
 144 |     None
 145 | }
 146 | 
 147 | #[test]
 148 | fn test_find_from_u8() {
 149 |     assert_eq!(find_from_u8(b"hello world", 0, b"hell"), Some(0));
 150 |     assert_eq!(find_from_u8(b"hello world", 0, b"o"), Some(4));
 151 |     assert_eq!(find_from_u8(b"hello world", 4, b"o"), Some(4));
 152 |     assert_eq!(find_from_u8(b"hello world", 5, b"o"), Some(7));
 153 |     assert_eq!(find_from_u8(b"hello world", 8, b"o"), None);
 154 |     assert_eq!(find_from_u8(b"hello world", 10, b"d"), Some(10));
 155 |     assert_eq!(find_from_u8(b"hello world", 0, b"world"), Some(6));
 156 | }
 157 | 
 158 | // Like find_from_u8, but additionally filters such that `key` is at the start
 159 | // of a line (preceded by `\n`) or at the start of the search space.
 160 | fn find_from_u8_line_prefix(line: &[u8], ix_start: usize, key: &[u8]) -> Option<usize> {
 161 |     let mut start = ix_start;
 162 |     while let Some(ix) = find_from_u8(line, start, key) {
 163 |         if ix == ix_start || line[ix - 1] == b'\n' {
 164 |             return Some(ix);
 165 |         }
 166 |         start = ix + 1;
 167 |     }
 168 |     None
 169 | }
 170 | 
 171 | #[test]
 172 | fn test_find_from_u8_line_prefix() {
 173 |     assert_eq!(find_from_u8_line_prefix(b"hello world", 0, b"he"), Some(0));
 174 |     assert_eq!(find_from_u8_line_prefix(b"hello\nhello", 0, b"he"), Some(0));
 175 |     assert_eq!(find_from_u8_line_prefix(b"hello\nhello", 1, b"he"), Some(6));
 176 |     assert_eq!(find_from_u8_line_prefix(b"hello world", 0, b"wo"), None);
 177 |     assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 0, b"wo"), Some(6));
 178 |     assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 6, b"wo"), Some(6));
 179 |     assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 7, b"wo"), None);
 180 |     assert_eq!(
 181 |         find_from_u8_line_prefix(b"hello\nworld", 0, b"world"),
 182 |         Some(6)
 183 |     );
 184 | }
 185 | 
 186 | impl<'a> MailHeader<'a> {
 187 |     /// Get the name of the header. Note that header names are case-insensitive.
 188 |     /// Prefer using get_key_ref where possible for better performance.
 189 |     pub fn get_key(&self) -> String {
 190 |         decode_latin1(self.key).into_owned()
 191 |     }
 192 | 
 193 |     /// Get the name of the header, borrowing if it's ASCII-only.
 194 |     /// Note that header names are case-insensitive.
 195 |     pub fn get_key_ref(&self) -> Cow<str> {
 196 |         decode_latin1(self.key)
 197 |     }
 198 | 
 199 |     pub(crate) fn decode_utf8_or_latin1(&'a self) -> Cow<'a, str> {
 200 |         // RFC 6532 says that header values can be UTF-8. Let's try that first, and
 201 |         // fall back to latin1 if that fails, for better backwards-compatibility with
 202 |         // older versions of this library that didn't try UTF-8.
 203 |         match std::str::from_utf8(self.value) {
 204 |             Ok(s) => Cow::Borrowed(s),
 205 |             Err(_) => decode_latin1(self.value),
 206 |         }
 207 |     }
 208 | 
 209 |     /// Get the value of the header. Any sequences of newlines characters followed
 210 |     /// by whitespace are collapsed into a single space. In effect, header values
 211 |     /// wrapped across multiple lines are compacted back into one line, while
 212 |     /// discarding the extra whitespace required by the MIME format. Additionally,
 213 |     /// any quoted-printable words in the value are decoded.
 214 |     /// Note that this function attempts to decode the header value bytes as UTF-8
 215 |     /// first, and falls back to Latin-1 if the UTF-8 decoding fails. This attempts
 216 |     /// to be compliant with both RFC 6532 as well as older versions of this library.
 217 |     /// To avoid the Latin-1 fallback decoding, which may end up returning "garbage",
 218 |     /// prefer using the get_value_utf8 function instead, which will fail and return
 219 |     /// an error instead of falling back to Latin-1.
 220 |     ///
 221 |     /// # Examples
 222 |     /// ```
 223 |     ///     use mailparse::parse_header;
 224 |     ///     let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
 225 |     ///     assert_eq!(parsed.get_key(), "Subject");
 226 |     ///     assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!");
 227 |     /// ```
 228 |     pub fn get_value(&self) -> String {
 229 |         let chars = self.decode_utf8_or_latin1();
 230 |         self.normalize_header(chars)
 231 |     }
 232 | 
 233 |     fn normalize_header(&'a self, chars: Cow<'a, str>) -> String {
 234 |         let mut result = String::new();
 235 | 
 236 |         for tok in header::normalized_tokens(&chars) {
 237 |             match tok {
 238 |                 HeaderToken::Text(t) => {
 239 |                     result.push_str(t);
 240 |                 }
 241 |                 HeaderToken::Whitespace(ws) => {
 242 |                     result.push_str(ws);
 243 |                 }
 244 |                 HeaderToken::Newline(Some(ws)) => {
 245 |                     result.push_str(&ws);
 246 |                 }
 247 |                 HeaderToken::Newline(None) => {}
 248 |                 HeaderToken::DecodedWord(dw) => {
 249 |                     result.push_str(&dw);
 250 |                 }
 251 |             }
 252 |         }
 253 | 
 254 |         result
 255 |     }
 256 | 
 257 |     /// Get the value of the header. Any sequences of newlines characters followed
 258 |     /// by whitespace are collapsed into a single space. In effect, header values
 259 |     /// wrapped across multiple lines are compacted back into one line, while
 260 |     /// discarding the extra whitespace required by the MIME format. Additionally,
 261 |     /// any quoted-printable words in the value are decoded. As per RFC 6532, this
 262 |     /// function assumes the raw header value is encoded as UTF-8, and does that
 263 |     /// decoding prior to tokenization and other processing. An EncodingError is
 264 |     /// returned if the raw header value cannot be decoded as UTF-8.
 265 |     ///
 266 |     /// # Examples
 267 |     /// ```
 268 |     ///     use mailparse::parse_header;
 269 |     ///     let (parsed, _) = parse_header(b"Subject: \xC2\xA1Hola, se\xC3\xB1or!").unwrap();
 270 |     ///     assert_eq!(parsed.get_key(), "Subject");
 271 |     ///     assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!");
 272 |     /// ```
 273 |     pub fn get_value_utf8(&self) -> Result<String, MailParseError> {
 274 |         let chars = std::str::from_utf8(self.value).map_err(|_| {
 275 |             MailParseError::EncodingError(Cow::Borrowed("Invalid UTF-8 in header value"))
 276 |         })?;
 277 |         Ok(self.normalize_header(Cow::Borrowed(chars)))
 278 |     }
 279 | 
 280 |     /// Get the raw, unparsed value of the header key.
 281 |     ///
 282 |     /// # Examples
 283 |     /// ```
 284 |     ///     use mailparse::parse_header;
 285 |     ///     let (parsed, _) = parse_header(b"SuBJect : =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
 286 |     ///     assert_eq!(parsed.get_key_raw(), "SuBJect ".as_bytes());
 287 |     /// ```
 288 |     pub fn get_key_raw(&self) -> &[u8] {
 289 |         self.key
 290 |     }
 291 | 
 292 |     /// Get the raw, unparsed value of the header value.
 293 |     ///
 294 |     /// # Examples
 295 |     /// ```
 296 |     ///     use mailparse::parse_header;
 297 |     ///     let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
 298 |     ///     assert_eq!(parsed.get_key(), "Subject");
 299 |     ///     assert_eq!(parsed.get_value_raw(), "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=".as_bytes());
 300 |     /// ```
 301 |     pub fn get_value_raw(&self) -> &[u8] {
 302 |         self.value
 303 |     }
 304 | }
 305 | 
/// The states of the byte-by-byte state machine in `parse_header`.
#[derive(Debug)]
enum HeaderParseState {
    /// Looking at the very first byte of the header.
    Initial,
    /// Scanning the header key, up to the `:` (or an unexpected newline).
    Key,
    /// Just past the `:`; skipping spaces before the value begins.
    PreValue,
    /// Inside the header value.
    Value,
    /// A `\n` was seen inside the value; the next byte decides whether the
    /// value continues on a folded line (space or tab) or the header ends.
    ValueNewline,
}
 314 | 
 315 | /// Parse a single header from the raw data given.
 316 | /// This function takes raw byte data, and starts parsing it, expecting there
 317 | /// to be a MIME header key-value pair right at the beginning. It parses that
 318 | /// header and returns it, along with the index at which the next header is
 319 | /// expected to start. If you just want to parse a single header, you can ignore
 320 | /// the second component of the tuple, which is the index of the next header.
 321 | /// Error values are returned if the data could not be successfully interpreted
 322 | /// as a MIME key-value pair.
 323 | ///
 324 | /// # Examples
 325 | /// ```
 326 | ///     use mailparse::parse_header;
 327 | ///     let (parsed, _) = parse_header(concat!(
 328 | ///             "Subject: Hello, sir,\n",
 329 | ///             "   I am multiline\n",
 330 | ///             "Next:Header").as_bytes())
 331 | ///         .unwrap();
 332 | ///     assert_eq!(parsed.get_key(), "Subject");
 333 | ///     assert_eq!(parsed.get_value(), "Hello, sir, I am multiline");
 334 | /// ```
 335 | pub fn parse_header(raw_data: &[u8]) -> Result<(MailHeader, usize), MailParseError> {
 336 |     let mut it = raw_data.iter();
 337 |     let mut ix = 0;
 338 |     let mut c = match it.next() {
 339 |         None => return Err(MailParseError::Generic("Empty string provided")),
 340 |         Some(v) => *v,
 341 |     };
 342 | 
 343 |     let mut ix_key_end = None;
 344 |     let mut ix_value_start = 0;
 345 |     let mut ix_value_end = 0;
 346 | 
 347 |     let mut state = HeaderParseState::Initial;
 348 |     loop {
 349 |         match state {
 350 |             HeaderParseState::Initial => {
 351 |                 if c == b' ' {
 352 |                     return Err(MailParseError::Generic(
 353 |                         "Header cannot start with a space; it is \
 354 |                          likely an overhanging line from a \
 355 |                          previous header",
 356 |                     ));
 357 |                 };
 358 |                 state = HeaderParseState::Key;
 359 |                 continue;
 360 |             }
 361 |             HeaderParseState::Key => {
 362 |                 if c == b':' {
 363 |                     ix_key_end = Some(ix);
 364 |                     state = HeaderParseState::PreValue;
 365 |                 } else if c == b'\n' {
 366 |                     // Technically this is invalid. We'll handle it gracefully
 367 |                     // since it does appear to happen in the wild and other
 368 |                     // MTAs deal with it. Our handling is to just treat everything
 369 |                     // encountered so far on this line as the header key, and
 370 |                     // leave the value empty.
 371 |                     ix_key_end = Some(ix);
 372 |                     ix_value_start = ix;
 373 |                     ix_value_end = ix;
 374 |                     ix += 1;
 375 |                     break;
 376 |                 }
 377 |             }
 378 |             HeaderParseState::PreValue => {
 379 |                 if c != b' ' {
 380 |                     ix_value_start = ix;
 381 |                     ix_value_end = ix;
 382 |                     state = HeaderParseState::Value;
 383 |                     continue;
 384 |                 }
 385 |             }
 386 |             HeaderParseState::Value => {
 387 |                 if c == b'\n' {
 388 |                     state = HeaderParseState::ValueNewline;
 389 |                 } else if c != b'\r' {
 390 |                     ix_value_end = ix + 1;
 391 |                 }
 392 |             }
 393 |             HeaderParseState::ValueNewline => {
 394 |                 if c == b' ' || c == b'\t' {
 395 |                     state = HeaderParseState::Value;
 396 |                     continue;
 397 |                 } else {
 398 |                     break;
 399 |                 }
 400 |             }
 401 |         }
 402 |         ix += 1;
 403 |         c = match it.next() {
 404 |             None => break,
 405 |             Some(v) => *v,
 406 |         };
 407 |     }
 408 |     match ix_key_end {
 409 |         Some(v) => Ok((
 410 |             MailHeader {
 411 |                 key: &raw_data[0..v],
 412 |                 value: &raw_data[ix_value_start..ix_value_end],
 413 |             },
 414 |             ix,
 415 |         )),
 416 | 
 417 |         None => Ok((
 418 |             // Technically this is invalid. We'll handle it gracefully
 419 |             // since we handle the analogous situation above. Our handling
 420 |             // is to just treat everything encountered on this line as
 421 |             // the header key, and leave the value empty.
 422 |             MailHeader {
 423 |                 key: &raw_data[0..ix],
 424 |                 value: &raw_data[ix..ix],
 425 |             },
 426 |             ix,
 427 |         )),
 428 |     }
 429 | }
 430 | 
/// A trait that is implemented by the `[MailHeader]` slice. These functions are
/// also available on `Vec<MailHeader>`, which is returned by the `parse_headers`
/// function. It provides a map-like interface to look up header values by their
/// name.
pub trait MailHeaderMap {
    /// Look through the list of headers and return the value of the first one
    /// that matches the provided key. Returns `None` if no matching header
    /// was found. Header names are matched case-insensitively.
    ///
    /// # Examples
    /// ```
    ///     use mailparse::{parse_mail, MailHeaderMap};
    ///     let headers = parse_mail(concat!(
    ///             "Subject: Test\n",
    ///             "\n",
    ///             "This is a test message").as_bytes())
    ///         .unwrap().headers;
    ///     assert_eq!(headers.get_first_value("Subject"), Some("Test".to_string()));
    /// ```
    fn get_first_value(&self, key: &str) -> Option<String>;

    /// Similar to `get_first_value`, except it returns a reference to the
    /// `MailHeader` struct instead of just extracting the value.
    fn get_first_header(&self, key: &str) -> Option<&MailHeader>;

    /// Look through the list of headers and return the values of all headers
    /// matching the provided key. Returns an empty vector if no matching headers
    /// were found. The order of the returned values is the same as the order
    /// of the matching headers in the message. Header names are matched
    /// case-insensitively.
    ///
    /// # Examples
    /// ```
    ///     use mailparse::{parse_mail, MailHeaderMap};
    ///     let headers = parse_mail(concat!(
    ///             "Key: Value1\n",
    ///             "Key: Value2").as_bytes())
    ///         .unwrap().headers;
    ///     assert_eq!(headers.get_all_values("Key"),
    ///         vec!["Value1".to_string(), "Value2".to_string()]);
    /// ```
    fn get_all_values(&self, key: &str) -> Vec<String>;

    /// Similar to `get_all_values`, except it returns references to the
    /// `MailHeader` structs instead of just extracting the values.
    fn get_all_headers(&self, key: &str) -> Vec<&MailHeader>;
}
 478 | 
 479 | impl<'a> MailHeaderMap for [MailHeader<'a>] {
 480 |     fn get_first_value(&self, key: &str) -> Option<String> {
 481 |         for x in self {
 482 |             if x.get_key_ref().eq_ignore_ascii_case(key) {
 483 |                 return Some(x.get_value());
 484 |             }
 485 |         }
 486 |         None
 487 |     }
 488 | 
 489 |     fn get_first_header(&self, key: &str) -> Option<&MailHeader> {
 490 |         self.iter()
 491 |             .find(|&x| x.get_key_ref().eq_ignore_ascii_case(key))
 492 |     }
 493 | 
 494 |     fn get_all_values(&self, key: &str) -> Vec<String> {
 495 |         let mut values: Vec<String> = Vec::new();
 496 |         for x in self {
 497 |             if x.get_key_ref().eq_ignore_ascii_case(key) {
 498 |                 values.push(x.get_value());
 499 |             }
 500 |         }
 501 |         values
 502 |     }
 503 | 
 504 |     fn get_all_headers(&self, key: &str) -> Vec<&MailHeader> {
 505 |         let mut headers: Vec<&MailHeader> = Vec::new();
 506 |         for x in self {
 507 |             if x.get_key_ref().eq_ignore_ascii_case(key) {
 508 |                 headers.push(x);
 509 |             }
 510 |         }
 511 |         headers
 512 |     }
 513 | }
 514 | 
 515 | /// Parses all the headers from the raw data given.
 516 | /// This function takes raw byte data, and starts parsing it, expecting there
 517 | /// to be zero or more MIME header key-value pair right at the beginning,
 518 | /// followed by two consecutive newlines (i.e. a blank line). It parses those
 519 | /// headers and returns them in a vector. The normal vector functions can be
 520 | /// used to access the headers linearly, or the MailHeaderMap trait can be used
 521 | /// to access them in a map-like fashion. Along with this vector, the function
 522 | /// returns the index at which the message body is expected to start. If you
 523 | /// just care about the headers, you can ignore the second component of the
 524 | /// returned tuple.
 525 | /// Error values are returned if there was some sort of parsing error.
 526 | ///
 527 | /// # Examples
 528 | /// ```
 529 | ///     use mailparse::{parse_headers, MailHeaderMap};
 530 | ///     let (headers, _) = parse_headers(concat!(
 531 | ///             "Subject: Test\n",
 532 | ///             "From: me@myself.com\n",
 533 | ///             "To: you@yourself.com").as_bytes())
 534 | ///         .unwrap();
 535 | ///     assert_eq!(headers[1].get_key(), "From");
 536 | ///     assert_eq!(headers.get_first_value("To"), Some("you@yourself.com".to_string()));
 537 | /// ```
 538 | pub fn parse_headers(raw_data: &[u8]) -> Result<(Vec<MailHeader>, usize), MailParseError> {
 539 |     let mut headers: Vec<MailHeader> = Vec::new();
 540 |     let mut ix = 0;
 541 |     loop {
 542 |         if ix >= raw_data.len() {
 543 |             break;
 544 |         } else if raw_data[ix] == b'\n' {
 545 |             ix += 1;
 546 |             break;
 547 |         } else if raw_data[ix] == b'\r' {
 548 |             if ix + 1 < raw_data.len() && raw_data[ix + 1] == b'\n' {
 549 |                 ix += 2;
 550 |                 break;
 551 |             } else {
 552 |                 return Err(MailParseError::Generic(
 553 |                     "Headers were followed by an unexpected lone \
 554 |                      CR character!",
 555 |                 ));
 556 |             }
 557 |         }
 558 |         let (header, ix_next) = parse_header(&raw_data[ix..])?;
 559 |         headers.push(header);
 560 |         ix += ix_next;
 561 |     }
 562 |     Ok((headers, ix))
 563 | }
 564 | 
/// A struct to hold a more structured representation of the Content-Type header.
/// This is provided mostly as a convenience since this metadata is usually
/// needed to interpret the message body properly.
#[derive(Debug)]
pub struct ParsedContentType {
    /// The type of the data, for example "text/plain" or "application/pdf".
    pub mimetype: String,
    /// The charset used to decode the raw byte data, for example "iso-8859-1"
    /// or "utf-8". The `Default` value is "us-ascii".
    pub charset: String,
    /// The additional params of Content-Type, e.g. filename and boundary. The
    /// keys in the map will be lowercased, and the values will have any
    /// enclosing quotes stripped.
    pub params: BTreeMap<String, String>,
}
 580 | 
 581 | impl Default for ParsedContentType {
 582 |     fn default() -> Self {
 583 |         ParsedContentType {
 584 |             mimetype: "text/plain".to_string(),
 585 |             charset: "us-ascii".to_string(),
 586 |             params: BTreeMap::new(),
 587 |         }
 588 |     }
 589 | }
 590 | 
 591 | impl ParsedContentType {
 592 |     fn default_conditional(in_multipart_digest: bool) -> Self {
 593 |         let mut default = Self::default();
 594 |         if in_multipart_digest {
 595 |             default.mimetype = "message/rfc822".to_string();
 596 |         }
 597 |         default
 598 |     }
 599 | }
 600 | 
 601 | /// Helper method to parse a header value as a Content-Type header. Note that
 602 | /// the returned object's `params` map will contain a charset key if a charset
 603 | /// was explicitly specified in the header; otherwise the `params` map will not
 604 | /// contain a charset key. Regardless, the `charset` field will contain a
 605 | /// charset - either the one explicitly specified or the default of "us-ascii".
 606 | ///
 607 | /// # Examples
 608 | /// ```
 609 | ///     use mailparse::{parse_header, parse_content_type};
 610 | ///     let (parsed, _) = parse_header(
 611 | ///             b"Content-Type: text/html; charset=foo; boundary=\"quotes_are_removed\"")
 612 | ///         .unwrap();
 613 | ///     let ctype = parse_content_type(&parsed.get_value());
 614 | ///     assert_eq!(ctype.mimetype, "text/html");
 615 | ///     assert_eq!(ctype.charset, "foo");
 616 | ///     assert_eq!(ctype.params.get("boundary"), Some(&"quotes_are_removed".to_string()));
 617 | ///     assert_eq!(ctype.params.get("charset"), Some(&"foo".to_string()));
 618 | /// ```
 619 | /// ```
 620 | ///     use mailparse::{parse_header, parse_content_type};
 621 | ///     let (parsed, _) = parse_header(b"Content-Type: bogus").unwrap();
 622 | ///     let ctype = parse_content_type(&parsed.get_value());
 623 | ///     assert_eq!(ctype.mimetype, "bogus");
 624 | ///     assert_eq!(ctype.charset, "us-ascii");
 625 | ///     assert_eq!(ctype.params.get("boundary"), None);
 626 | ///     assert_eq!(ctype.params.get("charset"), None);
 627 | /// ```
 628 | /// ```
 629 | ///     use mailparse::{parse_header, parse_content_type};
 630 | ///     let (parsed, _) = parse_header(br#"Content-Type: application/octet-stream;name="=?utf8?B?6L+O5ai255m95a+M576O?=";charset="utf8""#).unwrap();
 631 | ///     let ctype = parse_content_type(&parsed.get_value());
 632 | ///     assert_eq!(ctype.mimetype, "application/octet-stream");
 633 | ///     assert_eq!(ctype.charset, "utf8");
 634 | ///     assert_eq!(ctype.params.get("boundary"), None);
 635 | ///     assert_eq!(ctype.params.get("name"), Some(&"迎娶白富美".to_string()));
 636 | /// ```
 637 | pub fn parse_content_type(header: &str) -> ParsedContentType {
 638 |     let params = parse_param_content(header);
 639 |     let mimetype = params.value.to_lowercase();
 640 |     let charset = params
 641 |         .params
 642 |         .get("charset")
 643 |         .cloned()
 644 |         .unwrap_or_else(|| "us-ascii".to_string());
 645 | 
 646 |     ParsedContentType {
 647 |         mimetype,
 648 |         charset,
 649 |         params: params.params,
 650 |     }
 651 | }
 652 | 
/// The possible disposition types in a Content-Disposition header. A more
/// comprehensive list of IANA-recognized types can be found at
/// <https://www.iana.org/assignments/cont-disp/cont-disp.xhtml>. This library
/// only enumerates the types most commonly found in email messages, and
/// provides the `Extension` value for holding all other types.
#[derive(Debug, Clone, PartialEq)]
pub enum DispositionType {
    /// Default value, indicating the content is to be displayed inline as
    /// part of the enclosing document.
    Inline,
    /// A disposition indicating the content is not meant for inline display,
    /// but whose content can be accessed for use.
    Attachment,
    /// A disposition indicating the content contains a form submission.
    FormData,
    /// Extension type to hold any disposition not explicitly enumerated.
    Extension(String),
}
 671 | 
/// The default disposition type is `Inline`.
impl Default for DispositionType {
    fn default() -> Self {
        DispositionType::Inline
    }
}
 677 | 
 678 | /// Convert the string represented disposition type to enum.
 679 | fn parse_disposition_type(disposition: &str) -> DispositionType {
 680 |     match &disposition.to_lowercase()[..] {
 681 |         "inline" => DispositionType::Inline,
 682 |         "attachment" => DispositionType::Attachment,
 683 |         "form-data" => DispositionType::FormData,
 684 |         extension => DispositionType::Extension(extension.to_string()),
 685 |     }
 686 | }
 687 | 
/// A struct to hold a more structured representation of the Content-Disposition header.
/// This is provided mostly as a convenience since this metadata is usually
/// needed to interpret the message body properly.
#[derive(Debug, Default)]
pub struct ParsedContentDisposition {
    /// The disposition type of the Content-Disposition header. If this
    /// is an extension type, the string will be lowercased.
    pub disposition: DispositionType,
    /// The additional params of Content-Disposition, e.g. filename. The
    /// keys in the map will be lowercased, and the values will have any
    /// enclosing quotes stripped.
    pub params: BTreeMap<String, String>,
}
 701 | 
 702 | /// Helper method to parse a header value as a Content-Disposition header. The disposition
 703 | /// defaults to "inline" if no disposition parameter is provided in the header
 704 | /// value.
 705 | ///
 706 | /// # Examples
 707 | /// ```
 708 | ///     use mailparse::{parse_header, parse_content_disposition, DispositionType};
 709 | ///     let (parsed, _) = parse_header(
 710 | ///             b"Content-Disposition: attachment; filename=\"yummy dummy\"")
 711 | ///         .unwrap();
 712 | ///     let dis = parse_content_disposition(&parsed.get_value());
 713 | ///     assert_eq!(dis.disposition, DispositionType::Attachment);
 714 | ///     assert_eq!(dis.params.get("name"), None);
 715 | ///     assert_eq!(dis.params.get("filename"), Some(&"yummy dummy".to_string()));
 716 | /// ```
 717 | pub fn parse_content_disposition(header: &str) -> ParsedContentDisposition {
 718 |     let params = parse_param_content(header);
 719 |     let disposition = parse_disposition_type(&params.value);
 720 |     ParsedContentDisposition {
 721 |         disposition,
 722 |         params: params.params,
 723 |     }
 724 | }
 725 | 
/// Struct that holds the structured representation of the message. Note that
/// since MIME allows for nested multipart messages, a tree-like structure is
/// necessary to represent it properly. This struct accomplishes that by holding
/// a vector of other ParsedMail structures for the subparts.
///
/// All byte slices borrow from the input buffer that was handed to the parser,
/// so a `ParsedMail<'a>` cannot outlive that buffer.
#[derive(Debug)]
pub struct ParsedMail<'a> {
    /// The raw bytes that make up this message (or subpart).
    pub raw_bytes: &'a [u8],
    /// The raw bytes that make up the header block for this message (or subpart).
    /// This is the prefix of `raw_bytes` up to the start of the body.
    header_bytes: &'a [u8],
    /// The headers for the message (or message subpart).
    pub headers: Vec<MailHeader<'a>>,
    /// The Content-Type information for the message (or message subpart).
    pub ctype: ParsedContentType,
    /// The raw bytes that make up the body of the message (or message subpart).
    /// For a multipart message in which a boundary line was found, this only
    /// covers the bytes before the first boundary line.
    body_bytes: &'a [u8],
    /// The subparts of this message or subpart. This vector is only non-empty
    /// if ctype.mimetype starts with "multipart/".
    pub subparts: Vec<ParsedMail<'a>>,
}
 746 | 
 747 | impl<'a> ParsedMail<'a> {
 748 |     /// Get the body of the message as a Rust string. This function tries to
 749 |     /// unapply the Content-Transfer-Encoding if there is one, and then converts
 750 |     /// the result into a Rust UTF-8 string using the charset in the Content-Type
 751 |     /// (or "us-ascii" if the charset was missing or not recognized). Note that
 752 |     /// in some cases the body may be binary data that doesn't make sense as a
 753 |     /// Rust string - it is up to the caller to handle those cases gracefully.
 754 |     /// These cases may occur in particular when the body is of a "binary"
 755 |     /// Content-Transfer-Encoding (i.e. where `get_body_encoded()` returns a
 756 |     /// `Body::Binary` variant) but may also occur in other cases because of the
 757 |     /// messiness of the real world and non-compliant mail implementations.
 758 |     ///
 759 |     /// # Examples
 760 |     /// ```
 761 |     ///     use mailparse::parse_mail;
 762 |     ///     let p = parse_mail(concat!(
 763 |     ///             "Subject: test\n",
 764 |     ///             "\n",
 765 |     ///             "This is the body").as_bytes())
 766 |     ///         .unwrap();
 767 |     ///     assert_eq!(p.get_body().unwrap(), "This is the body");
 768 |     /// ```
 769 |     pub fn get_body(&self) -> Result<String, MailParseError> {
 770 |         match self.get_body_encoded() {
 771 |             Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded_as_string(),
 772 |             Body::SevenBit(body) | Body::EightBit(body) => body.get_as_string(),
 773 |             Body::Binary(body) => body.get_as_string(),
 774 |         }
 775 |     }
 776 | 
 777 |     /// Get the body of the message as a Rust Vec<u8>. This function tries to
 778 |     /// unapply the Content-Transfer-Encoding if there is one, but won't do
 779 |     /// any charset decoding.
 780 |     ///
 781 |     /// # Examples
 782 |     /// ```
 783 |     ///     use mailparse::parse_mail;
 784 |     ///     let p = parse_mail(concat!(
 785 |     ///             "Subject: test\n",
 786 |     ///             "\n",
 787 |     ///             "This is the body").as_bytes())
 788 |     ///         .unwrap();
 789 |     ///     assert_eq!(p.get_body_raw().unwrap(), b"This is the body");
 790 |     /// ```
 791 |     pub fn get_body_raw(&self) -> Result<Vec<u8>, MailParseError> {
 792 |         match self.get_body_encoded() {
 793 |             Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded(),
 794 |             Body::SevenBit(body) | Body::EightBit(body) => Ok(Vec::<u8>::from(body.get_raw())),
 795 |             Body::Binary(body) => Ok(Vec::<u8>::from(body.get_raw())),
 796 |         }
 797 |     }
 798 | 
 799 |     /// Get the body of the message.
 800 |     /// This function returns the original body without attempting to
 801 |     /// unapply the Content-Transfer-Encoding. The returned object
 802 |     /// contains information that allows the caller to control decoding
 803 |     /// as desired.
 804 |     ///
 805 |     /// # Examples
 806 |     /// ```
 807 |     ///     use mailparse::parse_mail;
 808 |     ///     use mailparse::body::Body;
 809 |     ///
 810 |     ///     let mail = parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
 811 |     ///
 812 |     ///     match mail.get_body_encoded() {
 813 |     ///         Body::Base64(body) => {
 814 |     ///             assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=");
 815 |     ///             assert_eq!(body.get_decoded().unwrap(), b"hello world");
 816 |     ///             assert_eq!(body.get_decoded_as_string().unwrap(), "hello world");
 817 |     ///         },
 818 |     ///         _ => assert!(false),
 819 |     ///     };
 820 |     ///
 821 |     ///
 822 |     ///     // An email whose body encoding is not known upfront
 823 |     ///     let another_mail = parse_mail(b"").unwrap();
 824 |     ///
 825 |     ///     match another_mail.get_body_encoded() {
 826 |     ///         Body::Base64(body) | Body::QuotedPrintable(body) => {
 827 |     ///             println!("mail body encoded: {:?}", body.get_raw());
 828 |     ///             println!("mail body decoded: {:?}", body.get_decoded().unwrap());
 829 |     ///             println!("mail body decoded as string: {}", body.get_decoded_as_string().unwrap());
 830 |     ///         },
 831 |     ///         Body::SevenBit(body) | Body::EightBit(body) => {
 832 |     ///             println!("mail body: {:?}", body.get_raw());
 833 |     ///             println!("mail body as string: {}", body.get_as_string().unwrap());
 834 |     ///         },
 835 |     ///         Body::Binary(body) => {
 836 |     ///             println!("mail body binary: {:?}", body.get_raw());
 837 |     ///         }
 838 |     ///     }
 839 |     /// ```
 840 |     pub fn get_body_encoded(&'a self) -> Body<'a> {
 841 |         let transfer_encoding = self
 842 |             .headers
 843 |             .get_first_value("Content-Transfer-Encoding")
 844 |             .map(|s| s.to_lowercase());
 845 | 
 846 |         Body::new(self.body_bytes, &self.ctype, &transfer_encoding)
 847 |     }
 848 | 
 849 |     /// Returns a struct that wraps the headers for this message.
 850 |     /// The struct provides utility methods to read the individual headers.
 851 |     pub fn get_headers(&'a self) -> Headers<'a> {
 852 |         Headers::new(self.header_bytes, &self.headers)
 853 |     }
 854 | 
 855 |     /// Returns a struct containing a parsed representation of the
 856 |     /// Content-Disposition header. The first header with this name
 857 |     /// is used, if there are multiple. See the `parse_content_disposition`
 858 |     /// method documentation for more details on the semantics of the
 859 |     /// returned object.
 860 |     pub fn get_content_disposition(&self) -> ParsedContentDisposition {
 861 |         self.headers
 862 |             .get_first_value("Content-Disposition")
 863 |             .map(|s| parse_content_disposition(&s))
 864 |             .unwrap_or_default()
 865 |     }
 866 | 
 867 |     /// Returns a depth-first pre-order traversal of the subparts of
 868 |     /// this ParsedMail instance. The first item returned will be this
 869 |     /// ParsedMail itself.
 870 |     pub fn parts(&'a self) -> PartsIterator<'a> {
 871 |         PartsIterator {
 872 |             parts: vec![self],
 873 |             index: 0,
 874 |         }
 875 |     }
 876 | }
 877 | 
/// Iterator over a `ParsedMail` and all of its nested subparts, as returned
/// by `ParsedMail::parts()`.
pub struct PartsIterator<'a> {
    /// The parts discovered so far, in traversal order. The subparts of a
    /// part are inserted right after it when that part is yielded.
    parts: Vec<&'a ParsedMail<'a>>,
    /// Position in `parts` of the next item to yield.
    index: usize,
}
 882 | 
 883 | impl<'a> Iterator for PartsIterator<'a> {
 884 |     type Item = &'a ParsedMail<'a>;
 885 | 
 886 |     fn next(&mut self) -> Option<Self::Item> {
 887 |         if self.index >= self.parts.len() {
 888 |             return None;
 889 |         }
 890 | 
 891 |         let cur = self.parts[self.index];
 892 |         self.index += 1;
 893 |         self.parts
 894 |             .splice(self.index..self.index, cur.subparts.iter());
 895 |         Some(cur)
 896 |     }
 897 | }
 898 | 
/// The main mail-parsing entry point.
/// This function takes the raw data making up the whole message (the header
/// block followed by the body) and returns a structured version of it, which
/// allows easily accessing the header and body information as needed.
///
/// The returned `ParsedMail` borrows from `raw_data`.
///
/// # Errors
/// Returns a `MailParseError` if the headers of the message, or of any
/// multipart subpart, cannot be parsed.
///
/// # Examples
/// ```
///     use mailparse::*;
///     let parsed = parse_mail(concat!(
///             "Subject: This is a test email\n",
///             "Content-Type: multipart/alternative; boundary=foobar\n",
///             "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
///             "\n",
///             "--foobar\n",
///             "Content-Type: text/plain; charset=utf-8\n",
///             "Content-Transfer-Encoding: quoted-printable\n",
///             "\n",
///             "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
///             "--foobar\n",
///             "Content-Type: text/html\n",
///             "Content-Transfer-Encoding: base64\n",
///             "\n",
///             "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
///             "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
///             "--foobar--\n",
///             "After the final boundary stuff gets ignored.\n").as_bytes())
///         .unwrap();
///     assert_eq!(parsed.headers.get_first_value("Subject"),
///         Some("This is a test email".to_string()));
///     assert_eq!(parsed.subparts.len(), 2);
///     assert_eq!(parsed.subparts[0].get_body().unwrap(),
///         "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}");
///     assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64");
///     assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html");
///     assert!(parsed.subparts[1].get_body().unwrap().starts_with("<html>"));
///     assert_eq!(dateparse(parsed.headers.get_first_value("Date").unwrap().as_str()).unwrap(), 1475417182);
/// ```
pub fn parse_mail(raw_data: &[u8]) -> Result<ParsedMail, MailParseError> {
    // The top-level message is never considered to be inside a multipart/digest.
    parse_mail_recursive(raw_data, false)
}
 939 | 
 940 | /// Strips LF or CRLF if there is one at the end of the string raw_data[ix_start..ix].
 941 | /// This is used to ensure that CRLF just before a boundary is treated as part of the
 942 | /// boundary, not the body part that was before the boundary. See discussion in
 943 | /// https://github.com/staktrace/mailparse/issues/127.
 944 | fn strip_trailing_crlf(raw_data: &[u8], ix_start: usize, mut ix: usize) -> usize {
 945 |     if ix > ix_start && raw_data[ix - 1] == b'\n' {
 946 |         ix -= 1;
 947 |         if ix > ix_start && raw_data[ix - 1] == b'\r' {
 948 |             ix -= 1;
 949 |         }
 950 |     }
 951 |     ix
 952 | }
 953 | 
 954 | fn parse_mail_recursive(
 955 |     raw_data: &[u8],
 956 |     in_multipart_digest: bool,
 957 | ) -> Result<ParsedMail, MailParseError> {
 958 |     let (headers, ix_body) = parse_headers(raw_data)?;
 959 |     let ctype = headers
 960 |         .get_first_value("Content-Type")
 961 |         .map(|s| parse_content_type(&s))
 962 |         .unwrap_or_else(|| ParsedContentType::default_conditional(in_multipart_digest));
 963 | 
 964 |     let mut result = ParsedMail {
 965 |         raw_bytes: raw_data,
 966 |         header_bytes: &raw_data[0..ix_body],
 967 |         headers,
 968 |         ctype,
 969 |         body_bytes: &raw_data[ix_body..],
 970 |         subparts: Vec::<ParsedMail>::new(),
 971 |     };
 972 |     if result.ctype.mimetype.starts_with("multipart/")
 973 |         && result.ctype.params.get("boundary").is_some()
 974 |         && raw_data.len() > ix_body
 975 |     {
 976 |         let in_multipart_digest = result.ctype.mimetype == "multipart/digest";
 977 |         let boundary = String::from("--") + &result.ctype.params["boundary"];
 978 |         if let Some(ix_boundary_start) =
 979 |             find_from_u8_line_prefix(raw_data, ix_body, boundary.as_bytes())
 980 |         {
 981 |             let ix_body_end = strip_trailing_crlf(raw_data, ix_body, ix_boundary_start);
 982 |             result.body_bytes = &raw_data[ix_body..ix_body_end];
 983 |             let mut ix_boundary_end = ix_boundary_start + boundary.len();
 984 |             while let Some(ix_part_start) =
 985 |                 find_from_u8(raw_data, ix_boundary_end, b"\n").map(|v| v + 1)
 986 |             {
 987 |                 let ix_part_boundary_start =
 988 |                     find_from_u8_line_prefix(raw_data, ix_part_start, boundary.as_bytes());
 989 |                 let ix_part_end = ix_part_boundary_start
 990 |                     .map(|x| strip_trailing_crlf(raw_data, ix_part_start, x))
 991 |                     // if there is no terminating boundary, assume the part end is the end of the email
 992 |                     .unwrap_or(raw_data.len());
 993 | 
 994 |                 result.subparts.push(parse_mail_recursive(
 995 |                     &raw_data[ix_part_start..ix_part_end],
 996 |                     in_multipart_digest,
 997 |                 )?);
 998 |                 ix_boundary_end = ix_part_boundary_start
 999 |                     .map(|x| x + boundary.len())
1000 |                     .unwrap_or(raw_data.len());
1001 |                 if ix_boundary_end + 2 > raw_data.len()
1002 |                     || (raw_data[ix_boundary_end] == b'-' && raw_data[ix_boundary_end + 1] == b'-')
1003 |                 {
1004 |                     break;
1005 |                 }
1006 |             }
1007 |         }
1008 |     }
1009 |     Ok(result)
1010 | }
1011 | 
/// Used to store params for content-type and content-disposition
struct ParamContent {
    /// The leading token of the header value (the text before the first
    /// `;`), with surrounding whitespace trimmed.
    value: String,
    /// The `key=value` parameters that followed the leading token, keyed
    /// by lowercased parameter name.
    params: BTreeMap<String, String>,
}
1017 | 
1018 | /// Parse parameterized header values such as that for Content-Type
1019 | /// e.g. `multipart/alternative; boundary=foobar`
1020 | /// Note: this function is not made public as it may require
1021 | /// significant changes to be fully correct. For instance,
1022 | /// it does not handle quoted parameter values containing the
1023 | /// semicolon (';') character. It also produces a BTreeMap,
1024 | /// which implicitly does not support multiple parameters with
1025 | /// the same key. Also, the parameter values may contain language
1026 | /// information in a format specified by RFC 2184 which is thrown
1027 | /// away. The format for parameterized header values doesn't
1028 | /// appear to be strongly specified anywhere.
1029 | fn parse_param_content(content: &str) -> ParamContent {
1030 |     let mut tokens = content.split(';');
1031 |     // There must be at least one token produced by split, even if it's empty.
1032 |     let value = tokens.next().unwrap().trim();
1033 |     let mut map: BTreeMap<String, String> = tokens
1034 |         .filter_map(|kv| {
1035 |             kv.find('=').map(|idx| {
1036 |                 let key = kv[0..idx].trim().to_lowercase();
1037 |                 let mut value = kv[idx + 1..].trim();
1038 |                 if value.starts_with('"') && value.ends_with('"') && value.len() > 1 {
1039 |                     value = &value[1..value.len() - 1];
1040 |                 }
1041 |                 (key, value.to_string())
1042 |             })
1043 |         })
1044 |         .collect();
1045 | 
1046 |     // Decode charset encoding, as described in RFC 2184, Section 4.
1047 |     let decode_key_list: Vec<String> = map
1048 |         .keys()
1049 |         .filter_map(|k| k.strip_suffix('*'))
1050 |         .map(String::from)
1051 |         // Skip encoded keys where there is already an equivalent decoded key in the map
1052 |         .filter(|k| !map.contains_key(k))
1053 |         .collect();
1054 |     let encodings = compute_parameter_encodings(&map, &decode_key_list);
1055 |     // Note that when we get here, we might still have entries in `encodings` for continuation segments
1056 |     // that didn't have a *0 segment at all. These shouldn't exist per spec so we can do whatever we want,
1057 |     // as long as we don't panic.
1058 |     for (k, (e, strip)) in encodings {
1059 |         if let Some(charset) = Charset::for_label_no_replacement(e.as_bytes()) {
1060 |             let key = format!("{}*", k);
1061 |             let percent_encoded_value = map.remove(&key).unwrap();
1062 |             let encoded_value = if strip {
1063 |                 percent_decode(percent_encoded_value.splitn(3, '\'').nth(2).unwrap_or(""))
1064 |             } else {
1065 |                 percent_decode(&percent_encoded_value)
1066 |             };
1067 |             let decoded_value = charset.decode_without_bom_handling(&encoded_value).0;
1068 |             map.insert(k, decoded_value.to_string());
1069 |         }
1070 |     }
1071 | 
1072 |     // Unwrap parameter value continuations, as described in RFC 2184, Section 3.
1073 |     let unwrap_key_list: Vec<String> = map
1074 |         .keys()
1075 |         .filter_map(|k| k.strip_suffix("*0"))
1076 |         .map(String::from)
1077 |         // Skip wrapped keys where there is already an unwrapped equivalent in the map
1078 |         .filter(|k| !map.contains_key(k))
1079 |         .collect();
1080 |     for unwrap_key in unwrap_key_list {
1081 |         let mut unwrapped_value = String::new();
1082 |         let mut index = 0;
1083 |         while let Some(wrapped_value_part) = map.remove(&format!("{}*{}", &unwrap_key, index)) {
1084 |             index += 1;
1085 |             unwrapped_value.push_str(&wrapped_value_part);
1086 |         }
1087 |         let old_value = map.insert(unwrap_key, unwrapped_value);
1088 |         assert!(old_value.is_none());
1089 |     }
1090 | 
1091 |     ParamContent {
1092 |         value: value.into(),
1093 |         params: map,
1094 |     }
1095 | }
1096 | 
/// Compute the charset to use for every charset-encoded parameter.
///
/// `map` is the parameter map and `decode_key_list` holds the keys of the
/// encoded parameters with their trailing '*' stripped (so `"{key}*"` is
/// expected to be present in `map`).
///
/// In the returned map, the key is one of the entries from the decode_key_list,
/// (i.e. the parameter key with the trailing '*' stripped). The value is a tuple
/// containing the encoding (or empty string for no encoding found) and a flag
/// that indicates if the encoding needs to be stripped from the value. This is
/// set to true for non-continuation parameter values.
fn compute_parameter_encodings(
    map: &BTreeMap<String, String>,
    decode_key_list: &[String],
) -> HashMap<String, (String, bool)> {
    /// Returns the text before the first `'` of the value stored under
    /// `"{decode_key}*"`, or an empty string if that entry is missing.
    fn declared_encoding<'m>(map: &'m BTreeMap<String, String>, decode_key: &str) -> &'m str {
        // Per spec, there should always be an encoding. If it's missing, handle that case
        // gracefully by using an empty string that callers handle specially later.
        map.get(&format!("{}*", decode_key))
            .and_then(|value| value.split('\'').next())
            .unwrap_or("")
    }

    // To handle section 4.1 (combining encodings with continuations), we first
    // compute the encoding for each parameter value or parameter value segment
    // that is encoded. For continuation segments the encoding from the *0 segment
    // overwrites the continuation segment's encoding, if there is one.
    let mut encodings: HashMap<String, (String, bool)> = HashMap::new();
    for decode_key in decode_key_list {
        match decode_key.strip_suffix("*0") {
            Some(unwrap_key) => {
                let encoding = declared_encoding(map, decode_key);
                let continuation_prefix = format!("{}*", unwrap_key);
                let segments = decode_key_list
                    .iter()
                    .filter(|key| key.starts_with(continuation_prefix.as_str()));
                for continuation_key in segments {
                    // This may (intentionally) overwrite encodings previously found for the
                    // continuation segments (which are bogus). In those cases, the flag
                    // in the tuple should get updated from true to false.
                    encodings.insert(
                        continuation_key.clone(),
                        (encoding.to_string(), continuation_key == decode_key),
                    );
                }
            }
            None => {
                // If an entry already exists, this is a continuation segment whose encoding
                // has been populated by its initial *0 segment, so it is left alone.
                encodings
                    .entry(decode_key.clone())
                    .or_insert_with(|| (declared_encoding(map, decode_key).to_string(), true));
            }
        }
    }
    encodings
}
1149 | 
1150 | fn percent_decode(encoded: &str) -> Vec<u8> {
1151 |     let mut decoded = Vec::with_capacity(encoded.len());
1152 |     let mut bytes = encoded.bytes();
1153 |     let mut next = bytes.next();
1154 |     while next.is_some() {
1155 |         let b = next.unwrap();
1156 |         if b != b'%' {
1157 |             decoded.push(b);
1158 |             next = bytes.next();
1159 |             continue;
1160 |         }
1161 | 
1162 |         let top = match bytes.next() {
1163 |             Some(n) if n.is_ascii_hexdigit() => n,
1164 |             n => {
1165 |                 decoded.push(b);
1166 |                 next = n;
1167 |                 continue;
1168 |             }
1169 |         };
1170 |         let bottom = match bytes.next() {
1171 |             Some(n) if n.is_ascii_hexdigit() => n,
1172 |             n => {
1173 |                 decoded.push(b);
1174 |                 decoded.push(top);
1175 |                 next = n;
1176 |                 continue;
1177 |             }
1178 |         };
1179 |         let decoded_byte = (hex_to_nybble(top) << 4) | hex_to_nybble(bottom);
1180 |         decoded.push(decoded_byte);
1181 | 
1182 |         next = bytes.next();
1183 |     }
1184 |     decoded
1185 | }
1186 | 
/// Convert an ASCII hex digit (`0-9`, `a-f`, `A-F`) into its numeric value.
///
/// # Panics
/// Panics if `byte` is not an ASCII hex digit.
fn hex_to_nybble(byte: u8) -> u8 {
    match char::from(byte).to_digit(16) {
        // A base-16 digit is always in 0..=15, so the cast cannot truncate.
        Some(digit) => digit as u8,
        None => panic!("Not a hex character!"),
    }
}
1195 | 
1196 | #[cfg(test)]
1197 | mod tests {
1198 |     use super::*;
1199 | 
    /// Exercises `parse_header` on unencoded headers: key/value splitting,
    /// whitespace handling, folded (multi-line) values, line endings and
    /// malformed input.
    #[test]
    fn parse_basic_header() {
        // Simplest case: one key, one value.
        let (parsed, _) = parse_header(b"Key: Value").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.get_key(), "Key");
        assert_eq!(parsed.get_key_ref(), "Key");
        assert_eq!(parsed.value, b"Value");
        assert_eq!(parsed.get_value(), "Value");
        assert_eq!(parsed.get_value_raw(), "Value".as_bytes());

        // Whitespace before the colon stays in the key; leading whitespace of
        // the value is dropped but trailing whitespace is kept.
        let (parsed, _) = parse_header(b"Key :  Value ").unwrap();
        assert_eq!(parsed.key, b"Key ");
        assert_eq!(parsed.value, b"Value ");
        assert_eq!(parsed.get_value(), "Value ");
        assert_eq!(parsed.get_value_raw(), "Value ".as_bytes());

        // Empty value.
        let (parsed, _) = parse_header(b"Key:").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"");

        // Empty key and empty value.
        let (parsed, _) = parse_header(b":\n").unwrap();
        assert_eq!(parsed.key, b"");
        assert_eq!(parsed.value, b"");

        // A folded value: the raw value keeps the line break, the decoded
        // value does not.
        let (parsed, _) = parse_header(b"Key:Multi-line\n value").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"Multi-line\n value");
        assert_eq!(parsed.get_value(), "Multi-line value");
        assert_eq!(parsed.get_value_raw(), "Multi-line\n value".as_bytes());

        // Several continuation lines; the trailing newline is not part of the value.
        let (parsed, _) = parse_header(b"Key:  Multi\n  line\n value\n").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"Multi\n  line\n value");
        assert_eq!(parsed.get_value(), "Multi line value");
        assert_eq!(parsed.get_value_raw(), "Multi\n  line\n value".as_bytes());

        // Only the first header is parsed when another one follows.
        let (parsed, _) = parse_header(b"Key: One\nKey2: Two").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"One");

        // A tab also introduces a continuation line.
        let (parsed, _) = parse_header(b"Key: One\n\tOverhang").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"One\n\tOverhang");
        assert_eq!(parsed.get_value(), "One Overhang");
        assert_eq!(parsed.get_value_raw(), "One\n\tOverhang".as_bytes());

        // A raw 8-bit byte in the value: 0xAE comes out as U+00AE.
        let (parsed, _) = parse_header(b"SPAM: VIAGRA \xAE").unwrap();
        assert_eq!(parsed.key, b"SPAM");
        assert_eq!(parsed.value, b"VIAGRA \xAE");
        assert_eq!(parsed.get_value(), "VIAGRA \u{ae}");
        assert_eq!(parsed.get_value_raw(), b"VIAGRA \xAE");

        // A header must not start with whitespace.
        parse_header(b" Leading: Space").unwrap_err();

        // Without a colon the whole line becomes the key and the value is empty.
        let (parsed, _) = parse_header(b"Just a string").unwrap();
        assert_eq!(parsed.key, b"Just a string");
        assert_eq!(parsed.value, b"");
        assert_eq!(parsed.get_value(), "");
        assert_eq!(parsed.get_value_raw(), b"");

        // A key line with no colon ends at the newline.
        let (parsed, _) = parse_header(b"Key\nBroken: Value").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"");
        assert_eq!(parsed.get_value(), "");
        assert_eq!(parsed.get_value_raw(), b"");

        // CRLF line ending is stripped from the value.
        let (parsed, _) = parse_header(b"Key: With CRLF\r\n").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"With CRLF");
        assert_eq!(parsed.get_value(), "With CRLF");
        assert_eq!(parsed.get_value_raw(), b"With CRLF");

        // Extra CRs directly before the LF are stripped as well.
        let (parsed, _) = parse_header(b"Key: With spurious CRs\r\r\r\n").unwrap();
        assert_eq!(parsed.value, b"With spurious CRs");
        assert_eq!(parsed.get_value(), "With spurious CRs");
        assert_eq!(parsed.get_value_raw(), b"With spurious CRs");

        // A CR in the middle of the value is preserved.
        let (parsed, _) = parse_header(b"Key: With \r mixed CR\r\n").unwrap();
        assert_eq!(parsed.value, b"With \r mixed CR");
        assert_eq!(parsed.get_value(), "With \r mixed CR");
        assert_eq!(parsed.get_value_raw(), b"With \r mixed CR");

        // The value starts on a continuation line right after the colon.
        let (parsed, _) = parse_header(b"Key:\r\n Value after linebreak").unwrap();
        assert_eq!(parsed.value, b"\r\n Value after linebreak");
        assert_eq!(parsed.get_value(), " Value after linebreak");
        assert_eq!(parsed.get_value_raw(), b"\r\n Value after linebreak");
    }
1287 | 
1288 |     #[test]
1289 |     fn parse_encoded_headers() {
1290 |         let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
1291 |         assert_eq!(parsed.get_key(), "Subject");
1292 |         assert_eq!(parsed.get_key_ref(), "Subject");
1293 |         assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!");
1294 |         assert_eq!(
1295 |             parsed.get_value_raw(),
1296 |             "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=".as_bytes()
1297 |         );
1298 | 
1299 |         let (parsed, _) = parse_header(
1300 |             b"Subject: =?iso-8859-1?Q?=A1Hola,?=\n \
1301 |                                         =?iso-8859-1?Q?_se=F1or!?=",
1302 |         )
1303 |         .unwrap();
1304 |         assert_eq!(parsed.get_key(), "Subject");
1305 |         assert_eq!(parsed.get_key_ref(), "Subject");
1306 |         assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!");
1307 |         assert_eq!(
1308 |             parsed.get_value_raw(),
1309 |             "=?iso-8859-1?Q?=A1Hola,?=\n \
1310 |                                           =?iso-8859-1?Q?_se=F1or!?="
1311 |                 .as_bytes()
1312 |         );
1313 | 
1314 |         let (parsed, _) = parse_header(b"Euro: =?utf-8?Q?=E2=82=AC?=").unwrap();
1315 |         assert_eq!(parsed.get_key(), "Euro");
1316 |         assert_eq!(parsed.get_key_ref(), "Euro");
1317 |         assert_eq!(parsed.get_value(), "\u{20ac}");
1318 |         assert_eq!(parsed.get_value_raw(), "=?utf-8?Q?=E2=82=AC?=".as_bytes());
1319 | 
1320 |         let (parsed, _) = parse_header(b"HelloWorld: =?utf-8?B?aGVsbG8gd29ybGQ=?=").unwrap();
1321 |         assert_eq!(parsed.get_value(), "hello world");
1322 |         assert_eq!(
1323 |             parsed.get_value_raw(),
1324 |             "=?utf-8?B?aGVsbG8gd29ybGQ=?=".as_bytes()
1325 |         );
1326 | 
1327 |         let (parsed, _) = parse_header(b"Empty: =?utf-8?Q??=").unwrap();
1328 |         assert_eq!(parsed.get_value(), "");
1329 |         assert_eq!(parsed.get_value_raw(), "=?utf-8?Q??=".as_bytes());
1330 | 
1331 |         let (parsed, _) = parse_header(b"Incomplete: =?").unwrap();
1332 |         assert_eq!(parsed.get_value(), "=?");
1333 |         assert_eq!(parsed.get_value_raw(), "=?".as_bytes());
1334 | 
1335 |         let (parsed, _) = parse_header(b"BadEncoding: =?garbage?Q??=").unwrap();
1336 |         assert_eq!(parsed.get_value(), "=?garbage?Q??=");
1337 |         assert_eq!(parsed.get_value_raw(), "=?garbage?Q??=".as_bytes());
1338 | 
1339 |         let (parsed, _) = parse_header(b"Invalid: =?utf-8?Q?=E2=AC?=").unwrap();
1340 |         assert_eq!(parsed.get_value(), "\u{fffd}");
1341 | 
1342 |         let (parsed, _) = parse_header(b"LineBreak: =?utf-8?Q?=E2=82\n =AC?=").unwrap();
1343 |         assert_eq!(parsed.get_value(), "=?utf-8?Q?=E2=82 =AC?=");
1344 | 
1345 |         let (parsed, _) = parse_header(b"NotSeparateWord: hello=?utf-8?Q?world?=").unwrap();
1346 |         assert_eq!(parsed.get_value(), "hello=?utf-8?Q?world?=");
1347 | 
1348 |         let (parsed, _) = parse_header(b"NotSeparateWord2: =?utf-8?Q?hello?=world").unwrap();
1349 |         assert_eq!(parsed.get_value(), "=?utf-8?Q?hello?=world");
1350 | 
1351 |         let (parsed, _) = parse_header(b"Key: \"=?utf-8?Q?value?=\"").unwrap();
1352 |         assert_eq!(parsed.get_value(), "\"value\"");
1353 | 
1354 |         let (parsed, _) = parse_header(b"Subject: =?utf-8?q?=5BOntario_Builder=5D_Understanding_home_shopping_=E2=80=93_a_q?=\n \
1355 |                                         =?utf-8?q?uick_survey?=")
1356 |             .unwrap();
1357 |         assert_eq!(parsed.get_key(), "Subject");
1358 |         assert_eq!(parsed.get_key_ref(), "Subject");
1359 |         assert_eq!(
1360 |             parsed.get_value(),
1361 |             "[Ontario Builder] Understanding home shopping \u{2013} a quick survey"
1362 |         );
1363 | 
1364 |         let (parsed, _) = parse_header(
1365 |             b"Subject: =?utf-8?q?=5BOntario_Builder=5D?= non-qp words\n \
1366 |              and the subject continues",
1367 |         )
1368 |         .unwrap();
1369 |         assert_eq!(
1370 |             parsed.get_value(),
1371 |             "[Ontario Builder] non-qp words and the subject continues"
1372 |         );
1373 | 
1374 |         let (parsed, _) = parse_header(
1375 |             b"Subject: =?utf-8?q?=5BOntario_Builder=5D?= \n \
1376 |              and the subject continues",
1377 |         )
1378 |         .unwrap();
1379 |         assert_eq!(
1380 |             parsed.get_value(),
1381 |             "[Ontario Builder]  and the subject continues"
1382 |         );
1383 |         assert_eq!(
1384 |             parsed.get_value_raw(),
1385 |             "=?utf-8?q?=5BOntario_Builder=5D?= \n \
1386 |                and the subject continues"
1387 |                 .as_bytes()
1388 |         );
1389 | 
1390 |         let (parsed, _) = parse_header(b"Subject: =?ISO-2022-JP?B?GyRCRnwbKEI=?=\n\t=?ISO-2022-JP?B?GyRCS1wbKEI=?=\n\t=?ISO-2022-JP?B?GyRCOGwbKEI=?=")
1391 |             .unwrap();
1392 |         assert_eq!(parsed.get_key(), "Subject");
1393 |         assert_eq!(parsed.get_key_ref(), "Subject");
1394 |         assert_eq!(parsed.get_key_raw(), "Subject".as_bytes());
1395 |         assert_eq!(parsed.get_value(), "\u{65E5}\u{672C}\u{8A9E}");
1396 |         assert_eq!(parsed.get_value_raw(), "=?ISO-2022-JP?B?GyRCRnwbKEI=?=\n\t=?ISO-2022-JP?B?GyRCS1wbKEI=?=\n\t=?ISO-2022-JP?B?GyRCOGwbKEI=?=".as_bytes());
1397 | 
1398 |         let (parsed, _) = parse_header(b"Subject: =?ISO-2022-JP?Q?=1B\x24\x42\x46\x7C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x4B\x5C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x38\x6C=1B\x28\x42?=")
1399 |             .unwrap();
1400 |         assert_eq!(parsed.get_key(), "Subject");
1401 |         assert_eq!(parsed.get_key_ref(), "Subject");
1402 |         assert_eq!(parsed.get_key_raw(), "Subject".as_bytes());
1403 |         assert_eq!(parsed.get_value(), "\u{65E5}\u{672C}\u{8A9E}");
1404 |         assert_eq!(parsed.get_value_raw(), "=?ISO-2022-JP?Q?=1B\x24\x42\x46\x7C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x4B\x5C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x38\x6C=1B\x28\x42?=".as_bytes());
1405 | 
1406 |         let (parsed, _) = parse_header(b"Subject: =?UTF-7?Q?+JgM-?=").unwrap();
1407 |         assert_eq!(parsed.get_key(), "Subject");
1408 |         assert_eq!(parsed.get_key_ref(), "Subject");
1409 |         assert_eq!(parsed.get_key_raw(), "Subject".as_bytes());
1410 |         assert_eq!(parsed.get_value(), "\u{2603}");
1411 |         assert_eq!(parsed.get_value_raw(), b"=?UTF-7?Q?+JgM-?=");
1412 | 
1413 |         let (parsed, _) =
1414 |             parse_header(b"Content-Type: image/jpeg; name=\"=?UTF-8?B?MDY2MTM5ODEuanBn?=\"")
1415 |                 .unwrap();
1416 |         assert_eq!(parsed.get_key(), "Content-Type");
1417 |         assert_eq!(parsed.get_key_ref(), "Content-Type");
1418 |         assert_eq!(parsed.get_key_raw(), "Content-Type".as_bytes());
1419 |         assert_eq!(parsed.get_value(), "image/jpeg; name=\"06613981.jpg\"");
1420 |         assert_eq!(
1421 |             parsed.get_value_raw(),
1422 |             "image/jpeg; name=\"=?UTF-8?B?MDY2MTM5ODEuanBn?=\"".as_bytes()
1423 |         );
1424 | 
1425 |         let (parsed, _) = parse_header(
1426 |             b"From: =?UTF-8?Q?\"Motorola_Owners=E2=80=99_Forums\"_?=<forums@motorola.com>",
1427 |         )
1428 |         .unwrap();
1429 |         assert_eq!(parsed.get_key(), "From");
1430 |         assert_eq!(parsed.get_key_ref(), "From");
1431 |         assert_eq!(parsed.get_key_raw(), "From".as_bytes());
1432 |         assert_eq!(
1433 |             parsed.get_value(),
1434 |             "\"Motorola Owners\u{2019} Forums\" <forums@motorola.com>"
1435 |         );
1436 |     }
1437 | 
1438 |     #[test]
1439 |     fn encoded_words_and_spaces() { // whitespace around RFC 2047 encoded-words
1440 |         let (header, _) = parse_header(b"K: an =?utf-8?q?encoded?=\n word").unwrap();
1441 |         assert_eq!(
1442 |             header.get_value_raw(),
1443 |             "an =?utf-8?q?encoded?=\n word".as_bytes()
1444 |         );
1445 |         assert_eq!(header.get_value(), "an encoded word");
1446 |         // Adjacent encoded-words are joined; the spaces before the plain word are kept.
1447 |         let (header, _) = parse_header(b"K: =?utf-8?q?glue?= =?utf-8?q?these?= \n words").unwrap();
1448 |         assert_eq!(
1449 |             header.get_value_raw(),
1450 |             "=?utf-8?q?glue?= =?utf-8?q?these?= \n words".as_bytes()
1451 |         );
1452 |         assert_eq!(header.get_value(), "gluethese  words");
1453 |         // Encoded-words separated only by a folded line break are joined as well.
1454 |         let (header, _) = parse_header(b"K: =?utf-8?q?glue?= \n =?utf-8?q?again?=").unwrap();
1455 |         assert_eq!(
1456 |             header.get_value_raw(),
1457 |             "=?utf-8?q?glue?= \n =?utf-8?q?again?=".as_bytes()
1458 |         );
1459 |         assert_eq!(header.get_value(), "glueagain");
1460 |     }
1461 | 
1462 |     #[test]
1463 |     fn parse_multiple_headers() { // parse_headers over several header-block shapes
1464 |         let (fields, _) = parse_headers(b"Key: Value\nTwo: Second").unwrap();
1465 |         assert_eq!(fields.len(), 2);
1466 |         assert_eq!(fields[0].key, b"Key");
1467 |         assert_eq!(fields[0].value, b"Value");
1468 |         assert_eq!(fields[1].key, b"Two");
1469 |         assert_eq!(fields[1].value, b"Second");
1470 |         // An indented continuation line stays inside the raw value of the header above it.
1471 |         let (fields, _) =
1472 |             parse_headers(b"Key: Value\n Overhang\nTwo: Second\nThree: Third").unwrap();
1473 |         assert_eq!(fields.len(), 3);
1474 |         assert_eq!(fields[0].key, b"Key");
1475 |         assert_eq!(fields[0].value, b"Value\n Overhang");
1476 |         assert_eq!(fields[1].key, b"Two");
1477 |         assert_eq!(fields[1].value, b"Second");
1478 |         assert_eq!(fields[2].key, b"Three");
1479 |         assert_eq!(fields[2].value, b"Third");
1480 |         // Text after the blank line is not counted as a header.
1481 |         let (fields, _) = parse_headers(b"Key: Value\nTwo: Second\n\nBody").unwrap();
1482 |         assert_eq!(fields.len(), 2);
1483 |         assert_eq!(fields[0].key, b"Key");
1484 |         assert_eq!(fields[0].value, b"Value");
1485 |         assert_eq!(fields[1].key, b"Two");
1486 |         assert_eq!(fields[1].value, b"Second");
1487 |         // A longer header block with folded lines, followed by a body.
1488 |         let (fields, _) = parse_headers(
1489 |             concat!(
1490 |                 "Return-Path: <kats@foobar.staktrace.com>\n",
1491 |                 "X-Original-To: kats@baz.staktrace.com\n",
1492 |                 "Delivered-To: kats@baz.staktrace.com\n",
1493 |                 "Received: from foobar.staktrace.com (localhost [127.0.0.1])\n",
1494 |                 "    by foobar.staktrace.com (Postfix) with ESMTP id \
1495 |                  139F711C1C34\n",
1496 |                 "    for <kats@baz.staktrace.com>; Fri, 27 May 2016 02:34:26 \
1497 |                  -0400 (EDT)\n",
1498 |                 "Date: Fri, 27 May 2016 02:34:25 -0400\n",
1499 |                 "To: kats@baz.staktrace.com\n",
1500 |                 "From: kats@foobar.staktrace.com\n",
1501 |                 "Subject: test Fri, 27 May 2016 02:34:25 -0400\n",
1502 |                 "X-Mailer: swaks v20130209.0 jetmore.org/john/code/swaks/\n",
1503 |                 "Message-Id: \
1504 |                  <20160527063426.139F711C1C34@foobar.staktrace.com>\n",
1505 |                 "\n",
1506 |                 "This is a test mailing\n"
1507 |             )
1508 |             .as_bytes(),
1509 |         )
1510 |         .unwrap();
1511 |         assert_eq!(fields.len(), 10);
1512 |         assert_eq!(fields[0].key, b"Return-Path");
1513 |         assert_eq!(fields[9].key, b"Message-Id");
1514 |         // Repeated keys: first-value and all-values lookups, plus a key that is absent.
1515 |         let (fields, _) =
1516 |             parse_headers(b"Key: Value\nAnotherKey: AnotherValue\nKey: Value2\nKey: Value3\n")
1517 |                 .unwrap();
1518 |         assert_eq!(fields.len(), 4);
1519 |         assert_eq!(fields.get_first_value("Key"), Some("Value".to_string()));
1520 |         assert_eq!(
1521 |             fields.get_all_values("Key"),
1522 |             vec!["Value", "Value2", "Value3"]
1523 |         );
1524 |         assert_eq!(
1525 |             fields.get_first_value("AnotherKey"),
1526 |             Some("AnotherValue".to_string())
1527 |         );
1528 |         assert_eq!(fields.get_all_values("AnotherKey"), vec!["AnotherValue"]);
1529 |         assert_eq!(fields.get_first_value("NoKey"), None);
1530 |         assert_eq!(fields.get_all_values("NoKey"), Vec::<String>::new());
1531 |         // CRLF line endings, including the CRLF-only separator line before the body.
1532 |         let (fields, _) = parse_headers(b"Key: value\r\nWith: CRLF\r\n\r\nBody").unwrap();
1533 |         assert_eq!(fields.len(), 2);
1534 |         assert_eq!(fields.get_first_value("Key"), Some("value".to_string()));
1535 |         assert_eq!(fields.get_first_value("With"), Some("CRLF".to_string()));
1536 |         // Lines without a colon become headers whose value is empty.
1537 |         let (fields, _) = parse_headers(b"Bad\nKey\n").unwrap();
1538 |         assert_eq!(fields.len(), 2);
1539 |         assert_eq!(fields.get_first_value("Bad"), Some("".to_string()));
1540 |         assert_eq!(fields.get_first_value("Key"), Some("".to_string()));
1541 |         // Same, after a well-formed header and with no newline at the end of the input.
1542 |         let (fields, _) = parse_headers(b"K:V\nBad\nKey").unwrap();
1543 |         assert_eq!(fields.len(), 3);
1544 |         assert_eq!(fields.get_first_value("K"), Some("V".to_string()));
1545 |         assert_eq!(fields.get_first_value("Bad"), Some("".to_string()));
1546 |         assert_eq!(fields.get_first_value("Key"), Some("".to_string()));
1547 |     }
1548 | 
1549 |     #[test]
1550 |     fn test_parse_content_type() { // mimetype, charset and extra params of a Content-Type value
1551 |         let media = parse_content_type("text/html; charset=utf-8");
1552 |         assert_eq!(media.mimetype, "text/html");
1553 |         assert_eq!(media.charset, "utf-8");
1554 |         assert_eq!(media.params.get("boundary"), None);
1555 |         // Quoted charset surrounded by unrelated parameters and stray whitespace.
1556 |         let media = parse_content_type(" foo/bar; x=y; charset=\"fake\" ; x2=y2");
1557 |         assert_eq!(media.mimetype, "foo/bar");
1558 |         assert_eq!(media.charset, "fake");
1559 |         assert_eq!(media.params.get("boundary"), None);
1560 |         // No charset given: "us-ascii" is reported and the boundary shows up in params.
1561 |         let media = parse_content_type(" multipart/bar; boundary=foo ");
1562 |         assert_eq!(media.mimetype, "multipart/bar");
1563 |         assert_eq!(media.charset, "us-ascii");
1564 |         assert_eq!(media.params.get("boundary").unwrap(), "foo");
1565 |     }
1566 | 
1567 |     #[test]
1568 |     fn test_parse_content_disposition() { // disposition type plus params of a Content-Disposition
1569 |         let dsp = parse_content_disposition("inline");
1570 |         assert_eq!(dsp.disposition, DispositionType::Inline);
1571 |         assert_eq!(dsp.params.get("name"), None);
1572 |         assert_eq!(dsp.params.get("filename"), None);
1573 |         // Attachment with several parameters; the quoted name is returned without its quotes.
1574 |         let dsp = parse_content_disposition(
1575 |             " attachment; x=y; charset=\"fake\" ; x2=y2; name=\"King Joffrey.death\"",
1576 |         );
1577 |         assert_eq!(dsp.disposition, DispositionType::Attachment);
1578 |         assert_eq!(
1579 |             dsp.params.get("name"),
1580 |             Some(&"King Joffrey.death".to_string())
1581 |         );
1582 |         assert_eq!(dsp.params.get("filename"), None);
1583 |         // A bare disposition type with leading whitespace.
1584 |         let dsp = parse_content_disposition(" form-data");
1585 |         assert_eq!(dsp.disposition, DispositionType::FormData);
1586 |         assert_eq!(dsp.params.get("name"), None);
1587 |         assert_eq!(dsp.params.get("filename"), None);
1588 |     }
1589 | 
1590 |     #[test]
1591 |     fn test_parse_mail() { // end-to-end parse_mail checks: headers, content type, body, subparts
1592 |         let mesg = parse_mail(b"Key: value\r\n\r\nSome body stuffs").unwrap();
1593 |         assert_eq!(mesg.header_bytes, b"Key: value\r\n\r\n");
1594 |         assert_eq!(mesg.headers.len(), 1);
1595 |         assert_eq!(mesg.headers[0].get_key(), "Key");
1596 |         assert_eq!(mesg.headers[0].get_key_ref(), "Key");
1597 |         assert_eq!(mesg.headers[0].get_value(), "value");
1598 |         assert_eq!(mesg.ctype.mimetype, "text/plain");
1599 |         assert_eq!(mesg.ctype.charset, "us-ascii");
1600 |         assert_eq!(mesg.ctype.params.get("boundary"), None);
1601 |         assert_eq!(mesg.body_bytes, b"Some body stuffs");
1602 |         assert_eq!(mesg.get_body_raw().unwrap(), b"Some body stuffs");
1603 |         assert_eq!(mesg.get_body().unwrap(), "Some body stuffs");
1604 |         assert_eq!(mesg.subparts.len(), 0);
1605 |         // Multipart: mixed-case type/parameter names; "--MYBOUNDARY" in a body is not a boundary.
1606 |         let mesg = parse_mail(
1607 |             concat!(
1608 |                 "Content-Type: MULTIpart/alternative; bounDAry=myboundary\r\n\r\n",
1609 |                 "--myboundary\r\n",
1610 |                 "Content-Type: text/plain\r\n\r\n",
1611 |                 "This is the plaintext version.\r\n",
1612 |                 "--myboundary\r\n",
1613 |                 "Content-Type: text/html;chARset=utf-8\r\n\r\n",
1614 |                 "This is the <b>HTML</b> version with fake --MYBOUNDARY.\r\n",
1615 |                 "--myboundary--"
1616 |             )
1617 |             .as_bytes(),
1618 |         )
1619 |         .unwrap();
1620 |         assert_eq!(mesg.headers.len(), 1);
1621 |         assert_eq!(mesg.headers[0].get_key(), "Content-Type");
1622 |         assert_eq!(mesg.headers[0].get_key_ref(), "Content-Type");
1623 |         assert_eq!(mesg.ctype.mimetype, "multipart/alternative");
1624 |         assert_eq!(mesg.ctype.charset, "us-ascii");
1625 |         assert_eq!(mesg.ctype.params.get("boundary").unwrap(), "myboundary");
1626 |         assert_eq!(mesg.subparts.len(), 2);
1627 |         assert_eq!(mesg.subparts[0].headers.len(), 1);
1628 |         assert_eq!(mesg.subparts[0].ctype.mimetype, "text/plain");
1629 |         assert_eq!(mesg.subparts[0].ctype.charset, "us-ascii");
1630 |         assert_eq!(mesg.subparts[0].ctype.params.get("boundary"), None);
1631 |         assert_eq!(mesg.subparts[1].ctype.mimetype, "text/html");
1632 |         assert_eq!(mesg.subparts[1].ctype.charset, "utf-8");
1633 |         assert_eq!(mesg.subparts[1].ctype.params.get("boundary"), None);
1634 |         // A base64 body containing a space and a line break decodes to "hello world".
1635 |         let mesg =
1636 |             parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
1637 |         assert_eq!(mesg.get_body_raw().unwrap(), b"hello world");
1638 |         assert_eq!(mesg.get_body().unwrap(), "hello world");
1639 |         // An unrecognised charset name still yields the body text.
1640 |         let mesg =
1641 |             parse_mail(b"Content-Type: text/plain; charset=x-unknown\r\n\r\nhello world").unwrap();
1642 |         assert_eq!(mesg.get_body_raw().unwrap(), b"hello world");
1643 |         assert_eq!(mesg.get_body().unwrap(), "hello world");
1644 |         // The Content-Type header is found regardless of the case of its name.
1645 |         let mesg = parse_mail(b"ConTENT-tyPE: text/html\r\n\r\nhello world").unwrap();
1646 |         assert_eq!(mesg.ctype.mimetype, "text/html");
1647 |         assert_eq!(mesg.get_body_raw().unwrap(), b"hello world");
1648 |         assert_eq!(mesg.get_body().unwrap(), "hello world");
1649 |         // UTF-7 charset combined with a quoted-printable transfer encoding.
1650 |         let mesg = parse_mail(
1651 |             b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n+JgM-",
1652 |         ).unwrap();
1653 |         assert_eq!(mesg.get_body_raw().unwrap(), b"+JgM-");
1654 |         assert_eq!(mesg.get_body().unwrap(), "\u{2603}");
1655 |         // UTF-7 charset with no Content-Transfer-Encoding header at all.
1656 |         let mesg = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\n\r\n+JgM-").unwrap();
1657 |         assert_eq!(mesg.get_body_raw().unwrap(), b"+JgM-");
1658 |         assert_eq!(mesg.get_body().unwrap(), "\u{2603}");
1659 |     }
1660 | 
1661 |     #[test]
1662 |     fn test_missing_terminating_boundary() { // input ends without a closing "--myboundary--"
1663 |         let mesg = parse_mail(
1664 |             concat!(
1665 |                 "Content-Type: multipart/alternative; boundary=myboundary\r\n\r\n",
1666 |                 "--myboundary\r\n",
1667 |                 "Content-Type: text/plain\r\n\r\n",
1668 |                 "part0\r\n",
1669 |                 "--myboundary\r\n",
1670 |                 "Content-Type: text/html\r\n\r\n",
1671 |                 "part1\r\n"
1672 |             )
1673 |             .as_bytes(),
1674 |         )
1675 |         .unwrap();
1676 |         assert_eq!(mesg.subparts[0].get_body().unwrap(), "part0");
1677 |         assert_eq!(mesg.subparts[1].get_body().unwrap(), "part1\r\n");
1678 |     }
1679 | 
1680 |     #[test]
1681 |     fn test_missing_body() { // multipart header only, nothing after it: the body is empty
1682 |         let entity =
1683 |             parse_mail("Content-Type: multipart/related; boundary=\"----=_\"\n".as_bytes())
1684 |                 .unwrap();
1685 |         assert_eq!(entity.headers[0].get_key(), "Content-Type");
1686 |         assert_eq!(entity.get_body_raw().unwrap(), b"");
1687 |         assert_eq!(entity.get_body().unwrap(), "");
1688 |     }
1689 | 
1690 |     #[test]
1691 |     fn test_no_headers_in_subpart() { // a subpart that begins directly with a blank line
1692 |         let mesg = parse_mail(
1693 |             concat!(
1694 |                 "Content-Type: multipart/report; report-type=delivery-status;\n",
1695 |                 "\tboundary=\"1404630116.22555.postech.q0.x.x.x\"\n",
1696 |                 "\n",
1697 |                 "--1404630116.22555.postech.q0.x.x.x\n",
1698 |                 "\n",
1699 |                 "--1404630116.22555.postech.q0.x.x.x--\n"
1700 |             )
1701 |             .as_bytes(),
1702 |         )
1703 |         .unwrap();
1704 |         assert_eq!(mesg.ctype.mimetype, "multipart/report");
1705 |         assert_eq!(mesg.subparts[0].headers.len(), 0);
1706 |         assert_eq!(mesg.subparts[0].ctype.mimetype, "text/plain");
1707 |         assert_eq!(mesg.subparts[0].get_body_raw().unwrap(), b"");
1708 |         assert_eq!(mesg.subparts[0].get_body().unwrap(), "");
1709 |     }
1710 | 
1711 |     #[test]
1712 |     fn test_empty() { // zero-length input parses and yields an empty body
1713 |         let mesg = parse_mail(b"").unwrap();
1714 |         assert_eq!(mesg.get_body().unwrap(), "");
1715 |         assert_eq!(mesg.get_body_raw().unwrap(), b"");
1716 |     }
1717 | 
1718 |     #[test]
1719 |     fn test_dont_panic_for_value_with_new_lines() { // input here: a lone, unterminated quote
1720 |         let content = parse_param_content(r#"application/octet-stream; name=""#);
1721 |         assert_eq!(content.params["name"], "\""); // the quote character comes back unchanged
1722 |     }
1723 | 
1724 |     #[test]
1725 |     fn test_parameter_value_continuations() { // numbered "name*N" segments (RFC 2231 style)
1726 |         let parsed =
1727 |             parse_param_content("attachment;\n\tfilename*0=\"X\";\n\tfilename*1=\"Y.pdf\"");
1728 |         assert_eq!(parsed.value, "attachment");
1729 |         assert_eq!(parsed.params["filename"], "XY.pdf");
1730 |         assert!(!parsed.params.contains_key("filename*0"));
1731 |         assert!(!parsed.params.contains_key("filename*1"));
1732 |         // An explicit plain "filename" wins; the numbered segments are then left as they are.
1733 |         let parsed = parse_param_content(
1734 |             "attachment;\n\tfilename=XX.pdf;\n\tfilename*0=\"X\";\n\tfilename*1=\"Y.pdf\"",
1735 |         );
1736 |         assert_eq!(parsed.value, "attachment");
1737 |         assert_eq!(parsed.params["filename"], "XX.pdf");
1738 |         assert_eq!(parsed.params["filename*0"], "X");
1739 |         assert_eq!(parsed.params["filename*1"], "Y.pdf");
1740 |         // Without a segment 0 nothing is joined and no "filename" entry appears.
1741 |         let parsed = parse_param_content("attachment; filename*1=\"Y.pdf\"");
1742 |         assert_eq!(parsed.params["filename*1"], "Y.pdf");
1743 |         assert!(!parsed.params.contains_key("filename"));
1744 |     }
1745 | 
1746 |     #[test]
1747 |     fn test_parameter_encodings() { // charset-tagged, percent-encoded parameter values (RFC 2231)
1748 |         let parsed = parse_param_content("attachment;\n\tfilename*0*=us-ascii''%28X%29%20801%20-%20X;\n\tfilename*1*=%20%E2%80%93%20X%20;\n\tfilename*2*=X%20X%2Epdf");
1749 |         // Note this is a real-world case from mutt, but it's wrong. The original filename had an en dash \u{2013} but mutt
1750 |         // declared us-ascii as the encoding instead of utf-8 for some reason.
1751 |         assert_eq!(
1752 |             parsed.params["filename"],
1753 |             "(X) 801 - X \u{00E2}\u{20AC}\u{201C} X X X.pdf"
1754 |         );
1755 |         assert!(!parsed.params.contains_key("filename*0*"));
1756 |         assert!(!parsed.params.contains_key("filename*0"));
1757 |         assert!(!parsed.params.contains_key("filename*1*"));
1758 |         assert!(!parsed.params.contains_key("filename*1"));
1759 |         assert!(!parsed.params.contains_key("filename*2*"));
1760 |         assert!(!parsed.params.contains_key("filename*2"));
1761 | 
1762 |         // Here is the corrected version.
1763 |         let parsed = parse_param_content("attachment;\n\tfilename*0*=utf-8''%28X%29%20801%20-%20X;\n\tfilename*1*=%20%E2%80%93%20X%20;\n\tfilename*2*=X%20X%2Epdf");
1764 |         assert_eq!(parsed.params["filename"], "(X) 801 - X \u{2013} X X X.pdf");
1765 |         assert!(!parsed.params.contains_key("filename*0*"));
1766 |         assert!(!parsed.params.contains_key("filename*0"));
1767 |         assert!(!parsed.params.contains_key("filename*1*"));
1768 |         assert!(!parsed.params.contains_key("filename*1"));
1769 |         assert!(!parsed.params.contains_key("filename*2*"));
1770 |         assert!(!parsed.params.contains_key("filename*2"));
1771 |         let parsed = parse_param_content("attachment; filename*=utf-8'en'%e2%80%A1.bin");
1772 |         assert_eq!(parsed.params["filename"], "\u{2021}.bin");
1773 |         assert!(!parsed.params.contains_key("filename*"));
1774 |         // An empty charset leaves the parameter undecoded under its original "filename*" key.
1775 |         let parsed = parse_param_content("attachment; filename*='foo'%e2%80%A1.bin");
1776 |         assert_eq!(parsed.params["filename*"], "'foo'%e2%80%A1.bin");
1777 |         assert!(!parsed.params.contains_key("filename"));
1778 |         // A charset name that is not recognised has the same effect.
1779 |         let parsed = parse_param_content("attachment; filename*=nonexistent'foo'%e2%80%a1.bin");
1780 |         assert_eq!(parsed.params["filename*"], "nonexistent'foo'%e2%80%a1.bin");
1781 |         assert!(!parsed.params.contains_key("filename"));
1782 |         // Percent-encoded continuation segments are decoded and concatenated.
1783 |         let parsed = parse_param_content(
1784 |             "attachment; filename*0*=utf-8'en'%e2%80%a1; filename*1*=%e2%80%A1.bin",
1785 |         );
1786 |         assert_eq!(parsed.params["filename"], "\u{2021}\u{2021}.bin");
1787 |         assert!(!parsed.params.contains_key("filename*0*"));
1788 |         assert!(!parsed.params.contains_key("filename*0"));
1789 |         assert!(!parsed.params.contains_key("filename*1*"));
1790 |         assert!(!parsed.params.contains_key("filename*1"));
1791 |         // A segment without the trailing '*' is appended without percent-decoding.
1792 |         let parsed =
1793 |             parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*1=%20.bin");
1794 |         assert_eq!(parsed.params["filename"], "\u{2021}%20.bin");
1795 |         assert!(!parsed.params.contains_key("filename*0*"));
1796 |         assert!(!parsed.params.contains_key("filename*0"));
1797 |         assert!(!parsed.params.contains_key("filename*1*"));
1798 |         assert!(!parsed.params.contains_key("filename*1"));
1799 |         // A gap in the numbering ends the join; the stray segment is kept as "filename*2".
1800 |         let parsed =
1801 |             parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*2*=%20.bin");
1802 |         assert_eq!(parsed.params["filename"], "\u{2021}");
1803 |         assert_eq!(parsed.params["filename*2"], " .bin");
1804 |         assert!(!parsed.params.contains_key("filename*0*"));
1805 |         assert!(!parsed.params.contains_key("filename*0"));
1806 |         assert!(!parsed.params.contains_key("filename*2*"));
1807 |         // With both filename*0* and filename*0 given, the plain one is used; the other stays raw.
1808 |         let parsed =
1809 |             parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*0=foo.bin");
1810 |         assert_eq!(parsed.params["filename"], "foo.bin");
1811 |         assert_eq!(parsed.params["filename*0*"], "utf-8'en'%e2%80%a1");
1812 |         assert!(!parsed.params.contains_key("filename*0"));
1813 |     }
1814 | 
1815 |     #[test]
1816 |     fn test_default_content_encoding() { // no Content-Transfer-Encoding header: SevenBit body
1817 |         let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\n\r\n+JgM-").unwrap();
1818 |         let body = mail.get_body_encoded();
1819 |         match body {
1820 |             Body::SevenBit(body) => {
1821 |                 assert_eq!(body.get_raw(), b"+JgM-");
1822 |                 assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
1823 |             }
1824 |             _ => panic!("expected Body::SevenBit"),
1825 |         };
1826 |     }
1827 | 
1828 |     #[test]
1829 |     fn test_7bit_content_encoding() { // explicit "7bit" transfer encoding: SevenBit body
1830 |         let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 7bit\r\n\r\n+JgM-").unwrap();
1831 |         let body = mail.get_body_encoded();
1832 |         match body {
1833 |             Body::SevenBit(body) => {
1834 |                 assert_eq!(body.get_raw(), b"+JgM-");
1835 |                 assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
1836 |             }
1837 |             _ => panic!("expected Body::SevenBit"),
1838 |         };
1839 |     }
1840 | 
1841 |     #[test]
1842 |     fn test_8bit_content_encoding() { // "8bit" transfer encoding: EightBit body
1843 |         let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 8bit\r\n\r\n+JgM-").unwrap();
1844 |         let body = mail.get_body_encoded();
1845 |         match body {
1846 |             Body::EightBit(body) => {
1847 |                 assert_eq!(body.get_raw(), b"+JgM-");
1848 |                 assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
1849 |             }
1850 |             _ => panic!("expected Body::EightBit"),
1851 |         };
1852 |     }
1853 | 
1854 |     #[test]
1855 |     fn test_quoted_printable_content_encoding() { // raw, decoded bytes and decoded string
1856 |         let mail = parse_mail(
1857 |             b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n+JgM-",
1858 |         ).unwrap();
1859 |         match mail.get_body_encoded() {
1860 |             Body::QuotedPrintable(body) => {
1861 |                 assert_eq!(body.get_raw(), b"+JgM-");
1862 |                 assert_eq!(body.get_decoded().unwrap(), b"+JgM-");
1863 |                 assert_eq!(body.get_decoded_as_string().unwrap(), "\u{2603}");
1864 |             }
1865 |             _ => panic!("expected Body::QuotedPrintable"),
1866 |         };
1867 |     }
1868 | 
1869 |     #[test]
1870 |     fn test_base64_content_encoding() { // base64 body with an embedded space and line break
1871 |         let mail =
1872 |             parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
1873 |         match mail.get_body_encoded() {
1874 |             Body::Base64(body) => {
1875 |                 assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=");
1876 |                 assert_eq!(body.get_decoded().unwrap(), b"hello world");
1877 |                 assert_eq!(body.get_decoded_as_string().unwrap(), "hello world");
1878 |             }
1879 |             _ => panic!("expected Body::Base64"),
1880 |         };
1881 |     }
1882 | 
1883 |     #[test]
1884 |     fn test_base64_content_encoding_multiple_strings() { // two padded base64 runs in one body
1885 |         let mail = parse_mail(
1886 |             b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=\r\nZm9vCg==",
1887 |         )
1888 |         .unwrap();
1889 |         match mail.get_body_encoded() {
1890 |             Body::Base64(body) => {
1891 |                 assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=\r\nZm9vCg==");
1892 |                 assert_eq!(body.get_decoded().unwrap(), b"hello worldfoo\n");
1893 |                 assert_eq!(body.get_decoded_as_string().unwrap(), "hello worldfoo\n");
1894 |             }
1895 |             _ => panic!("expected Body::Base64"),
1896 |         };
1897 |     }
1898 | 
1899 |     #[test]
1900 |     fn test_binary_content_encoding() { // "binary" transfer encoding: raw bytes come back as-is
1901 |         let mail = parse_mail(b"Content-Transfer-Encoding: binary\r\n\r\n######").unwrap();
1902 |         let body = mail.get_body_encoded();
1903 |         match body {
1904 |             Body::Binary(body) => {
1905 |                 assert_eq!(body.get_raw(), b"######");
1906 |             }
1907 |             _ => panic!("expected Body::Binary"),
1908 |         };
1909 |     }
1910 | 
1911 |     #[test]
1912 |     fn test_body_content_encoding_with_multipart() { // per-part encodings of a fixture mail
1913 |         let mail_filepath = "./tests/files/test_email_01.txt";
1914 |         let mail = std::fs::read(mail_filepath)
1915 |             .unwrap_or_else(|e| panic!("Unable to open the file [{}]: {:?}", mail_filepath, e));
1916 |         let mail = parse_mail(&mail).unwrap();
1917 |         // Part 0: a SevenBit body holding the HTML text.
1918 |         let subpart_0 = &mail.subparts[0];
1919 |         match subpart_0.get_body_encoded() {
1920 |             Body::SevenBit(body) => {
1921 |                 assert_eq!(
1922 |                     body.get_as_string().unwrap().trim(),
1923 |                     "<html>Test with attachments</html>"
1924 |                 );
1925 |             }
1926 |             _ => panic!("subpart 0: expected Body::SevenBit"),
1927 |         };
1928 |         // Part 1: a Base64 body that must decode to exactly the bytes of the sample PDF.
1929 |         let subpart_1 = &mail.subparts[1];
1930 |         match subpart_1.get_body_encoded() {
1931 |             Body::Base64(body) => {
1932 |                 let pdf_path = "./tests/files/test_email_01_sample.pdf";
1933 |                 let original_pdf = std::fs::read(pdf_path)
1934 |                     .unwrap_or_else(|e| panic!("Unable to open the file [{}]: {:?}", pdf_path, e));
1935 |                 assert_eq!(body.get_decoded().unwrap(), original_pdf);
1936 |             }
1937 |             _ => panic!("subpart 1: expected Body::Base64"),
1938 |         };
1939 |         // Part 2: a Base64 body that decodes to a short text file.
1940 |         let subpart_2 = &mail.subparts[2];
1941 |         match subpart_2.get_body_encoded() {
1942 |             Body::Base64(body) => {
1943 |                 assert_eq!(
1944 |                     body.get_decoded_as_string().unwrap(),
1945 |                     "txt file context for email collector\n1234567890987654321\n"
1946 |                 );
1947 |             }
1948 |             _ => panic!("subpart 2: expected Body::Base64"),
1949 |         };
1950 |     }
1951 | 
1952 |     #[test]
1953 |     fn test_fuzzer_testcase() { // smoke test: no assertions, the calls below just must not panic
1954 |         const INPUT: &str = "U3ViamVjdDplcy1UeXBlOiBtdW50ZW50LVV5cGU6IW11bAAAAAAAAAAAamVjdDplcy1UeXBlOiBtdW50ZW50LVV5cGU6IG11bAAAAAAAAAAAAAAAAABTTUFZdWJqZf86OiP/dCBTdWJqZWN0Ol8KRGF0ZTog/////////////////////wAAAAAAAAAAAHQgYnJmAHQgYnJmZXItRW5jeXBlOnY9NmU3OjA2OgAAAAAAAAAAAAAAADEAAAAAAP/8mAAAAAAAAAAA+f///wAAAAAAAP8AAAAAAAAAAAAAAAAAAAAAAAAAPT0/PzEAAAEAAA==";
1955 |         // Decode the base64 input, parse it, and hand any Date header to dateparse; results are dropped.
1956 |         if let Ok(parsed) = parse_mail(&data_encoding::BASE64.decode(INPUT.as_bytes()).unwrap()) {
1957 |             if let Some(date) = parsed.headers.get_first_value("Date") {
1958 |                 let _ = dateparse(&date);
1959 |             }
1960 |         }
1961 |     }
1962 | 
1963 |     #[test]
1964 |     fn test_fuzzer_testcase_2() { // smoke test: no assertions, parsing just must not panic
1965 |         const INPUT: &str = "U3ViamVjdDogVGhpcyBpcyBhIHRlc3QgZW1haWwKQ29udGVudC1UeXBlOiBtdWx0aXBhcnQvYWx0ZXJuYXRpdmU7IGJvdW5kYXJ5PczMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMZm9vYmFyCkRhdGU6IFN1biwgMDIgT2MKCi1TdWJqZWMtZm9vYmFydDo=";
1966 |         if let Ok(parsed) = parse_mail(&data_encoding::BASE64.decode(INPUT.as_bytes()).unwrap()) {
1967 |             if let Some(date) = parsed.headers.get_first_value("Date") { // only when a Date header exists
1968 |                 let _ = dateparse(&date);
1969 |             }
1970 |         }
1971 |     }
1972 | 
1973 |     #[test]
1974 |     fn test_header_split() { // the charset line is not indented, so Content-Type never sees it
1975 |         let mesg = parse_mail(
1976 |             b"Content-Type: text/plain;\r\ncharset=\"utf-8\"\r\nContent-Transfer-Encoding: 8bit\r\n\r\n",
1977 |         ).unwrap();
1978 |         assert_eq!(mesg.ctype.charset, "us-ascii");
1979 |         assert_eq!(mesg.ctype.mimetype, "text/plain");
1980 |     }
1981 | 
1982 |     #[test]
1983 |     fn test_percent_decoder() { // %XX decodes in either hex case; malformed '%' runs stay literal
1984 |         assert_eq!(percent_decode("hi %0d%0A%%2A%zz%"), b"hi \r\n%*%zz%");
1985 |     }
1986 | 
1987 |     #[test]
1988 |     fn test_default_content_type_in_multipart_digest() {
1989 |         // Per https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.5, a headerless part of a multipart/digest is message/rfc822
1990 |         let mail = parse_mail(
1991 |             concat!(
1992 |                 "Content-Type: multipart/digest; boundary=myboundary\r\n\r\n",
1993 |                 "--myboundary\r\n\r\n",
1994 |                 "blah blah blah\r\n\r\n",
1995 |                 "--myboundary--\r\n"
1996 |             )
1997 |             .as_bytes(),
1998 |         )
1999 |         .unwrap();
2000 |         assert_eq!(mail.headers.len(), 1);
2001 |         assert_eq!(mail.ctype.mimetype, "multipart/digest");
2002 |         assert_eq!(mail.subparts[0].headers.len(), 0);
2003 |         assert_eq!(mail.subparts[0].ctype.mimetype, "message/rfc822");
2004 | 
2005 |         let mail = parse_mail(
2006 |             concat!(
2007 |                 "Content-Type: multipart/whatever; boundary=myboundary\n",
2008 |                 "\n",
2009 |                 "--myboundary\n",
2010 |                 "\n",
2011 |                 "blah blah blah\n",
2012 |                 "--myboundary\n",
2013 |                 "Content-Type: multipart/digest; boundary=nestedboundary\n",
2014 |                 "\n",
2015 |                 "--nestedboundary\n",
2016 |                 "\n",
2017 |                 "nested default part\n",
2018 |                 "--nestedboundary\n",
2019 |                 "Content-Type: text/html\n",
2020 |                 "\n",
2021 |                 "nested html part\n",
2022 |                 "--nestedboundary\n",
2023 |                 "Content-Type: multipart/insidedigest; boundary=insideboundary\n",
2024 |                 "\n",
2025 |                 "--insideboundary\n",
2026 |                 "\n",
2027 |                 "inside part\n",
2028 |                 "--insideboundary--\n",
2029 |                 "--nestedboundary--\n",
2030 |                 "--myboundary--\n"
2031 |             )
2032 |             .as_bytes(),
2033 |         )
2034 |         .unwrap();
2035 |         let mut parts = mail.parts();
2036 |         let mut part = parts.next().unwrap(); // mail (the top-level multipart/whatever)
2037 | 
2038 |         assert_eq!(part.headers.len(), 1);
2039 |         assert_eq!(part.ctype.mimetype, "multipart/whatever");
2040 | 
2041 |         part = parts.next().unwrap(); // mail.subparts[0]: headerless, but not inside a digest
2042 |         assert_eq!(part.headers.len(), 0);
2043 |         assert_eq!(part.ctype.mimetype, "text/plain");
2044 |         assert_eq!(part.get_body_raw().unwrap(), b"blah blah blah");
2045 | 
2046 |         part = parts.next().unwrap(); // mail.subparts[1]: the nested digest
2047 |         assert_eq!(part.ctype.mimetype, "multipart/digest");
2048 | 
2049 |         part = parts.next().unwrap(); // mail.subparts[1].subparts[0]: headerless inside the digest
2050 |         assert_eq!(part.headers.len(), 0);
2051 |         assert_eq!(part.ctype.mimetype, "message/rfc822");
2052 |         assert_eq!(part.get_body_raw().unwrap(), b"nested default part");
2053 | 
2054 |         part = parts.next().unwrap(); // mail.subparts[1].subparts[1]: an explicit Content-Type is kept
2055 |         assert_eq!(part.headers.len(), 1);
2056 |         assert_eq!(part.ctype.mimetype, "text/html");
2057 |         assert_eq!(part.get_body_raw().unwrap(), b"nested html part");
2058 | 
2059 |         part = parts.next().unwrap(); // mail.subparts[1].subparts[2]: a non-digest multipart inside the digest
2060 |         assert_eq!(part.headers.len(), 1);
2061 |         assert_eq!(part.ctype.mimetype, "multipart/insidedigest");
2062 | 
2063 |         part = parts.next().unwrap(); // mail.subparts[1].subparts[2].subparts[0]: headerless, text/plain again
2064 |         assert_eq!(part.headers.len(), 0);
2065 |         assert_eq!(part.ctype.mimetype, "text/plain");
2066 |         assert_eq!(part.get_body_raw().unwrap(), b"inside part");
2067 | 
2068 |         assert!(parts.next().is_none());
2069 |     }
2070 | 
2071 |     #[test]
2072 |     fn boundary_is_suffix_of_another_boundary() {
2073 |         // From https://github.com/staktrace/mailparse/issues/100: the inner boundary ("--section_boundary") ends with the outer one ("section_boundary")
2074 |         let mail = parse_mail(
2075 |             concat!(
2076 |                 "Content-Type: multipart/mixed; boundary=\"section_boundary\"\n",
2077 |                 "\n",
2078 |                 "--section_boundary\n",
2079 |                 "Content-Type: multipart/alternative; boundary=\"--section_boundary\"\n",
2080 |                 "\n",
2081 |                 "----section_boundary\n",
2082 |                 "Content-Type: text/html;\n",
2083 |                 "\n",
2084 |                 "<em>Good evening!</em>\n",
2085 |                 "----section_boundary\n",
2086 |                 "Content-Type: text/plain;\n",
2087 |                 "\n",
2088 |                 "Good evening!\n",
2089 |                 "----section_boundary\n",
2090 |                 "--section_boundary\n"
2091 |             )
2092 |             .as_bytes(),
2093 |         )
2094 |         .unwrap();
2095 | 
2096 |         let mut parts = mail.parts();
2097 |         let mut part = parts.next().unwrap(); // mail
2098 | 
2099 |         assert_eq!(part.headers.len(), 1);
2100 |         assert_eq!(part.ctype.mimetype, "multipart/mixed");
2101 |         assert_eq!(part.subparts.len(), 1);
2102 | 
2103 |         part = parts.next().unwrap(); // mail.subparts[0]
2104 |         assert_eq!(part.headers.len(), 1);
2105 |         assert_eq!(part.ctype.mimetype, "multipart/alternative");
2106 |         assert_eq!(part.subparts.len(), 2);
2107 | 
2108 |         part = parts.next().unwrap(); // mail.subparts[0].subparts[0]
2109 |         assert_eq!(part.headers.len(), 1);
2110 |         assert_eq!(part.ctype.mimetype, "text/html");
2111 |         assert_eq!(part.get_body_raw().unwrap(), b"<em>Good evening!</em>");
2112 |         assert_eq!(part.subparts.len(), 0);
2113 | 
2114 |         part = parts.next().unwrap(); // mail.subparts[0].subparts[1]
2115 |         assert_eq!(part.headers.len(), 1);
2116 |         assert_eq!(part.ctype.mimetype, "text/plain");
2117 |         assert_eq!(part.get_body_raw().unwrap(), b"Good evening!");
2118 |         assert_eq!(part.subparts.len(), 0);
2119 | 
2120 |         assert!(parts.next().is_none());
2121 |     }
2122 | 
2123 |     #[test]
2124 |     fn test_parts_iterator() {
2125 |         let mail = parse_mail(
2126 |             concat!(
2127 |                 "Content-Type: multipart/mixed; boundary=\"top_boundary\"\n",
2128 |                 "\n",
2129 |                 "--top_boundary\n",
2130 |                 "Content-Type: multipart/alternative; boundary=\"internal_boundary\"\n",
2131 |                 "\n",
2132 |                 "--internal_boundary\n",
2133 |                 "Content-Type: text/html;\n",
2134 |                 "\n",
2135 |                 "<em>Good evening!</em>\n",
2136 |                 "--internal_boundary\n",
2137 |                 "Content-Type: text/plain;\n",
2138 |                 "\n",
2139 |                 "Good evening!\n",
2140 |                 "--internal_boundary\n",
2141 |                 "--top_boundary\n",
2142 |                 "Content-Type: text/unknown;\n",
2143 |                 "\n",
2144 |                 "You read this?\n",
2145 |                 "--top_boundary\n"
2146 |             )
2147 |             .as_bytes(),
2148 |         )
2149 |         .unwrap();
2150 | 
2151 |         let mut parts = mail.parts(); // expected order is depth-first, each container before its children
2152 |         assert_eq!(parts.next().unwrap().ctype.mimetype, "multipart/mixed");
2153 |         assert_eq!(
2154 |             parts.next().unwrap().ctype.mimetype,
2155 |             "multipart/alternative"
2156 |         );
2157 |         assert_eq!(parts.next().unwrap().ctype.mimetype, "text/html");
2158 |         assert_eq!(parts.next().unwrap().ctype.mimetype, "text/plain");
2159 |         assert_eq!(parts.next().unwrap().ctype.mimetype, "text/unknown");
2160 |         assert!(parts.next().is_none());
2161 | 
2162 |         // A message without any subparts yields just itself.
2163 |         let mail = parse_mail(b"Content-Type: text/plain\n").unwrap();
2164 |         let mut parts = mail.parts();
2165 |         assert_eq!(parts.next().unwrap().ctype.mimetype, "text/plain");
2166 |         assert!(parts.next().is_none());
2167 |     }
2168 | 
2169 |     #[test]
2170 |     fn test_no_parts() {
2171 |         // A multipart message whose body holds nothing but the closing delimiter.
2172 |         let raw = concat!(
2173 |             "Content-Type: multipart/mixed; boundary=\"foobar\"\n",
2174 |             "\n",
2175 |             "--foobar--\n"
2176 |         );
2177 |         let mail = parse_mail(raw.as_bytes()).unwrap();
2178 |         let mut iter = mail.parts();
2179 | 
2180 |         // The iterator starts with the top-level message itself ...
2181 |         let top = iter.next().unwrap();
2182 |         assert_eq!(top.ctype.mimetype, "multipart/mixed");
2183 | 
2184 |         // ... yields exactly one further part, reported as text/plain, and is then exhausted.
2185 |         let only_child = iter.next().unwrap();
2186 |         assert_eq!(only_child.ctype.mimetype, "text/plain");
2187 |         assert!(iter.next().is_none());
2188 |     }
2189 | }
2190 | 


--------------------------------------------------------------------------------
/src/msgidparse.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt;
  2 | 
  3 | use crate::MailParseError;
  4 | 
  5 | /// A simple wrapper around `Vec<String>`; each entry is one message ID stored
  6 | /// without its angle brackets. It primarily exists so we can implement the
  7 | /// `Display` trait, letting user code easily turn the return value of
  8 | /// `msgidparse` back into a string, and so methods can be added to it later.
  9 | #[derive(Clone, Debug, PartialEq)]
 10 | pub struct MessageIdList(Vec<String>);
 11 | 
 12 | impl std::ops::Deref for MessageIdList {
 13 |     type Target = Vec<String>;
 14 |     /// Borrows the wrapped vector, so the read-only `Vec<String>` API works on the list.
 15 |     fn deref(&self) -> &Vec<String> {
 16 |         &self.0
 17 |     }
 18 | }
 19 | 
 20 | impl std::ops::DerefMut for MessageIdList {
 21 |     fn deref_mut(&mut self) -> &mut Vec<String> {
 22 |         &mut self.0 // mutable access to the wrapped vector, e.g. to push or remove IDs
 23 |     }
 24 | }
 25 | 
 26 | impl fmt::Display for MessageIdList {
 27 |     /// Writes each ID wrapped in angle brackets, with one space between neighbours.
 28 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 29 |         for (position, id) in self.0.iter().enumerate() {
 30 |             // Every ID except the first is preceded by the separator.
 31 |             if position > 0 {
 32 |                 f.write_str(" ")?;
 33 |             }
 34 |             write!(f, "<{}>", id)?;
 35 |         }
 36 |         Ok(())
 37 |     }
 38 | }
 39 | 
 40 | /// Parses a header value into a structured list of message IDs. It is meant
 41 | /// for headers that carry message IDs, such as `Message-ID`, `In-Reply-To`,
 42 | /// and `References`.
 43 | /// The parsing is deliberately simple: IDs are delimited by angle brackets,
 44 | /// whitespace around them is skipped, and anything else is an error. Comments
 45 | /// (allowed by the RFCs but rarely seen in practice) are not supported yet.
 46 | ///
 47 | /// # Examples
 48 | /// ```
 49 | ///     use mailparse::{msgidparse, MessageIdList};
 50 | ///     let parsed_ids = msgidparse("<msg_one@foo.com>  <msg_two@bar.com>").unwrap();
 51 | ///     assert_eq!(parsed_ids[0], "msg_one@foo.com");
 52 | ///     assert_eq!(parsed_ids[1], "msg_two@bar.com");
 53 | /// ```
 54 | pub fn msgidparse(ids: &str) -> Result<MessageIdList, MailParseError> {
 55 |     let mut parsed = Vec::new();
 56 |     // Unconsumed tail of the header; leading whitespace is dropped after every step
 57 |     let mut rest = ids.trim_start();
 58 |     loop {
 59 |         // Once nothing is left, every ID has been collected
 60 |         if rest.is_empty() {
 61 |             return Ok(MessageIdList(parsed));
 62 |         }
 63 |         // Whatever follows must open a new ID ...
 64 |         if !rest.starts_with('<') {
 65 |             return Err(MailParseError::Generic("Message IDs must start with <"));
 66 |         }
 67 |         // ... which runs up to the first closing bracket
 68 |         let close = match rest.find('>') {
 69 |             Some(position) => position,
 70 |             None => return Err(MailParseError::Generic("Message IDs must end with >")),
 71 |         };
 72 |         parsed.push(rest[1..close].to_string());
 73 |         rest = rest[close + 1..].trim_start();
 74 |     }
 75 | }
 76 | 
 77 | #[cfg(test)]
 78 | mod tests {
 79 |     use super::*;
 80 | 
 81 |     #[test]
 82 |     fn parse_message_ids() {
 83 |         assert_eq!(
 84 |             msgidparse("").expect("Empty string"),
 85 |             MessageIdList(Vec::new())
 86 |         );
 87 |         assert_eq!(
 88 |             msgidparse("<msg_one@foo.com>").expect("Single reference"),
 89 |             MessageIdList(vec!["msg_one@foo.com".to_string()])
 90 |         );
 91 |         assert_eq!(
 92 |             msgidparse(" <msg_one@foo.com>").expect("Single reference, leading whitespace"),
 93 |             MessageIdList(vec!["msg_one@foo.com".to_string()])
 94 |         );
 95 |         assert_eq!(
 96 |             msgidparse("<msg_one@foo.com> ").expect("Single reference, trailing whitespace"),
 97 |             MessageIdList(vec!["msg_one@foo.com".to_string()])
 98 |         );
 99 |         assert_eq!(
100 |             msgidparse("<msg_one@foo.com> <msg_two@bar.com>")
101 |                 .expect("Multiple references separated by space"),
102 |             MessageIdList(vec![
103 |                 "msg_one@foo.com".to_string(),
104 |                 "msg_two@bar.com".to_string(),
105 |             ])
106 |         );
107 |         assert_eq!(
108 |             msgidparse("\n<msg_one@foo.com> <msg_two@bar.com>\t<msg_three@qux.com>\r ")
109 |                 .expect("Multiple references separated by various whitespace"),
110 |             MessageIdList(vec![
111 |                 "msg_one@foo.com".to_string(),
112 |                 "msg_two@bar.com".to_string(),
113 |                 "msg_three@qux.com".to_string(),
114 |             ])
115 |         );
116 | 
117 |         // Whitespace between IDs is optional: directly adjacent IDs must still be split
118 |         assert_eq!(
119 |             msgidparse("<msg_one@foo.com><msg_two@bar.com>")
120 |                 .expect("Multiple references, no whitespace"),
121 |             MessageIdList(vec![
122 |                 "msg_one@foo.com".to_string(),
123 |                 "msg_two@bar.com".to_string(),
124 |             ])
125 |         );
126 |         assert_eq!(
127 |             msgidparse("<msg_one@foo.com><msg_two@bar.com> <msg_three@spam.com> ")
128 |                 .expect("Mixed whitespace/non-whitespace separator"),
129 |             MessageIdList(vec![
130 |                 "msg_one@foo.com".to_string(),
131 |                 "msg_two@bar.com".to_string(),
132 |                 "msg_three@spam.com".to_string(),
133 |             ])
134 |         );
135 |     }
136 | }
137 | 


--------------------------------------------------------------------------------
/tests/files/test_email_01.txt:
--------------------------------------------------------------------------------
 1 | Subject: Test with attachments
 2 | Content-Type: multipart/mixed;
 3 |  boundary="------------E5401F4DD68F2F7A872C2A83"
 4 | Content-Language: en-US
 5 | 
 6 | This is a multi-part message in MIME format.
 7 | --------------E5401F4DD68F2F7A872C2A83
 8 | Content-Type: text/html; charset=utf-8
 9 | Content-Transfer-Encoding: 7bit
10 | 
11 | <html>Test with attachments</html>
12 | 
13 | --------------E5401F4DD68F2F7A872C2A83
14 | Content-Type: application/pdf;
15 |  name="sample.pdf"
16 | Content-Transfer-Encoding: base64
17 | Content-Disposition: attachment;
18 |  filename="sample.pdf"
19 | 
20 | JVBERi0xLjMNCiXi48/TDQoNCjEgMCBvYmoNCjw8DQovVHlwZSAvQ2F0YWxvZw0KL091dGxp
21 | bmVzIDIgMCBSDQovUGFnZXMgMyAwIFINCj4+DQplbmRvYmoNCg0KMiAwIG9iag0KPDwNCi9U
22 | eXBlIC9PdXRsaW5lcw0KL0NvdW50IDANCj4+DQplbmRvYmoNCg0KMyAwIG9iag0KPDwNCi9U
23 | eXBlIC9QYWdlcw0KL0NvdW50IDINCi9LaWRzIFsgNCAwIFIgNiAwIFIgXSANCj4+DQplbmRv
24 | YmoNCg0KNCAwIG9iag0KPDwNCi9UeXBlIC9QYWdlDQovUGFyZW50IDMgMCBSDQovUmVzb3Vy
25 | Y2VzIDw8DQovRm9udCA8PA0KL0YxIDkgMCBSIA0KPj4NCi9Qcm9jU2V0IDggMCBSDQo+Pg0K
26 | L01lZGlhQm94IFswIDAgNjEyLjAwMDAgNzkyLjAwMDBdDQovQ29udGVudHMgNSAwIFINCj4+
27 | DQplbmRvYmoNCg0KNSAwIG9iag0KPDwgL0xlbmd0aCAxMDc0ID4+DQpzdHJlYW0NCjIgSg0K
28 | QlQNCjAgMCAwIHJnDQovRjEgMDAyNyBUZg0KNTcuMzc1MCA3MjIuMjgwMCBUZA0KKCBBIFNp
29 | bXBsZSBQREYgRmlsZSApIFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDY4OC42
30 | MDgwIFRkDQooIFRoaXMgaXMgYSBzbWFsbCBkZW1vbnN0cmF0aW9uIC5wZGYgZmlsZSAtICkg
31 | VGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5LjI1MDAgNjY0LjcwNDAgVGQNCigganVzdCBm
32 | b3IgdXNlIGluIHRoZSBWaXJ0dWFsIE1lY2hhbmljcyB0dXRvcmlhbHMuIE1vcmUgdGV4dC4g
33 | QW5kIG1vcmUgKSBUag0KRVQNCkJUDQovRjEgMDAxMCBUZg0KNjkuMjUwMCA2NTIuNzUyMCBU
34 | ZA0KKCB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0
35 | LiApIFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDYyOC44NDgwIFRkDQooIEFu
36 | ZCBtb3JlIHRleHQuIEFuZCBtb3JlIHRleHQuIEFuZCBtb3JlIHRleHQuIEFuZCBtb3JlIHRl
37 | eHQuIEFuZCBtb3JlICkgVGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5LjI1MDAgNjE2Ljg5
38 | NjAgVGQNCiggdGV4dC4gQW5kIG1vcmUgdGV4dC4gQm9yaW5nLCB6enp6ei4gQW5kIG1vcmUg
39 | dGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5kICkgVGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5
40 | LjI1MDAgNjA0Ljk0NDAgVGQNCiggbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9y
41 | ZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiApIFRqDQpFVA0KQlQNCi9G
42 | MSAwMDEwIFRmDQo2OS4yNTAwIDU5Mi45OTIwIFRkDQooIEFuZCBtb3JlIHRleHQuIEFuZCBt
43 | b3JlIHRleHQuICkgVGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5LjI1MDAgNTY5LjA4ODAg
44 | VGQNCiggQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5k
45 | IG1vcmUgdGV4dC4gQW5kIG1vcmUgKSBUag0KRVQNCkJUDQovRjEgMDAxMCBUZg0KNjkuMjUw
46 | MCA1NTcuMTM2MCBUZA0KKCB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBF
47 | dmVuIG1vcmUuIENvbnRpbnVlZCBvbiBwYWdlIDIgLi4uKSBUag0KRVQNCmVuZHN0cmVhbQ0K
48 | ZW5kb2JqDQoNCjYgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCAzIDAgUg0KL1Jl
49 | c291cmNlcyA8PA0KL0ZvbnQgPDwNCi9GMSA5IDAgUiANCj4+DQovUHJvY1NldCA4IDAgUg0K
50 | Pj4NCi9NZWRpYUJveCBbMCAwIDYxMi4wMDAwIDc5Mi4wMDAwXQ0KL0NvbnRlbnRzIDcgMCBS
51 | DQo+Pg0KZW5kb2JqDQoNCjcgMCBvYmoNCjw8IC9MZW5ndGggNjc2ID4+DQpzdHJlYW0NCjIg
52 | Sg0KQlQNCjAgMCAwIHJnDQovRjEgMDAyNyBUZg0KNTcuMzc1MCA3MjIuMjgwMCBUZA0KKCBT
53 | aW1wbGUgUERGIEZpbGUgMiApIFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDY4
54 | OC42MDgwIFRkDQooIC4uLmNvbnRpbnVlZCBmcm9tIHBhZ2UgMS4gWWV0IG1vcmUgdGV4dC4g
55 | QW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gKSBUag0KRVQNCkJUDQovRjEgMDAxMCBU
56 | Zg0KNjkuMjUwMCA2NzYuNjU2MCBUZA0KKCBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0
57 | LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSApIFRqDQpFVA0KQlQN
58 | Ci9GMSAwMDEwIFRmDQo2OS4yNTAwIDY2NC43MDQwIFRkDQooIHRleHQuIE9oLCBob3cgYm9y
59 | aW5nIHR5cGluZyB0aGlzIHN0dWZmLiBCdXQgbm90IGFzIGJvcmluZyBhcyB3YXRjaGluZyAp
60 | IFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDY1Mi43NTIwIFRkDQooIHBhaW50
61 | IGRyeS4gQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5k
62 | IG1vcmUgdGV4dC4gKSBUag0KRVQNCkJUDQovRjEgMDAxMCBUZg0KNjkuMjUwMCA2NDAuODAw
63 | MCBUZA0KKCBCb3JpbmcuICBNb3JlLCBhIGxpdHRsZSBtb3JlIHRleHQuIFRoZSBlbmQsIGFu
64 | ZCBqdXN0IGFzIHdlbGwuICkgVGoNCkVUDQplbmRzdHJlYW0NCmVuZG9iag0KDQo4IDAgb2Jq
65 | DQpbL1BERiAvVGV4dF0NCmVuZG9iag0KDQo5IDAgb2JqDQo8PA0KL1R5cGUgL0ZvbnQNCi9T
66 | dWJ0eXBlIC9UeXBlMQ0KL05hbWUgL0YxDQovQmFzZUZvbnQgL0hlbHZldGljYQ0KL0VuY29k
67 | aW5nIC9XaW5BbnNpRW5jb2RpbmcNCj4+DQplbmRvYmoNCg0KMTAgMCBvYmoNCjw8DQovQ3Jl
68 | YXRvciAoUmF2ZSBcKGh0dHA6Ly93d3cubmV2cm9uYS5jb20vcmF2ZVwpKQ0KL1Byb2R1Y2Vy
69 | IChOZXZyb25hIERlc2lnbnMpDQovQ3JlYXRpb25EYXRlIChEOjIwMDYwMzAxMDcyODI2KQ0K
70 | Pj4NCmVuZG9iag0KDQp4cmVmDQowIDExDQowMDAwMDAwMDAwIDY1NTM1IGYNCjAwMDAwMDAw
71 | MTkgMDAwMDAgbg0KMDAwMDAwMDA5MyAwMDAwMCBuDQowMDAwMDAwMTQ3IDAwMDAwIG4NCjAw
72 | MDAwMDAyMjIgMDAwMDAgbg0KMDAwMDAwMDM5MCAwMDAwMCBuDQowMDAwMDAxNTIyIDAwMDAw
73 | IG4NCjAwMDAwMDE2OTAgMDAwMDAgbg0KMDAwMDAwMjQyMyAwMDAwMCBuDQowMDAwMDAyNDU2
74 | IDAwMDAwIG4NCjAwMDAwMDI1NzQgMDAwMDAgbg0KDQp0cmFpbGVyDQo8PA0KL1NpemUgMTEN
75 | Ci9Sb290IDEgMCBSDQovSW5mbyAxMCAwIFINCj4+DQoNCnN0YXJ0eHJlZg0KMjcxNA0KJSVF
76 | T0YNCg==
77 | --------------E5401F4DD68F2F7A872C2A83
78 | Content-Type: text/plain; charset=UTF-8;
79 |  name="sample.txt"
80 | Content-Transfer-Encoding: base64
81 | Content-Disposition: attachment;
82 |  filename="sample.txt"
83 | 
84 | dHh0IGZpbGUgY29udGV4dCBmb3IgZW1haWwgY29sbGVjdG9yCjEyMzQ1Njc4OTA5ODc2NTQz
85 | MjEK
86 | --------------E5401F4DD68F2F7A872C2A83--
87 | 


--------------------------------------------------------------------------------
/tests/files/test_email_01_sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/staktrace/mailparse/85b1591b364688a5fc440009ecdcde632f3ceb27/tests/files/test_email_01_sample.pdf


--------------------------------------------------------------------------------