├── .eslintignore
├── index.js
├── .gitignore
├── test
    ├── data
    │   ├── basic1
    │   ├── charsets
    │   ├── shift-jis-image
    │   ├── multipart4
    │   ├── base64-2
    │   ├── multipart1
    │   ├── multipart-base64-3
    │   ├── multipart-base64-1
    │   ├── multipart-base64-2
    │   ├── multipart-binary
    │   ├── multipart2
    │   ├── multipart3
    │   ├── base64-1
    │   ├── multipart-addresses
    │   ├── multipart-addresses-groups
    │   ├── message-encoded
    │   ├── multipart-empty-attachment
    │   ├── multipart-complex1
    │   ├── multipart-encrypted-subject-utf8
    │   ├── multipart-content-id
    │   ├── multipart-complex2
    │   ├── multipartmalt-detach
    │   ├── bug505221
    │   └── bugmail11
    ├── test_custom_headers.js
    ├── test_structured_header_emitters.js
    ├── utils.js
    ├── test_mail_parser.ts
    ├── test_structured_headers.js
    ├── test_header_emitter.js
    └── test_mime_tree.js
├── lib
    ├── jsmime.js
    ├── textDecoders.js
    ├── utils.js
    ├── structuredHeaders.js
    ├── mailParser.js
    └── headerEmitter.js
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── tests.yml
├── docs
    ├── Developing.mkd
    └── RelatedSpecifications.mkd
├── index.d.ts
├── LICENSE
├── package.json
├── .eslintrc.json
├── karma.conf.js
└── README.md


/.eslintignore:
--------------------------------------------------------------------------------
1 | index.d.ts


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | export { parseMail } from './lib/mailParser';
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | node_modules
3 | test/.DS_Store
4 | .DS_Store
5 | 


--------------------------------------------------------------------------------
/test/data/basic1:
--------------------------------------------------------------------------------
1 | Content-Type: text/plain; charset=iso-8859-1
2 | 
3 | Hello, world!
4 | 


--------------------------------------------------------------------------------
/test/data/charsets:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/protonmail/jsmimeparser/main/test/data/charsets


--------------------------------------------------------------------------------
/test/data/shift-jis-image:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/protonmail/jsmimeparser/main/test/data/shift-jis-image


--------------------------------------------------------------------------------
/test/data/multipart4:
--------------------------------------------------------------------------------
1 | Content-Type: multipart/mixed; boundary=boundary
2 | 
3 | --boundary
4 | 
5 | This has no headers, so should be recognized as plain text.
6 | 
7 | --boundary--
8 | 


--------------------------------------------------------------------------------
/lib/jsmime.js:
--------------------------------------------------------------------------------
1 | export { default as MimeParser } from './rawMimeParser';
2 | export { default as headerparser } from './headerParser';
3 | import * as headeremitter from './headerEmitter';
4 | 
5 | export { headeremitter };
6 | 


--------------------------------------------------------------------------------
/test/data/base64-2:
--------------------------------------------------------------------------------
1 | Content-Type: text/html; encoding=iso-8859-1
2 | Content-Transfer-Encoding: base64
3 | 
4 | PGh0bWw+PGJvZHk+VGhpcyBpcyBiYXNlNjQgZW5jb2RlZCBIVE1MIHRleHQsIGFuZCB0aGUgdGFncyB
5 | zaG91bGRuJ3QgYmUgc3RyaXBwZWQuDQo8Yj5Cb2xkIHRleHQgaXMgYm9sZCE8L2I+PC9ib2R5PjwvaH
6 | RtbD4NCg==
7 | 


--------------------------------------------------------------------------------
/test/data/multipart1:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed;
 2 |  boundary="boundary"
 3 | 
 4 | This is a text message in MIME format.
 5 | This part shouldn't appear in the output.
 6 | 
 7 | --boundary
 8 | Content-Type: text/plain
 9 | 
10 | Hello, world! (yet again...)
11 | 
12 | --boundary--
13 | 


--------------------------------------------------------------------------------
/test/data/multipart-base64-3:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary=boundary
 2 | 
 3 | etc etc
 4 | --boundary
 5 | Content-Type: text/html
 6 | Content-Transfer-Encoding: base64
 7 | 
 8 | PGh0bWw+PGhlYWQ+VGhpcyB0aW1lLCB0aGUgdGFncw0Kc2hvdWxkIGJlIHN0cmlwcGVkIG91dC48L2hlYWQ+PC9odG1sPg==
 9 | 
10 | --boundary--
11 | 


--------------------------------------------------------------------------------
/test/data/multipart-base64-1:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary=boundary
 2 | Content-Transfer-Encoding: base64
 3 | 
 4 | This part shouldn't appear
 5 | --boundary
 6 | Content-Type: text/plain
 7 | Content-Transfer-Encoding: base64
 8 | 
 9 | TXVsdGlwYXJ0IGJhc2U2NCBlbmNvZGVkIHRleHQu
10 | 
11 | --boundary--
12 | 


--------------------------------------------------------------------------------
/test/data/multipart-base64-2:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary=boundary
 2 | 
 3 | yadda yadda
 4 | 
 5 | --boundary
 6 | Content-Type: text/html
 7 | Content-Transfer-Encoding: base64
 8 | 
 9 | PGh0bWw+PGhlYWQ+YmFzZTY0ZW5jb2RlZCBIVE1MIHRleHQgaW5zaWRlIGEgbXVsdGlwYXJ0IG1lc3N
10 | hZ2UuPC9oZWFkPjwvaHRtbD4=
11 | 
12 | --boundary--
13 | 


--------------------------------------------------------------------------------
/test/data/multipart-binary:
--------------------------------------------------------------------------------
 1 | Subject: binary attachment
 2 | Content-Type: multipart/mixed; boundary="vungrzvzr"
 3 | 
 4 | --vungrzvzr
 5 | Content-Type: text/plain;
 6 | Content-Transfer-Encoding: 8bit
 7 | 
 8 | see binary attachment
 9 | 
10 | --vungrzvzr
11 | Content-Type: application/octect-stream
12 | Content-Transfer-Encoding: binary
13 | 
14 | 
15 | --vungrzvzr--
16 | 


--------------------------------------------------------------------------------
/test/data/multipart2:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary=boundary
 2 | 
 3 | This is a text/html message. This part shouldn't appear at all!
 4 | 
 5 | --boundary
 6 | Content-Type: text/html
 7 | 
 8 | <html><body>Multipart HTML message with just a single part!
 9 | </body></html>
10 | 
11 | --boundary--
12 | 
13 | Actually, this part shouldn't appear either.
14 | 


--------------------------------------------------------------------------------
/test/data/multipart3:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary=boundary
 2 | 
 3 | --boundary
 4 | Content-Type: text/html
 5 | 
 6 | <html>
 7 | 
 8 | 
 9 | <body>
10 | 
11 | 
12 | 
13 | Here, the HTML tags should be stripped out.
14 | 
15 | 
16 | 
17 | </body>
18 | 
19 | 
20 | 
21 | </html>
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | --boundary--
30 | 


--------------------------------------------------------------------------------
/test/data/base64-1:
--------------------------------------------------------------------------------
1 | Content-Type: text/plain; charset=iso-8859-1
2 | Content-Transfer-Encoding: base64
3 | 
4 | DQpIZWxsbywgd29ybGQhIChBZ2Fpbi4uLikNCg0KTGV0J3Mgc2VlIGhvdyB3ZWxsIGJhc2U2NCB0ZXh
5 | 0IGlzIGhhbmRsZWQuICAgICAgICAgICAgICAgICAgICAgICAgICAgIFlheSwgbG90cyBvZiBzcGFjZX
6 | MhIFRoZXJlJ3MgZXZlbiBhIENSTEYgYXQgdGhlIGVuZCBhbmQgb25lIGF0IHRoZSBiZWdpbm5pbmcsI
7 | GJ1dCB0aGUgb3V0cHV0IHNob3VsZG4ndCBoYXZlIGl0Lg0K
8 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: "npm"
 4 |     directory: "/"
 5 |     schedule:
 6 |       interval: "daily"
 7 |     allow:
 8 |       - dependency-name: "playwright"
 9 |     versioning-strategy: increase
10 |     ignore:
11 |       - dependency-name: "playwright"
12 |         update-types: ["version-update:semver-patch"] # patches do not include browser version updates
13 | 


--------------------------------------------------------------------------------
/docs/Developing.mkd:
--------------------------------------------------------------------------------
1 | Developing
2 | ==========
3 | 
4 | JSMime is intended to rely only on HTML 5 Web APIs and ES 6 as external
5 | requirements for its development. However, these are still a work in progress,
6 | and thus support for them may vary from browser to browser or among different
7 | rendering engines. Polyfilling may be necessary to get JSMime to work, but
8 | polyfills should only be present in test framework code and not in the main source or test files themselves.
9 | 


--------------------------------------------------------------------------------
/test/data/multipart-addresses:
--------------------------------------------------------------------------------
 1 | From: Some One <someone@test.com>
 2 | To: receiver@test.com, another_receiver@test.com
 3 | Cc: copy@test.com
 4 | Date: Sun, 12 Jun 2022 17:21:02 +0200
 5 | MIME-Version: 1.0
 6 | Content-Type: multipart/mixed;
 7 |         boundary="XXXXboundary text"
 8 | 
 9 | This is a multipart message in MIME format.
10 | 
11 | --XXXXboundary text
12 | Content-Type: text/plain
13 | 
14 | this is the body text
15 | 
16 | --XXXXboundary text
17 | Content-Type: text/plain;
18 | Content-Disposition: attachment;
19 |         filename="test.txt"
20 | 
21 | this is the attachment text
22 | 
23 | --XXXXboundary text--


--------------------------------------------------------------------------------
/test/data/multipart-addresses-groups:
--------------------------------------------------------------------------------
 1 | From: Some One <someone@test.com>
 2 | To: undisclosed-recipients: ;
 3 | Cc: Group A: AA <a@b.com>, AB <a@b.com>;, Group B: b@b.com
 4 | Date: Sun, 12 Jun 2022 17:21:02 +0200
 5 | MIME-Version: 1.0
 6 | Content-Type: multipart/mixed;
 7 |         boundary="XXXXboundary text"
 8 | 
 9 | This is a multipart message in MIME format.
10 | 
11 | --XXXXboundary text
12 | Content-Type: text/plain
13 | 
14 | this is the body text
15 | 
16 | --XXXXboundary text
17 | Content-Type: text/plain;
18 | Content-Disposition: attachment;
19 |         filename="test.txt"
20 | 
21 | this is the attachment text
22 | 
23 | --XXXXboundary text--


--------------------------------------------------------------------------------
/test/data/message-encoded:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary="iamaboundary"
 2 | 
 3 | This is a text message in MIME format.
 4 | This part shouldn't appear in the output.
 5 | 
 6 | --iamaboundary
 7 | Content-Type: message/rfc822
 8 | 
 9 | Subject: I am a subject
10 | 
11 | This is a plain-text message.
12 | --iamaboundary
13 | Content-Type: message/global
14 | Content-Transfer-Encoding: base64
15 | 
16 | U3ViamVjdDog56eB44Gv44CB5Lu25ZCN5Y2I5YmNDQoNCkkgYW0gYSBwbGFpbi10ZXh0IG1lc3NhZ2Uu
17 | --iamaboundary
18 | Content-Type: message/news
19 | Content-Transfer-Encoding: quoted-printable
20 | 
21 | Subject: =e7=a7=81=e3=81=af=e3=80=81=e4=bb=b6=e5=90=8d=e5=8d=88=e5=89=8d
22 | 
23 | I am an encoded plain-text message.
24 | --iamaboundary--
25 | 


--------------------------------------------------------------------------------
/test/data/multipart-empty-attachment:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; protected-headers="v1"; boundary="===============3607788715713061906=="
 2 | MIME-Version: 1.0
 3 | From: sender <sender@test.com>
 4 | Subject: encryption test
 5 | To: receiver@test.com
 6 | Date: Sun, 12 Jun 2022 17:21:02 +0200
 7 | Content-Language: en-US
 8 | 
 9 | --===============3607788715713061906==
10 | Content-Type: text/rfc822-headers; protected-headers="v1"
11 | Content-Disposition: inline
12 | Subject: encryption test
13 | To: receiver@test.com
14 | From: sender <sender@test.com>
15 | Date: Sun, 12 Jun 2022 17:21:02 +0200
16 | 
17 | 
18 | --===============3607788715713061906==
19 | Content-Type: text/plain; charset=utf-8
20 | Content-Transfer-Encoding: 7bit
21 | 
22 | test body
23 | 
24 | --===============3607788715713061906==--


--------------------------------------------------------------------------------
/test/data/multipart-complex1:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary="boundary"
 2 | 
 3 | This shouldn't appear.
 4 | --boundary
 5 | Content-Type: application/octet-stream
 6 | Content-Transfer-Encoding: base64
 7 | 
 8 | VGhpcyBpc24ndCByZWFsbHkgYW4gYXBwbGljYXRpb24vb2N0ZXQtc3RyZWFtLiA7KQ=='
 9 | 
10 | --boundary
11 | Content-Type: image/png
12 | Content-Transfer-Encoding: base64
13 | 
14 | TmVpdGhlciBpcyB0aGlzIGFuIGltYWdlL3BuZy4=
15 | 
16 | --boundary
17 | Content-Type: multipart/related; boundary="boundary2"
18 | 
19 | --boundary2
20 | Content-Type: text/html
21 | 
22 | <html><head>This part should be returned.</head></html>
23 | 
24 | --boundary2--
25 | 
26 | --boundary
27 | Content-Type: text/plain
28 | 
29 | This part shouldn't.
30 | 
31 | --boundary
32 | 
33 | Neither should this part!
34 | 
35 | --boundary--
36 | 


--------------------------------------------------------------------------------
/index.d.ts:
--------------------------------------------------------------------------------
 1 | export type Headers = { [key: string]: string[] };
 2 | 
 3 | interface Attachment {
 4 |   content: Uint8Array<ArrayBuffer>;
 5 |   headers: Headers;
 6 |   size: number;
 7 |   fileName?: string;
 8 |   contentType?: string;
 9 |   contentDisposition?: string;
10 |   contentId?: string;
11 | }
12 | 
13 | type Address = { name: string, email: string };
14 | type Group = { name: string, group: Address[] };
15 | type AddressOrGroup = Address | Group;
16 | 
17 | export interface ParsedMessage {
18 |   attachments: Attachment[];
19 |   headers: Headers;
20 |   body: {
21 |     html: string | null; // 'text/html' body parts, joined together separated by <br>\n
22 |     text: string | null; // 'text/plain' body parts, joined together separated by \n
23 |   },
24 |   date?: Date;
25 |   subject?: string,
26 |   from?: Address,
27 |   to?: AddressOrGroup[],
28 |   cc?: AddressOrGroup[],
29 |   bcc?: AddressOrGroup[],
30 |   'reply-to'?: Address
31 | }
32 | 
33 | export function parseMail(message: string | Uint8Array): ParsedMessage;
34 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches: [main]
 4 |   pull_request:
 5 |     branches: [main]
 6 | 
 7 | jobs:
 8 |   e2e-tests:
 9 |     name: Tests
10 |     strategy:
11 |       fail-fast: false # if tests for one version fail, continue with the rest
12 |       matrix:
13 |         # run on multiple platforms to test platform-specific code, if present
14 |         # (e.g. webkit's WebCrypto API implementation is different in macOS vs Linux)
15 |         runner: ['ubuntu-latest', 'macos-latest', 'windows-latest']
16 |     runs-on: ${{ matrix.runner }} 
17 |     
18 |     steps:
19 |       - uses: actions/checkout@v4
20 |       - uses: actions/setup-node@v4
21 | 
22 |       - name: Install dependencies
23 |         run: npm ci
24 | 
25 |       - name: Install Chrome
26 |         run: npx playwright install --with-deps chromium
27 | 
28 |       - name: Install Firefox
29 |         run: npx playwright install --with-deps firefox
30 | 
31 |       - name: Install Webkit
32 |         run: npx playwright install --with-deps webkit
33 | 
34 |       - name: Run tests
35 |         run: npm test


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013 Joshua Cranmer
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/test/data/multipart-encrypted-subject-utf8:
--------------------------------------------------------------------------------
 1 | Content-Type: multipart/mixed; boundary="------------7VgK7B2dk0pUYjHBY0Zi2Fda";
 2 |  protected-headers="v1"
 3 | Subject: =?UTF-8?B?c3ViamVjdCB3aXRoIGVtb2ppcyDwn5iD8J+Yhw==?=
 4 | From: Sender <sender@example.com>
 5 | To: receiver@example.com
 6 | Message-ID: <7daafa18-8595-8065-3eba-b08c07becf36@example.com>
 7 | 
 8 | --------------7VgK7B2dk0pUYjHBY0Zi2Fda
 9 | Content-Type: multipart/mixed; boundary="------------D5jH01SvFZAwYShsjQamYW8w"
10 | 
11 | --------------D5jH01SvFZAwYShsjQamYW8w
12 | Content-Type: text/plain; charset=UTF-8; format=flowed
13 | Content-Transfer-Encoding: base64
14 | 
15 | dGVzdCB1dGY4IGluIGVuY3J5cHRlZCBzdWJqZWN0DQo=
16 | --------------D5jH01SvFZAwYShsjQamYW8w
17 | Content-Type: application/pgp-keys; name="OpenPGP_0xabc.asc"
18 | Content-Disposition: attachment; filename="OpenPGP_0xabc.asc"
19 | Content-Description: OpenPGP public key
20 | Content-Transfer-Encoding: quoted-printable
21 | 
22 | -----BEGIN PGP PUBLIC KEY BLOCK-----
23 | 
24 | ...
25 | -----END PGP PUBLIC KEY BLOCK-----
26 | 
27 | --------------D5jH01SvFZAwYShsjQamYW8w--
28 | 
29 | --------------7VgK7B2dk0pUYjHBY0Zi2Fda--
30 | 


--------------------------------------------------------------------------------
/test/data/multipart-content-id:
--------------------------------------------------------------------------------
 1 | Subject: attachment with ContentID
 2 | Content-Type: multipart/mixed; boundary="vungrzvzr"
 3 | 
 4 | --vungrzvzr
 5 | Content-Type: text/plain;
 6 | Content-Transfer-Encoding: 8bit
 7 | 
 8 | the first attachment has a Content-ID set, the second does not.
 9 | 
10 | --vungrzvzr
11 | Content-Type: image/png
12 | Content-Transfer-Encoding: base64
13 | Content-ID: <001110.102211@siebel.com>
14 | 
15 | iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f//
16 | b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAAC
17 | AEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3H
18 | zgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAA
19 | AIw322gDIPvtlmUAAAAASUVORK5CYII=
20 | 
21 | --vungrzvzr
22 | Content-Disposition: attachment; filename="test.png"
23 | Content-Type: image/png
24 | Content-Transfer-Encoding: base64
25 | 
26 | iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f//
27 | b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAAC
28 | AEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3H
29 | zgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAA
30 | AIw322gDIPvtlmUAAAAASUVORK5CYII=
31 | --vungrzvzr--
32 | 
33 | 


--------------------------------------------------------------------------------
/test/data/multipart-complex2:
--------------------------------------------------------------------------------
 1 | From - Mon Jun 02 19:00:00 2008
 2 | Content-Type: multipart/mixed; boundary="bou"
 3 | Message-Id: <123456@example.com>
 4 | 
 5 | Part 1
 6 | --bou                       
 7 | Content-Type: multipart/related; boundary="bound"
 8 | 
 9 | Part 2
10 | --bound
11 | Content-Type: multipart/digest; boundary="boundar"
12 | 
13 | Part 3
14 | --boundar
15 | Content-Type: multipart/alternative; boundary="boundary"
16 | 
17 | Part 4
18 | --boundary
19 | Content-Type: application/octet-stream
20 | 
21 | Wow, what alternatives!
22 | 
23 | We're trying to confuse the parser here.
24 | 
25 | --bou
26 | 
27 | --bound
28 | 
29 | --boundar
30 | 
31 | --boundary
32 | Content-Type: application/pdf
33 | 
34 | A choice between a PDF and an octet stream! How marvellous!
35 | 
36 | --boundary--
37 | 
38 | --boundar
39 | Content-Type: multipart/mixed; boundary="boundary123456"
40 | 
41 | --boundary123456
42 | Content-Type: text/plain
43 | 
44 | This is the correct answer.
45 | 
46 | --boundary123456--
47 | 
48 | --boundar--
49 | 
50 | --bound
51 | Content-Type: text/plain
52 | 
53 | One last attempt at confusing the parser.
54 | 
55 | --bound--
56 | 
57 | --bou
58 | Content-Type: text/html
59 | 
60 | <html><body>No harm in making another.</body></html>
61 | 
62 | --bou--
63 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@protontech/jsmimeparser",
 3 |   "version": "3.0.2",
 4 |   "description": "An asynchronous MIME parser written in JavaScript",
 5 |   "main": "index.js",
 6 |   "types": "index.d.ts",
 7 |   "files": [
 8 |     "lib/",
 9 |     "index.d.ts",
10 |     "index.js"
11 |   ],
12 |   "scripts": {
13 |     "postversion": "git push && git push --tags",
14 |     "test": "karma start karma.conf.js",
15 |     "lint": "eslint lib test --quiet"
16 |   },
17 |   "repository": {
18 |     "type": "git",
19 |     "url": "git+https://github.com/ProtonMail/jsmimeparser.git"
20 |   },
21 |   "author": "ProtonMail",
22 |   "license": "MIT",
23 |   "bugs": {
24 |     "url": "https://github.com/ProtonMail/jsmimeparser/issues"
25 |   },
26 |   "devDependencies": {
27 |     "@types/chai": "^4.3.20",
28 |     "@types/mocha": "^10.0.10",
29 |     "chai": "^4.5.0",
30 |     "eslint": "^8.57.1",
31 |     "eslint-config-airbnb-base": "^15.0.0",
32 |     "eslint-plugin-import": "^2.32.0",
33 |     "karma": "^6.4.4",
34 |     "karma-chrome-launcher": "^3.2.0",
35 |     "karma-firefox-launcher": "^2.1.3",
36 |     "karma-mocha": "^2.0.1",
37 |     "karma-mocha-reporter": "^2.2.5",
38 |     "karma-webkit-launcher": "^1.3.1",
39 |     "karma-webpack": "^5.0.1",
40 |     "mocha": "^10.8.2",
41 |     "playwright": "^1.54.2",
42 |     "webpack": "^5.101.0"
43 |   }
44 | }
45 | 


--------------------------------------------------------------------------------
/test/test_custom_headers.js:
--------------------------------------------------------------------------------
 1 | 
 2 | import { assert } from "chai";
 3 | import { headeremitter, headerparser } from "../lib/jsmime";
 4 | 
 5 | describe("Custom decoder support", function() {
 6 |   function customDecoder(values) {
 7 |     let value = values.join("");
 8 |     return atob(value);
 9 |   }
10 |   function customEncoder(value) {
11 |     this.addText(btoa(value), true);
12 |   }
13 |   it("addStructuredEncoder", function() {
14 |     assert.equal(
15 |       "X-Base64: String\r\n",
16 |       headeremitter.emitStructuredHeader("X-Base64", "String", {})
17 |     );
18 |     headeremitter.addStructuredEncoder("X-Base64", customEncoder);
19 |     assert.equal(
20 |       "X-Base64: U3RyaW5n\r\n",
21 |       headeremitter.emitStructuredHeader("X-Base64", "String", {})
22 |     );
23 |     assert.equal(
24 |       "X-Base64: U3RyaW5n\r\n",
25 |       headeremitter.emitStructuredHeader("x-bASe64", "String", {})
26 |     );
27 |   });
28 |   it("addStructuredDecoder", function() {
29 |     assert.throws(function() {
30 |       headerparser.parseStructuredHeader("X-Base64", "U3RyaW5n");
31 |     }, /Unknown structured header/);
32 |     headerparser.addStructuredDecoder("X-Base64", customDecoder);
33 |     assert.equal(
34 |       "String",
35 |       headerparser.parseStructuredHeader("X-Base64", "U3RyaW5n")
36 |     );
37 |     assert.throws(function() {
38 |       headerparser.addStructuredDecoder("To", customDecoder);
39 |     }, /Cannot override header/);
40 |   });
41 | });
42 | 


--------------------------------------------------------------------------------
/test/data/multipartmalt-detach:
--------------------------------------------------------------------------------
 1 | From 
 2 | X-Account-Key: account1
 3 | X-UIDL: 0397aedc0eee392343488772c79f110d
 4 | X-Mozilla-Status: 0001
 5 | X-Mozilla-Status2: 10000000
 6 | X-Mozilla-Keys:                                                                                 
 7 | Return-Path: <ef@hg.de>
 8 | X-Flags: 0000
 9 | Date: Tue, 29 Aug 2006 16:42:08 GMT
10 | From: abc <ef@hg.de>
11 | To: abc <ef@hg.de>
12 | Subject: detach test
13 | Message-ID: <xxxyy@zzz>
14 | MIME-Version: 1.0
15 | Content-Type: multipart/alternative; boundary="gmxboundary=-1156956072-29266-top"
16 | 
17 | --gmxboundary=-1156956072-29266-top
18 | Content-Type: text/plain; charset="iso-8859-1"
19 | 
20 | plain body
21 | --gmxboundary=-1156956072-29266-top
22 | Content-Type: multipart/related; boundary="gmxboundary=-1156956072-29266-sub"
23 | 
24 | --gmxboundary=-1156956072-29266-sub
25 | Content-Type: text/html; charset="iso-8859-1"
26 | 
27 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.=
28 | w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
29 | <html xmlns=3D"http://www.w3.org/1999/xhtml" xml:lang=3D"de" lang=3D"de">
30 | <head>
31 | <title>Update</title>
32 | <meta http-equiv=3D"Content-Type" content=3D"text/html; charset=3Diso-8859-1" />
33 | </head>
34 | 
35 | <body> body hello
36 | </body>
37 | </html>
38 | --gmxboundary=-1156956072-29266-sub
39 | Content-Type: text/plain
40 | Content-Disposition: inline; filename="head_update.txt"
41 | 
42 | head_update.txt
43 | --gmxboundary=-1156956072-29266-sub
44 | Content-Type: text/plain
45 | Content-Disposition: inline; filename="smurf_update_neu.txt"
46 | 
47 | smurf_update_neu.txt
48 | --gmxboundary=-1156956072-29266-sub--
49 | --gmxboundary=-1156956072-29266-top
50 | Content-Type: text/plain
51 | Content-Disposition: attachment; filename="head_update.txt"
52 | 
53 | headUpdate.text
54 | --gmxboundary=-1156956072-29266-top--
55 | 


--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "extends": [
 3 |         // "airbnb-base"
 4 |         "eslint:recommended"
 5 |     ],
 6 | 
 7 |     "parserOptions": {
 8 |         "ecmaVersion": 9,
 9 |         "sourceType": "module"
10 |     },
11 | 
12 |     "globals": {
13 |         "window": "readonly",
14 |         "btoa": "readonly",
15 |         "atob": "readonly"
16 |     },
17 |     "env": {
18 |         "es6": true,
19 |         "browser": true,
20 |         "mocha": true
21 |     },
22 |     "plugins": [
23 |         "import"
24 |     ],
25 |     "rules": {
26 |         "no-unused-vars": ["error", {"args": "none"}],
27 |         "prefer-spread": "off",
28 |         "no-restricted-syntax": "off",
29 |         "consistent-return": "off",
30 |         "object-curly-newline": "off",
31 |         "prefer-template": "off",
32 |         "no-plusplus": "off",
33 |         "no-continue": "off",
34 |         "no-bitwise": "off",
35 |         "no-await-in-loop": "off",
36 |         "no-sequences": "warn",
37 |         "no-param-reassign": "warn",
38 |         "no-return-assign": "warn",
39 |         "no-else-return": ["error", { "allowElseIf": true }],
40 |         "no-shadow": "off",
41 |         "no-undef": "error",
42 |         "arrow-body-style": "off",
43 |         "space-before-function-paren": "off",
44 |         "operator-linebreak": "off",
45 |         "implicit-arrow-linebreak": "off",
46 |         "no-underscore-dangle": "off",
47 |         "import/no-unresolved": ["error", {
48 |             "ignore": ["^react$", "ttag", ".data"]
49 |         }],
50 |         "import/prefer-default-export": "off",
51 |         "import/no-extraneous-dependencies": "off",
52 |         "import/no-unassigned-import": "error",
53 |         "import/named": "error",
54 |         "import/extensions": "error",
55 |         "max-len": ["error", {
56 |             "ignoreComments": true,
57 |             "code": 120,
58 |             "ignoreStrings": true,
59 |             "ignoreTemplateLiterals": true,
60 |             "ignoreRegExpLiterals": true
61 |         }],
62 |         "no-multiple-empty-lines": ["error"],
63 |         "no-trailing-spaces": ["error"],
64 |         "eol-last": ["error"],
65 |         "padded-blocks": "off",
66 |         "max-classes-per-file": "off",
67 |         "no-empty": "off"
68 |     }
69 | }
70 | 


--------------------------------------------------------------------------------
/test/data/bug505221:
--------------------------------------------------------------------------------
 1 | From - Mon Jan 1 00:00:00 1965
 2 | X-Mozilla-Status: 0001
 3 | X-Mozilla-Status2: 10000000
 4 | From: <aaa bbb>
 5 | To: <aaa@bb.invalid>
 6 | Subject: xxx
 7 | Date: Tue, 9 Dec 2008 16:49:02 +0200
 8 | MIME-Version: 1.0
 9 | Content-Type: multipart/mixed;
10 | 	boundary="----=_NextPart_000_36B5_01C9DB8C.9514C300"
11 | X-Priority: 3
12 | X-MSMail-Priority: Normal
13 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.2180
14 | 
15 | This is a multi-part message in MIME format.
16 | 
17 | ------=_NextPart_000_36B5_01C9DB8C.9514C300
18 | Content-Type: text/html;
19 | 	charset="iso-8859-1"
20 | Content-Transfer-Encoding: quoted-printable
21 | 
22 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
23 | <HTML><HEAD>
24 | <META HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html; =
25 | charset=3Dus-ascii">
26 | 
27 | 
28 | <META content=3D"MSHTML 6.00.6000.16735" name=3DGENERATOR></HEAD>
29 | <BODY> bbb
30 | </BODY></HTML>
31 | ------=_NextPart_000_36B5_01C9DB8C.9514C300
32 | Content-Type: message/rfc822
33 | Content-Transfer-Encoding: 7bit
34 | Content-Disposition: attachment
35 | 
36 | From: <sys admin>
37 | To: <dd@ee.invalid>
38 | Subject: yyy
39 | Date: Sun, 7 Dec 2008 17:53:47 +0200
40 | MIME-Version: 1.0
41 | Content-Type: message/rfc822
42 | Content-Transfer-Encoding: 7bit
43 | Content-Disposition: attachment
44 | X-Priority: 3
45 | X-MSMail-Priority: Normal
46 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.2180
47 | 
48 | From: <aa@b>
49 | To: <aa@b>
50 | Subject: ccc
51 | Date: Sat, 23 May 2009 09:55:19 +0200
52 | MIME-Version: 1.0
53 | Content-Type: text/html;
54 | 	charset="iso-8859-1"
55 | Content-Transfer-Encoding: quoted-printable
56 | X-Priority: 3
57 | X-MSMail-Priority: Normal
58 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.2180
59 | 
60 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
61 | <HTML>
62 | <HEAD>
63 | <META HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html; =
64 | charset=3Diso-8859-1">
65 | <META NAME=3D"Generator" CONTENT=3D"MS Exchange Server version =
66 | 08.00.0681.000">
67 | <TITLE>ccc</TITLE>
68 | </HEAD>
69 | <BODY>
70 | <!-- Converted from text/plain format -->
71 | 
72 | </BODY>
73 | </HTML>
74 | ------=_NextPart_000_36B5_01C9DB8C.9514C300--
75 | 
76 | 


--------------------------------------------------------------------------------
/lib/textDecoders.js:
--------------------------------------------------------------------------------
 1 | import { uint8ArrayToString, base64ToUint8Array } from './utils';
 2 | 
 3 | class UTF7TextDecoder {
 4 |   constructor() {
 5 |     this.collectInput = '';
 6 |     this.decodeString = decodeUtf7;
 7 |   }
 8 |   decode(input, options = {}) {
 9 |     let more = options.stream;
10 |     // There are cases where this is called without input, to flush the collected input
11 |     if (input) {
12 |       this.collectInput += uint8ArrayToString(input);
13 |     }
14 |     if (more) {
15 |       return "";
16 |     }
17 |     return this.decodeString(this.collectInput);
18 |   }
19 | }
20 | 
/**
 * Same buffering decoder as UTF7TextDecoder, but the collected input is
 * decoded with decodeUtf7Imap instead of decodeUtf7.
 */
class UTF7ImapTextDecoder extends UTF7TextDecoder {
  constructor() {
    super();
    // Override the decoding routine installed by the parent constructor.
    this.decodeString = decodeUtf7Imap;
  }
}
27 | 
/**
 * Create a decoder object for the given charset label.
 *
 * "utf-7" and "utf-7-imap" are served by the decoders defined in this file;
 * every other label is handed to the platform TextDecoder.
 *
 * @param {String} charset Charset label (matched case-insensitively here).
 * @param {Object} [options] Options forwarded to TextDecoder.
 * @returns {Object} An object exposing decode(input, options).
 */
export function MimeTextDecoder(charset, options) {
  const label = charset.toLowerCase();
  if (label === "utf-7") {
    return new UTF7TextDecoder();
  }
  if (label === "utf-7-imap") {
    return new UTF7ImapTextDecoder();
  }
  // cp932 is decoded as shift_jis, see
  // https://bugzilla.mozilla.org/show_bug.cgi?id=1511950
  const effectiveCharset = label === "cp932" ? "shift_jis" : charset;
  return new TextDecoder(effectiveCharset, options);
}
41 | 
42 | // UTF7 helpers
43 | 
/**
 * Turn the base64 payload of a UTF-7 shifted sequence into a string.
 * @see {@link https://github.com/emailjs/emailjs-utf7} for original implementation
 * @param {String} str Base64 text found between the shift characters
 * @returns {String} One character per pair of decoded bytes
 */
function decodeFromUTF7(str) {
  const bytes = base64ToUint8Array(str);
  const pieces = [];

  // Every character is stored as its two byte Unicode ID, high byte first.
  for (let i = 0; i < bytes.length; i += 2) {
    pieces.push(String.fromCharCode((bytes[i] << 8) | bytes[i + 1]));
  }
  return pieces.join('');
}
58 | 
/**
 * Decodes UTF-7 string, see RFC 2152
 *
 * A shifted sequence starts at '+' and runs over modified-base64 characters
 * up to an optional terminating '-'. An empty sequence ("+-", or a '+' that
 * is not followed by base64 text) stands for a literal '+'.
 *
 * @see {@link https://github.com/emailjs/emailjs-utf7} for original implementation
 * @param {String} str String to decode
 * @returns {String} The decoded string
 */
export const decodeUtf7 = str =>
  // '+' belongs to the base64 alphabet and must be part of the run; without it
  // a run such as "+A+A-" is cut at the inner '+' and cannot be decoded.
  str.replace(/\+([A-Za-z0-9+/]*)-?/g, (_, chunk) => chunk === '' ? '+' : decodeFromUTF7(chunk));
66 | 
/**
 * Decodes UTF-7 string, see RFC 3501
 *
 * Shifted sequences are delimited by '&' and '-'; "&-" stands for a literal
 * '&', and ',' inside a sequence is mapped to '/' before base64 decoding.
 *
 * @see {@link https://github.com/emailjs/emailjs-utf7} for original implementation
 * @param {String} str String to decode
 * @returns {String} The decoded string
 */
export const decodeUtf7Imap = str => {
  const expandSequence = (_match, chunk) => {
    if (chunk === '') {
      return '&';
    }
    return decodeFromUTF7(chunk.replace(/,/g, '/'));
  };
  return str.replace(/&([^-]*)-/g, expandSequence);
};
74 | 


--------------------------------------------------------------------------------
/test/data/bugmail11:
--------------------------------------------------------------------------------
 1 | From - Mon Jun 02 19:00:00 2008
 2 | X-Mozilla-Status: 0001
 3 | X-Mozilla-Status2: 00000000
 4 | X-Mozilla-Keys:                                                                                 
 5 | Return-path: <example@example.com>
 6 | Delivered-To: bugmail@example.org
 7 | Received: by 10.114.166.12 with SMTP id o12cs163262wae;
 8 |         Fri, 11 Apr 2008 07:17:31 -0700 (PDT)
 9 | Received: by 10.115.60.1 with SMTP id n1mr214763wak.181.1207923450166;
10 |         Fri, 11 Apr 2008 07:17:30 -0700 (PDT)
11 | Return-Path: <bugzilla-daemon@mozilla.org>
12 | Received: from webapp-out.mozilla.org (webapp01.sj.mozilla.com [63.245.208.146])
13 |         by mx.google.com with ESMTP id n38si6807242wag.2.2008.04.11.07.17.29;
14 |         Fri, 11 Apr 2008 07:17:30 -0700 (PDT)
15 | Received-SPF: neutral (google.com: 63.245.208.146 is neither permitted nor denied by best guess record for domain of bugzilla-daemon@mozilla.org) client-ip=63.245.208.146;
16 | Authentication-Results: mx.google.com; spf=neutral (google.com: 63.245.208.146 is neither permitted nor denied by best guess record for domain of bugzilla-daemon@mozilla.org) smtp.mail=bugzilla-daemon@mozilla.org
17 | Received: from mrapp51.mozilla.org (mrapp51.mozilla.org [127.0.0.1])
18 | 	by webapp-out.mozilla.org (8.13.8/8.13.8) with ESMTP id m3BEHTGU030132
19 | 	for <bugmail@example.org>; Fri, 11 Apr 2008 07:17:29 -0700
20 | Received: (from root@localhost)
21 | 	by mrapp51.mozilla.org (8.13.8/8.13.8/Submit) id m3BEHTk4030129;
22 | 	Fri, 11 Apr 2008 07:17:29 -0700
23 | Date: Fri, 11 Apr 2008 07:17:29 -0700
24 | Message-Id: <200804111417.m3BEHTk4030129@mrapp51.mozilla.org>
25 | From: bugzilla-daemon@mozilla.org
26 | To: bugmail@example.org
27 | Subject: Bugzilla: confirm account creation
28 | X-Bugzilla-Type: admin
29 | Content-Type: text/plain; charset="UTF-8"
30 | MIME-Version: 1.0
31 | 
32 | Bugzilla has received a request to create a user account
33 | using your email address (example@example.org).
34 | 
35 | To confirm that you want to create an account using that email address,
36 | visit the following link:
37 | 
38 | https://bugzilla.mozilla.org/token.cgi?t=xxxxxxxxxx&a=request_new_account
39 | 
40 | If you are not the person who made this request, or you wish to cancel
41 | this request, visit the following link:
42 | 
43 | https://bugzilla.mozilla.org/token.cgi?t=xxxxxxxxxx&a=cancel_new_account
44 | 
45 | If you do nothing, the request will lapse after 3 days
46 | (on April 14th, 2008 at 07:17 PDT).
47 | 
48 | 


--------------------------------------------------------------------------------
/karma.conf.js:
--------------------------------------------------------------------------------
  1 | /* global require, Buffer, process, module */
  2 | 
const fs = require('fs');
const { firefox, chromium, webkit } = require('playwright');
// Publish the paths of the browser executables reported by playwright through
// environment variables (presumably the ones the karma launcher plugins read
// to locate each browser -- confirm against the launchers' documentation).
process.env.CHROME_BIN = chromium.executablePath();
process.env.FIREFOX_BIN = firefox.executablePath();
process.env.WEBKIT_HEADLESS_BIN = webkit.executablePath();
  8 | 
  9 | // karma does not recognise the file as binary and automatically converts it to utf8 to apply preprocessors.
 10 | // Using a middleware to load the file prevents the transformation and preserves the charset information.
 11 | function charsets_middleware() {
 12 |   return function (request, response, next) {
 13 |     const match = request.url.match(/[/\w+]*charsets/);
 14 |     if (match && request.method === 'GET') {
 15 |       const path = match[0].replace(/\/base/, './');
 16 |       // eslint-disable-next-line no-undef
 17 |       const data = Buffer.from(fs.readFileSync(path));
 18 |       response.setHeader('Content-Type', 'application/octet-stream');
 19 |       response.setHeader('Content-Length', data.length);
 20 |       response.writeHead(200);
 21 |       return response.end(data);
 22 |     }
 23 |     next();
 24 |   }
 25 | }
 26 | 
/**
 * Karma configuration: the files matching test/test* are bundled with webpack
 * and run once with mocha in the headless browsers listed under `browsers`.
 *
 * @param {Object} config Karma configuration object; settings are applied
 *                        through config.set().
 */
module.exports = function(config) {
  config.set({
    // base path that will be used to resolve all patterns (eg. files, exclude)
    basePath: '',

    // frameworks to use
    // available frameworks: https://www.npmjs.com/search?q=keywords:karma-adapter
    frameworks: ['mocha', 'webpack'],

    plugins: [
      'karma-mocha',
      'karma-webpack',
      'karma-mocha-reporter',
      'karma-chrome-launcher',
      'karma-firefox-launcher',
      'karma-webkit-launcher',
      // inline plugin exposing charsets_middleware under the name 'charsets'
      {'middleware:charsets': ['factory', charsets_middleware]}
    ],

    // list of files / patterns to load in the browser
    files: [
      { pattern: 'test/test*', watched: false },
      // fixtures are served on request but not included in the test page
      {pattern: 'test/data/**', watched: false, included: false, served: true},
    ],

    // middleware referenced by the name registered in `plugins` above
    beforeMiddleware: ['charsets'],

    // list of files / patterns to exclude
    exclude: [],

    // preprocess matching files before serving them to the browser
    // available preprocessors: https://www.npmjs.com/search?q=keywords:karma-preprocessor
    preprocessors: {
        'test/test*': 'webpack'
    },

    webpack: {
      resolve: {
        extensions: ['', '.js']
      }
    },

    // available reporters: https://www.npmjs.com/search?q=keywords:karma-reporter
    reporters: ['mocha'],

    // web server port
    port: 9876,

    // enable / disable colors in the output (reporters and logs)
    colors: true,

    // level of logging
    // possible values: config.LOG_DISABLE || config.LOG_ERROR || config.LOG_WARN || config.LOG_INFO || config.LOG_DEBUG
    logLevel: config.LOG_INFO,

    // enable / disable watching file and executing tests whenever any file changes
    autoWatch: false,

        // ChromeHeadless started with the --no-sandbox flag
        customLaunchers: {
            ChromeHeadlessCI: {
                base: 'ChromeHeadless',
                flags: ['--no-sandbox']
            }
        },
        browsers: ['ChromeHeadlessCI', 'FirefoxHeadless', 'WebkitHeadless'],

    // Continuous Integration mode
    // if true, Karma captures browsers, runs the tests and exits
    singleRun: true,

    // Concurrency level
    // how many browser instances should be started simultaneously
    concurrency: Infinity
  });
};
102 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This is a fork of [mozilla-comm/jsmime](https://github.com/mozilla-comm/jsmime) that has been updated to include the changes made in [mozilla/releases-comm-central](https://github.com/mozilla/releases-comm-central/tree/master/mailnews/mime/jsmime) (incl. UTF-7 support).
 2 | Further, the library now uses ES6 modules and exposes a user-friendly `parseMail` function.
 3 | 
 4 | ## Code Layout
 5 | 
 6 | JSMime is a MIME parsing and composition library that is written completely in
 7 | JavaScript using ES6 functionality and WebAPIs (where such APIs exist). There
 8 | are a few features for which a standardized WebAPI does not exist; for these,
 9 | external JavaScript libraries are used.
10 | 
11 | The MIME parser consists of three logical phases of translation:
12 | 
13 | 1. Build the MIME (and pseudo-MIME) tree.
14 | 2. Convert the MIME tree into a list of body parts and attachments.
15 | 3. Use the result to drive a displayed version of the message.
16 | 
17 | The first stage is located in `rawMimeParser.js`, the second in `mailParser.js` (in particular, the `parseMail` function). The third stage is left to the applications.
18 | 
19 | ## Install
20 | 
21 | ```sh
22 | npm i @protontech/jsmimeparser
23 | ```
24 | 
25 | ## Usage
26 | 
27 | The `parseMail` function is designed to be user-friendly but remains bare-bones in the sense that it does not add metadata or information that is not found in the original message (e.g. no automatic contentID or checksum generation for the attachments, unlike [Nodemailer's MailParser](https://github.com/nodemailer/mailparser)).
28 | 
29 | ```js
30 | import { parseMail } from '@protontech/jsmimeparser';
31 | 
32 | const eml = `Message-Id: <200308210240.h7L2e5A0016623@sphinx.got.net>
33 | Received: from source ([69.9.251.177]) by exprod5mx37.postini.com ...
34 | From: "Bob Example" <bob@internet.com>
35 | To: "Alice Example" <alice@internet.com>
36 | Date: Wed, 20 Aug 2003 16:02:43 -0500
37 | Subject: Test message
38 | MIME-Version: 1.0
39 | Content-Type: multipart/mixed;
40 |         boundary="XXXXboundary text"
41 | 
42 | This is a multipart message in MIME format.
43 | 
44 | --XXXXboundary text
45 | Content-Type: text/plain
46 | 
47 | Hello Alice.
48 | This is a test message with 5 lines in the message body
49 | and an attachment.
50 | Your friend,
51 | Bob
52 | --XXXXboundary text
53 | Content-Type: image/gif
54 | Content-Transfer-Encoding: Base64
55 | Content-Disposition: attachment; filename=smile.gif
56 | 
57 | R0lGODlhyADIAMIAAP...+lmxwBLZ7FjJNkKsbcbyuGq0vKpH7bO50klqJ7YSmCYn4Yrrn4+elGsurYeoKy67e/ZqrrfogivvvONu4i6B8CJ6L77nguKigD0O7FK+mhhskoZIEhzwJwpjxLCFUy7co8ANH1xwxhY/LIpdIB/qmr6Hhvztfih+XPLKJ6c4HsYtK2ByvShb9UQCADs=
58 | 
59 | --XXXXboundary text--`
60 | 
61 | const {
62 |   attachments, // [{ contentType: 'image/gif', fileName: 'smile.gif', content: Uint8Array[71, 73, 70..], ... }]
63 |   body, // { text: 'Hello Alice.\nThis is..', html: '' }
64 |   subject, // 'Test message'
65 |   from, // { name: 'Bob Example', email: 'bob@internet.com' }
66 |   to, // [{ name: 'Alice Example', email: 'alice@internet.com' }]
67 |   date, // Date('Wed, 20 Aug 2003 16:02:43 -0500')
68 |   ...rest // headers and more
69 | } = parseMail(eml);
70 | ```
71 | 
72 | See `test/test_mail_parser.ts` for other examples with different MIME messages. Type information can be found in `index.d.ts`.
73 | 
74 | Aside from `parseMail`, several lower-level functions are exported by `lib/jsmime` and `lib/mailParser` (mostly unchanged from the original jsmime & mozilla repos).
75 | 
76 | ## Testing
77 | Headless Chrome (or Chromium), Firefox and Webkit are used for the tests.
78 | To install any missing browsers automatically, you can run `npx playwright install-deps <chromium|firefox|webkit>`. Alternatively, you can install them manually as you normally would on your platform.
79 | If you'd like to test on a subset of browsers, use e.g. `npm test -- --browsers ChromeHeadless,FirefoxHeadless`.
80 | 


--------------------------------------------------------------------------------
/test/test_structured_header_emitters.js:
--------------------------------------------------------------------------------
  1 | 
  2 | import { assert } from 'chai';
  3 | import { headeremitter } from '../lib/jsmime';
  4 | import { MockDate } from "./utils";
  5 | 
  6 | function arrayTest(data, fn) {
  7 |   fn.toString = function() {
  8 |     let text = Function.prototype.toString.call(this);
  9 |     text = text.replace(/data\[([0-9]*)\]/g, function(m, p) {
 10 |       return JSON.stringify(data[p]);
 11 |     });
 12 |     return text;
 13 |   };
 14 |   return it(JSON.stringify(data[0]), fn);
 15 | }
 16 | 
 17 | function testHeader(header, tests) {
 18 |   describe(header, function() {
 19 |     tests.forEach(function(data) {
 20 |       arrayTest(data, function() {
 21 |         assert.deepEqual(
 22 |           headeremitter.emitStructuredHeader(header, data[0], {
 23 |             softMargin: 100,
 24 |             useASCII: true,
 25 |           }),
 26 |           (header + ": " + data[1]).trim() + "\r\n"
 27 |         );
 28 |       });
 29 |     });
 30 |   });
 31 | }
 32 | 
 33 | describe("Structured header emitters", function() {
 34 |   // Ad-hoc header tests
 35 |   // TODO: add structured encoder tests for Content-Type when it is added.
 36 | 
 37 |   testHeader("Content-Transfer-Encoding", [
 38 |     ["", ""],
 39 |     ["8bit", "8bit"],
 40 |     ["invalid", "invalid"],
 41 |   ]);
 42 | 
 43 |   // Non-ad-hoc header tests
 44 |   let addressing_headers = [
 45 |     "From",
 46 |     "To",
 47 |     "Cc",
 48 |     "Bcc",
 49 |     "Sender",
 50 |     "Reply-To",
 51 |     "Resent-Bcc",
 52 |     "Resent-To",
 53 |     "Resent-From",
 54 |     "Resent-Cc",
 55 |     "Resent-Sender",
 56 |     "Approved",
 57 |     "Disposition-Notification-To",
 58 |     "Delivered-To",
 59 |     "Return-Receipt-To",
 60 |     "Resent-Reply-To",
 61 |     "Mail-Reply-To",
 62 |     "Mail-Followup-To",
 63 |   ];
 64 |   let address_tests = [
 65 |     [{ name: "", email: "" }, ""],
 66 |     [
 67 |       { name: "John Doe", email: "john.doe@test.invalid" },
 68 |       "John Doe <john.doe@test.invalid>",
 69 |     ],
 70 |     [
 71 |       [{ name: "John Doe", email: "john.doe@test.invalid" }],
 72 |       "John Doe <john.doe@test.invalid>",
 73 |     ],
 74 |     [
 75 |       { name: "undisclosed-recipients", group: [] },
 76 |       "undisclosed-recipients: ;",
 77 |     ],
 78 |   ];
 79 |   addressing_headers.forEach(function(header) {
 80 |     testHeader(header, address_tests);
 81 |   });
 82 | 
 83 |   let date_headers = [
 84 |     "Date",
 85 |     "Expires",
 86 |     "Injection-Date",
 87 |     "NNTP-Posting-Date",
 88 |     "Resent-Date",
 89 |   ];
 90 |   let date_tests = [
 91 |     [
 92 |       new MockDate("2012-09-06T08:08:21-0700"),
 93 |       "Thu, 6 Sep 2012 08:08:21 -0700",
 94 |     ],
 95 |   ];
 96 |   date_headers.forEach(function(header) {
 97 |     testHeader(header, date_tests);
 98 |   });
 99 | 
100 |   let unstructured_headers = [
101 |     "Comments",
102 |     "Content-Description",
103 |     "Keywords",
104 |     "Subject",
105 |   ];
106 |   let unstructured_tests = [
107 |     ["", ""],
108 |     ["This is a subject", "This is a subject"],
109 |     [
110 |       "\u79c1\u306f\u4ef6\u540d\u5348\u524d",
111 |       "=?UTF-8?B?56eB44Gv5Lu25ZCN5Y2I5YmN?=",
112 |     ],
113 |   ];
114 |   unstructured_headers.forEach(function(header) {
115 |     testHeader(header, unstructured_tests);
116 |   });
117 | 
118 |   it("emitStructuredHeaders", function() {
119 |     let headers = new Map();
120 |     headers.set("From", [{ name: "", email: "bugzilla-daemon@mozilla.org" }]);
121 |     headers.set("subject", ["[Bug 939557] browsercomps.dll failed to build"]);
122 |     headers.set("x-capitalization-test", ["should capitalize"]);
123 |     let str = headeremitter.emitStructuredHeaders(headers, {});
124 |     assert.equal(
125 |       str,
126 |       "From: bugzilla-daemon@mozilla.org\r\n" +
127 |         "Subject: [Bug 939557] browsercomps.dll failed to build\r\n" +
128 |         "X-Capitalization-Test: should capitalize\r\n"
129 |     );
130 |   });
131 | });
132 | 


--------------------------------------------------------------------------------
/docs/RelatedSpecifications.mkd:
--------------------------------------------------------------------------------
 1 | Related specifications and MIME extensions
 2 | ==========================================
 3 | 
 4 | Properly generating or parsing email and news messages requires referring to a
 5 | diverse set of specifications. All of the specifications used to develop jsmime
 6 | and to which frequent reference is made in the code are provided here as a list.
 7 | Although many specifications have multiple versions, only the newest version of
 8 | each specification is linked to. As jsmime is still in development, some
 9 | specifications are not yet consulted for implementation. These are marked with
10 | XXX markers.
11 | 
12 | Basic format of bodies:
13 | 
14 | * [RFC 2045](http://tools.ietf.org/html/rfc2045):
15 |   MIME Part 1, Format of Internet Message Bodies
16 | * [RFC 2046](http://tools.ietf.org/html/rfc2046):
17 |   MIME Part 2, Media Types
18 | 
19 | Structured header interpretation:
20 | 
21 | * [RFC 2047](http://tools.ietf.org/html/rfc2047):
22 |   MIME Part 3, Message Header Extensions for Non-ASCII Text
23 | * [RFC 2231](http://tools.ietf.org/html/rfc2231):
24 |   MIME Parameter Value and Encoded Word Extensions
25 | * [RFC 5322](http://tools.ietf.org/html/rfc5322):
26 |   Internet Message Format
27 | * [RFC 5536](http://tools.ietf.org/html/rfc5536):
28 |   Netnews Article Format
29 | * [RFC 6532](http://tools.ietf.org/html/rfc6532):
30 |   Internationalized Email Headers
31 | 
32 | Body decoding:
33 | * XXX [Uuencode](http://pubs.opengroup.org/onlinepubs/7908799/xcu/uuencode.html)
34 | * XXX [yEnc](http://www.yenc.org/yenc-draft.1.3.txt)
35 | * XXX [TNEF](http://msdn.microsoft.com/en-us/library/cc425498%28v=exchg.80%29.aspx)
36 | * XXX [RFC 3156](http://tools.ietf.org/html/rfc3156)
37 |   MIME Security with PGP
38 | * XXX [RFC 4880](http://tools.ietf.org/html/rfc4880)
39 |   OpenPGP Message Format
40 | * XXX [RFC 5751](http://tools.ietf.org/html/rfc5751)
41 |   S/MIME Version 3.2 Message Format Specification
42 | 
43 | Other:
44 | * XXX [RFC 2387](http://tools.ietf.org/html/rfc2387)
45 |   The MIME Multipart/Related Content-type
46 | * XXX [RFC 2392](http://tools.ietf.org/html/rfc2392)
47 |   Content-ID and Message-ID Uniform Resource Locators
48 | * XXX [RFC 2557](http://tools.ietf.org/html/rfc2557)
49 |   MIME Encapsulation of Aggregate Documents, such as HTML (MHTML)
50 | * [RFC 3501](http://tools.ietf.org/html/rfc3501)
51 |   IMAP Version 4rev1 (specifically the part numbering section)
52 | * XXX [RFC 3676](http://tools.ietf.org/html/rfc3676)
53 |   The Text/Plain Format and DelSp Parameters
54 | * XXX [RFC 3798](http://tools.ietf.org/html/rfc3798)
55 |   Message Disposition Notification
56 | 
57 | 
58 | Willful violations
59 | ------------------
60 | 
61 | An unfortunate consequence of historical developments for email is that the set
62 | of specifications for handling email is often an insufficient guide to actually
63 | working with email in practice. For further aid to prospective implementers, the
64 | following is a list of all deliberate deviations from specification
65 | requirements.
66 | 
67 | * All three line conventions are treated as a CRLF (`\r`, `\n`, `\r\n`). In this
68 |   parser, it is possible to use a mixture of line endings in the same file,
69 |   although this is highly unlikely to come up in practice.
70 | * MIME and message headers need not be either ASCII or UTF-8, as use of other
71 |   character sets without proper wrapping is very common in practice. For more
72 |   precise rules on how non-ASCII headers are interpreted, read the comments in
73 |   the source code.
74 | * CFWS is permitted in fewer places than the specifications require. This was
75 |   done to match other parsers (including the one this replaced, among others).
76 |   In particular, the Content-Type parameter needs to be a single run of text, so
77 |   `multipart / mixed` would be treated as an invalid type.
78 | * If the first line of a headers block starts with the Berkeley mailbox
79 |   delimiter (`From` followed by a space character), it is ignored.
80 | * A `message/rfc822`-like part may be encoded in quoted-printable or base64,
81 |   while RFC 6532 only permits this for `message/global`.
82 | * Decoding the `%hh` specifiers (per RFC 2231) happens for any parameter whose
83 |   name ends with a `'*'`, regardless of whether it is enclosed in a
84 |   string.
85 | * RFC 2047 encoded-words may contain embedded spaces.
86 | * RFC 2047 decoding tolerates multibyte characters being split between adjacent
87 |   encoded-words.
88 | * A quoted string whose contents is a full RFC 2047 encoded-word is decoded.
89 | 


--------------------------------------------------------------------------------
/lib/utils.js:
--------------------------------------------------------------------------------
  1 | 
  2 | /**
  3 |  * Decode a quoted-printable buffer into a binary string.
  4 |  *
  5 |  * @param buffer {BinaryString} The string to decode.
  6 |  * @returns {Array(BinaryString, BinaryString)} The first element of the array
  7 |  *          is the decoded string. The second element is always the empty
  8 |  *          string.
  9 |  */
 10 | export  function decode_qp(buffer) {
 11 |   // Unlike base64, quoted-printable isn't stateful across multiple lines, so
 12 |   // there is no need to buffer input, so we can always ignore more.
 13 |   let decoded = buffer.replace(
 14 |     // Replace either =<hex><hex> or =<wsp>CRLF
 15 |     /=([0-9A-F][0-9A-F]|[ \t]*(\r\n|[\r\n]|$))/gi,
 16 |     function(match, param) {
 17 |       // If trailing text matches [ \t]*CRLF, drop everything, since it's a
 18 |       // soft line break.
 19 |       if (param.trim().length == 0) {
 20 |         return "";
 21 |       }
 22 |       return String.fromCharCode(parseInt(param, 16));
 23 |     }
 24 |   );
 25 |   return [decoded, ""];
 26 | }
 27 | 
 28 | /**
 29 |  * Decode a base64 buffer into a binary string. Unlike window.atob, the buffer
 30 |  * may contain non-base64 characters that will be ignored.
 31 |  *
 32 |  * @param buffer {BinaryString} The string to decode.
 33 |  * @param more   {Boolean}      If true, we expect that this function could be
 34 |  *                              called again and should retain extra data. If
 35 |  *                              false, we should flush all pending output.
 36 |  * @returns {Array(BinaryString, BinaryString)} The first element of the array
 37 |  *          is the decoded string. The second element contains the data that
 38 |  *          could not be decoded and needs to be retained for the next call.
 39 |  */
 40 | export function decode_base64(buffer, more) {
 41 |   // Drop all non-base64 characters
 42 |   let sanitize = buffer.replace(/[^A-Za-z0-9+/=]/g, "");
 43 |   // Remove harmful `=' chars in the middle.
 44 |   sanitize = sanitize.replace(/=+([A-Za-z0-9+/])/g, "$1");
 45 |   // We need to encode in groups of 4 chars. If we don't have enough, leave the
 46 |   // excess for later. If there aren't any more, drop enough to make it 4.
 47 |   let excess = sanitize.length % 4;
 48 |   if (excess != 0 && more) {
 49 |     buffer = sanitize.slice(-excess);
 50 |   } else {
 51 |     buffer = "";
 52 |   }
 53 |   sanitize = sanitize.substring(0, sanitize.length - excess);
 54 |   // Delete all unnecessary '====' in padding.
 55 |   sanitize = sanitize.replace(/(====)+$/g, "");
 56 |   // Use the atob function we (ought to) have in global scope.
 57 |   return [atob(sanitize), buffer];
 58 | }
 59 | 
 60 | /**
 61 |  * Converts a binary string into a Uint8Array buffer.
 62 |  *
 63 |  * @param buffer {BinaryString} The string to convert.
 64 |  * @returns {Uint8Array} The converted data.
 65 |  */
 66 | export function stringToUint8Array(buffer) {
 67 |   var typedarray = new Uint8Array(buffer.length);
 68 |   for (var i = 0; i < buffer.length; i++) {
 69 |     typedarray[i] = buffer.charCodeAt(i);
 70 |   }
 71 |   return typedarray;
 72 | }
 73 | 
 74 | /**
 75 |  * Converts a Uint8Array buffer to a binary string.
 76 |  *
 77 |  * @param buffer {Uint8Array} The Uint8Array to convert.
 78 |  * @returns {string} The converted string.
 79 |  */
 80 | export function uint8ArrayToString(buffer) {
 81 |   var string = "";
 82 |   for (let i = 0; i < buffer.length; i += 100) {
 83 |     string += String.fromCharCode.apply(
 84 |       undefined,
 85 |       buffer.subarray(i, i + 100)
 86 |     );
 87 |   }
 88 |   return string;
 89 | }
 90 | 
 91 | export const base64ToUint8Array = (base64) => stringToUint8Array(atob(base64));
 92 | 
 93 | /** A list of month names for Date parsing. */
 94 | export const kMonthNames = [
 95 |   "Jan",
 96 |   "Feb",
 97 |   "Mar",
 98 |   "Apr",
 99 |   "May",
100 |   "Jun",
101 |   "Jul",
102 |   "Aug",
103 |   "Sep",
104 |   "Oct",
105 |   "Nov",
106 |   "Dec",
107 | ];
108 | 
/**
 * Join several Uint8Arrays into a single one.
 *
 * A list with exactly one entry is answered with that entry itself (no copy
 * is made and it is not type-checked).
 *
 * @param {Array<Uint8Array>} arrays The arrays to join, in order.
 * @returns {Uint8Array} The concatenated bytes.
 * @throws {Error} If an entry is not a Uint8Array.
 */
export function concatUint8Arrays(arrays) {
  if (arrays.length === 1) {
    return arrays[0];
  }

  // First pass: validate the entries and work out the size of the result.
  let totalLength = 0;
  for (const chunk of arrays) {
    if (!(chunk instanceof Uint8Array)) {
      throw new Error('concatArrays: Data must be in the form of a Uint8Array');
    }
    totalLength += chunk.length;
  }

  // Second pass: copy every entry behind the previous one.
  const joined = new Uint8Array(totalLength);
  let offset = 0;
  for (const chunk of arrays) {
    joined.set(chunk, offset);
    offset += chunk.length;
  }
  return joined;
}
130 | 
/**
 * Render bytes as a lowercase hexadecimal string, two digits per byte.
 *
 * @param {Uint8Array} bytes The bytes to render.
 * @returns {string} The hex string.
 */
export function uint8ArrayToHex(bytes) {
  const digits = Array.from(bytes, (byte) => byte.toString(16).padStart(2, '0'));
  return digits.join('');
}
139 | 


--------------------------------------------------------------------------------
/test/utils.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * A class which appears to act like the Date class with customizable timezone
  3 |  * offsets.
  4 |  * @param {String} iso8601String An ISO-8601 date/time string including a
  5 |  *                               timezone offset.
  6 |  */
  7 |  export function MockDate(iso8601String) {
  8 |   // Find the timezone offset (Z or ±hhmm) from the ISO-8601 date string, and
  9 |   // then convert that into a number of minutes.
 10 |   let parse = /\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d(Z|[+-]\d{4})/.exec(
 11 |     iso8601String
 12 |   );
 13 |   let tzOffsetStr = parse[1];
 14 |   if (tzOffsetStr == "Z") {
 15 |     this._tzOffset = 0;
 16 |   } else {
 17 |     this._tzOffset =
 18 |       parseInt(tzOffsetStr.substring(1, 3)) * 60 +
 19 |       parseInt(tzOffsetStr.substring(3));
 20 |     if (tzOffsetStr[0] == "-") {
 21 |       this._tzOffset = -this._tzOffset;
 22 |     }
 23 |   }
 24 | 
 25 |   // To store the offset, we store both the real time in _realDate and a time
 26 |   // that is offset by the tzOffset in _shiftedDate. Only the getUTC* methods
 27 |   // should be used on these properties, to avoid problems caused by daylight
 28 |   // savings time or other timezone effects. This shifting is always legal
 29 |   // because ES6 is specified to assume that leap seconds do not exist, so there
 30 |   // are always 60 seconds in a minute.
 31 |   this._realDate = new Date(iso8601String);
 32 |   this._shiftedDate = new Date(
 33 |     this._realDate.getTime() + this._tzOffset * 60 * 1000
 34 |   );
 35 | }
 36 | MockDate.prototype = {
 37 |   getTimezoneOffset() {
 38 |     // This property is reversed from how it's defined in ISO 8601, i.e.,
 39 |     // UTC +0100 needs to return -60.
 40 |     return -this._tzOffset;
 41 |   },
 42 |   getTime() {
 43 |     return this._realDate.getTime();
 44 |   },
 45 | };
 46 | 
 47 | // Provide an implementation of Date methods that will be need in JSMime. For
 48 | // the time being, we only need .get* methods.
 49 | for (let name of Object.getOwnPropertyNames(Date.prototype)) {
 50 |   // Only copy getters, not setters or x.toString.
 51 |   if (!name.startsWith("get")) {
 52 |     continue;
 53 |   }
 54 |   // No redefining any other names on MockDate.
 55 |   if (MockDate.prototype.hasOwnProperty(name)) {
 56 |     continue;
 57 |   }
 58 | 
 59 |   if (name.includes("UTC")) {
 60 |     // 'name' is already supposed to be freshly bound per newest ES6 drafts, but
 61 |     // current ES6 implementations reuse the bindings. Until implementations
 62 |     // catch up, use a new let to bind it freshly.
 63 |     let boundName = name;
 64 |     Object.defineProperty(MockDate.prototype, name, {
 65 |       value(...aArgs) {
 66 |         return Date.prototype[boundName].call(this._realDate, aArgs);
 67 |       },
 68 |     });
 69 |   } else {
 70 |     let newName = "getUTC" + name.substr(3);
 71 |     Object.defineProperty(MockDate.prototype, name, {
 72 |       value(...aArgs) {
 73 |         return Date.prototype[newName].call(this._shiftedDate, aArgs);
 74 |       },
 75 |     });
 76 |   }
 77 | }
 78 | 
 79 | 
 80 | // A file cache for read_file.
 81 | const file_cache = {};
 82 | /**
 83 |  * Read a file into a string (all line endings become CRLF).
 84 |  * @param file  The name of the file to read, relative to the data/ directory.
 85 |  * @param start The first line of the file to return, defaulting to 0
 86 |  * @param end   The last line of the file to return, defaulting to the number of
 87 |  *              lines in the file.
 88 |  * @return      Promise<String> The contents of the file as a binary string.
 89 |  */
 90 | export function read_file(file, start = undefined, end = undefined) {
 91 |   if (!(file in file_cache)) {
 92 |     var realFile = new Promise(function(resolve, reject) {
 93 |       fetch('base/test/data/' + file)
 94 |         .then(response => response.ok ? response.arrayBuffer() : reject(new Error('error fetching file')))
 95 |         .then(buffer => {
 96 |           resolve(new Uint8Array(buffer))
 97 |         })
 98 |         .catch(err => reject(err))
 99 |     });
100 |     var loader = realFile.then(function(contents) {
101 |       var inStrForm = "";
102 |       while (contents.length > 0) {
103 |         inStrForm += String.fromCharCode.apply(
104 |           null,
105 |           contents.subarray(0, 1024)
106 |         );
107 |         contents = contents.subarray(1024);
108 |       }
109 |       return inStrForm.split(/\r\n|[\r\n]/);
110 |     });
111 |     file_cache[file] = loader;
112 |   }
113 |   return file_cache[file].then(function(contents) {
114 |     if (start !== undefined) {
115 |       contents = contents.slice(start - 1, end - 1);
116 |     }
117 |     return contents.join("\r\n");
118 |   });
119 | }
120 | 
121 | export function read_file_raw(file) {
122 |   return new Promise(function(resolve, reject) {
123 |       fetch('base/test/data/' + file)
124 |         .then(response => response.ok ? response.arrayBuffer() : reject(new Error('error fetching file')))
125 |         .then(buffer => {
126 |           resolve(new Uint8Array(buffer))
127 |         })
128 |         .catch(err => reject(err))
129 |     });
130 | }
131 | 
132 | export function isFirefox() {
133 |   return navigator.userAgent.toLowerCase().includes('firefox')
134 | }
135 | 


--------------------------------------------------------------------------------
/lib/structuredHeaders.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * This file implements knowledge of how to encode or decode structured headers
  3 |  * for several key headers. It is not meant to be used externally to jsmime.
  4 |  */
  5 | 
  6 | var structuredDecoders = new Map();
  7 | var structuredEncoders = new Map();
  8 | var preferredSpellings = new Map();
  9 | 
 10 | function addHeader(name, decoder, encoder) {
 11 |   var lowerName = name.toLowerCase();
 12 |   structuredDecoders.set(lowerName, decoder);
 13 |   structuredEncoders.set(lowerName, encoder);
 14 |   preferredSpellings.set(lowerName, name);
 15 | }
 16 | 
 17 | // Addressing headers: We assume that they can be specified in 1* form (this is
 18 | // false for From, but it's close enough to the truth that it shouldn't matter).
 19 | // There is no need to specialize the results for the header, so just pun it
 20 | // back to parseAddressingHeader.
 21 | function parseAddress(value) {
 22 |   let headerparser = this;
 23 |   return value.reduce(function(results, header) {
 24 |     return results.concat(headerparser.parseAddressingHeader(header, true));
 25 |   }, []);
 26 | }
 27 | function writeAddress(value) {
 28 |   // Make sure the input is an array (accept a single entry)
 29 |   if (!Array.isArray(value)) {
 30 |     value = [value];
 31 |   }
 32 |   this.addAddresses(value);
 33 | }
 34 | 
 35 | // Addressing headers from RFC 5322:
 36 | addHeader("Bcc", parseAddress, writeAddress);
 37 | addHeader("Cc", parseAddress, writeAddress);
 38 | addHeader("From", parseAddress, writeAddress);
 39 | addHeader("Reply-To", parseAddress, writeAddress);
 40 | addHeader("Resent-Bcc", parseAddress, writeAddress);
 41 | addHeader("Resent-Cc", parseAddress, writeAddress);
 42 | addHeader("Resent-From", parseAddress, writeAddress);
 43 | addHeader("Resent-Reply-To", parseAddress, writeAddress);
 44 | addHeader("Resent-Sender", parseAddress, writeAddress);
 45 | addHeader("Resent-To", parseAddress, writeAddress);
 46 | addHeader("Sender", parseAddress, writeAddress);
 47 | addHeader("To", parseAddress, writeAddress);
 48 | // From RFC 5536:
 49 | addHeader("Approved", parseAddress, writeAddress);
 50 | // From RFC 3798:
 51 | addHeader("Disposition-Notification-To", parseAddress, writeAddress);
 52 | // Non-standard headers:
 53 | addHeader("Delivered-To", parseAddress, writeAddress);
 54 | addHeader("Return-Receipt-To", parseAddress, writeAddress);
 55 | 
 56 | // http://cr.yp.to/proto/replyto.html
 57 | addHeader("Mail-Reply-To", parseAddress, writeAddress);
 58 | addHeader("Mail-Followup-To", parseAddress, writeAddress);
 59 | 
 60 | // Parameter-based headers. Note that all parameters are slightly different, so
 61 | // we use slightly different variants here.
 62 | function parseParameterHeader(value, do2231, do2047) {
 63 |   // Only use the first header for parameters; ignore subsequent redefinitions.
 64 |   return this.parseParameterHeader(value[0], do2231, do2047);
 65 | }
 66 | 
 67 | // RFC 2045
 68 | function parseContentType(value) {
 69 |   let params = parseParameterHeader.call(this, value, false, false);
 70 |   let origtype = params.preSemi;
 71 |   let parts = origtype.split("/");
 72 |   if (parts.length != 2) {
 73 |     // Malformed. Return to text/plain. Evil, ain't it?
 74 |     params = new Map();
 75 |     parts = ["text", "plain"];
 76 |   }
 77 |   let mediatype = parts[0].toLowerCase();
 78 |   let subtype = parts[1].toLowerCase();
 79 |   let type = mediatype + "/" + subtype;
 80 |   let structure = new Map();
 81 |   structure.mediatype = mediatype;
 82 |   structure.subtype = subtype;
 83 |   structure.type = type;
 84 |   params.forEach(function(value, name) {
 85 |     structure.set(name.toLowerCase(), value);
 86 |   });
 87 |   return structure;
 88 | }
 89 | structuredDecoders.set("Content-Type", parseContentType);
 90 | 
 91 | // Unstructured headers (just decode RFC 2047 for the first header value)
 92 | function parseUnstructured(values) {
 93 |   return this.decodeRFC2047Words(values[0]); // later values are ignored
 94 | }
 95 | function writeUnstructured(value) {
 96 |   this.addUnstructured(value); // `this` must provide addUnstructured
 97 | }
 98 | 
 99 | // Message-ID headers.
100 | function parseMessageID(values) {
101 |   // TODO: Proper parsing of these headers is not supported yet.
102 |   return this.decodeRFC2047Words(values[0]);
103 | }
104 | function writeMessageID(value) {
105 |   // TODO: Proper support for writing these headers is not implemented yet.
106 |   this.addUnstructured(value);
107 | }
108 | 
109 | // RFC 5322
110 | addHeader("Comments", parseUnstructured, writeUnstructured);
111 | addHeader("Keywords", parseUnstructured, writeUnstructured);
112 | addHeader("Subject", parseUnstructured, writeUnstructured);
113 | 
114 | // RFC 2045
115 | addHeader("MIME-Version", parseUnstructured, writeUnstructured);
116 | addHeader("Content-Description", parseUnstructured, writeUnstructured);
117 | 
118 | // RFC 7231
119 | addHeader("User-Agent", parseUnstructured, writeUnstructured);
120 | 
121 | // Date headers
122 | function parseDate(values) {
123 |   return this.parseDateHeader(values[0]); // only the first value is parsed
124 | }
125 | function writeDate(value) {
126 |   this.addDate(value); // `this` must provide addDate
127 | }
128 | 
129 | // RFC 5322
130 | addHeader("Date", parseDate, writeDate);
131 | addHeader("Resent-Date", parseDate, writeDate);
132 | // RFC 5536
133 | addHeader("Expires", parseDate, writeDate);
134 | addHeader("Injection-Date", parseDate, writeDate);
135 | addHeader("NNTP-Posting-Date", parseDate, writeDate);
136 | 
137 | // RFC 5322
138 | addHeader("Message-ID", parseMessageID, writeMessageID);
139 | addHeader("Resent-Message-ID", parseMessageID, writeMessageID);
140 | 
141 | // Miscellaneous headers (those that don't fall under the above schemes):
142 | 
143 | // RFC 2045
144 | structuredDecoders.set("Content-Transfer-Encoding", function(values) {
145 |   return values[0].toLowerCase();
146 | });
147 | structuredEncoders.set("Content-Transfer-Encoding", writeUnstructured);
148 | 
149 | // Some clients like outlook.com send non-compliant References headers that
150 | // separate values using commas. Also, some clients don't separate References
151 | // with spaces, since these are optional according to RFC2822. So here we
152 | // preprocess these headers (see bug 1154521 and bug 1197686).
153 | function preprocessMessageIDs(values) {
154 |   let msgId = /<[^>]*>/g;
155 |   let match,
156 |     ids = [];
157 |   while ((match = msgId.exec(values)) !== null) {
158 |     ids.push(match[0]);
159 |   }
160 |   return ids.join(" ");
161 | }
162 | structuredDecoders.set("References", preprocessMessageIDs);
163 | structuredDecoders.set("In-Reply-To", preprocessMessageIDs);
164 | 
165 | export {
166 |   structuredDecoders as decoders,
167 |   structuredEncoders as encoders,
168 |   preferredSpellings as spellings,
169 | };
170 | 


--------------------------------------------------------------------------------
/test/test_mail_parser.ts:
--------------------------------------------------------------------------------
  1 | import { expect } from "chai";
  2 | import { read_file_raw } from "./utils";
  3 | import { parseMail } from "../";
  4 | import { uint8ArrayToString } from "../lib/utils";
  5 | 
  6 | const toBase64 = uInt8Array => btoa(uint8ArrayToString(uInt8Array)); // bytes -> base64 via uint8ArrayToString + btoa
  7 | 
  8 | describe('mail parser', () => {
  9 |   it('correctly parses multipart message with both HTML and plain text data', async () => {
 10 |     const eml = await read_file_raw("multipart-complex1");
 11 |     const { body, attachments } = parseMail(eml);
 12 | 
 13 |     expect(body.html).to.equal('<html><head>This part should be returned.</head></html>\n');
 14 |     expect(body.text).to.equal("This part shouldn't.\n\nNeither should this part!\n");
 15 | 
 16 |     expect(toBase64(attachments[0].content)).to.equal('VGhpcyBpc24ndCByZWFsbHkgYW4gYXBwbGljYXRpb24vb2N0ZXQtc3RyZWFtLiA7KQ==');
 17 |     expect(attachments[0].contentType).to.equal('application/octet-stream');
 18 |     expect(attachments[0].fileName).to.equal('');
 19 | 
 20 |     expect(attachments[1].contentType).to.equal('image/png');
 21 |     expect(toBase64(attachments[1].content)).to.equal('TmVpdGhlciBpcyB0aGlzIGFuIGltYWdlL3BuZy4=');
 22 |     expect(attachments[1].fileName).to.equal('');
 23 |   });
 24 | 
 25 |   it('correctly parses UTF-8 string input', async () => {
 26 |     const eml = `Content-Type: multipart/mixed;
 27 |     boundary="------------cJMvmFk1NneB7MT4jwYHY7ap"
 28 | 
 29 | This is a multi-part message in MIME format.
 30 | --------------cJMvmFk1NneB7MT4jwYHY7ap
 31 | Content-Type: text/plain; charset=UTF-8;
 32 | Content-Transfer-Encoding: 8bit
 33 | 
 34 | Import HTML cöntäct//Subjεέςτ//
 35 | 
 36 | --------------cJMvmFk1NneB7MT4jwYHY7ap--`;
 37 |     const { body } = parseMail(eml);
 38 | 
 39 |     expect(body.text).to.equal('Import HTML cöntäct//Subjεέςτ//\n');
 40 |   });
 41 | 
 42 |   it('correctly parses SHIFT-JIS body with png attachment', async () => {
 43 |     const expectedText = 'Portable Network Graphics（ポータブル・ネットワーク・グラフィックス、PNG）はコンピュータでビットマップ画像を扱うファイルフォーマットである。圧縮アルゴリズムとしてDeflateを採用している、圧縮による画質の劣化のない可逆圧縮の画像ファイルフォーマットである。\n';
 44 |     const expectedAttachmentContent = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvtlmUAAAAASUVORK5CYII=';
 45 | 
 46 |     const eml = await read_file_raw("shift-jis-image");
 47 |     const { body, subject, headers, attachments: [attachment] } = parseMail(eml);
 48 | 
 49 |     expect(body.text).to.equal(expectedText);
 50 |     expect(subject).to.equal('Shift-JIS and PNG test');
 51 |     expect(headers.subject[0]).to.equal('Shift-JIS and PNG test');
 52 |     expect(toBase64(attachment.content)).to.equal(expectedAttachmentContent);
 53 |     expect(attachment.size).to.equal(251);
 54 |     expect(attachment.contentType).to.equal('image/png');
 55 |     expect(attachment.fileName).to.equal('');
 56 |   });
 57 | 
 58 |   it('correctly reads binary attachments', async () => {
 59 |     const eml = await read_file_raw("multipart-binary");
 60 |     const { attachments: [attachment] } = parseMail(eml);
 61 | 
 62 |     expect(attachment.content).to.deep.equal(new Uint8Array([1, 2, 3]));
 63 |     expect(attachment.contentType).to.equal('application/octect-stream');
 64 |     expect(attachment.fileName).to.equal('');
 65 |   });
 66 | 
 67 |   it('includes the content-id and filename for each attachment', async () => {
 68 |     const eml = await read_file_raw("multipart-content-id");
 69 |     const { attachments: [attachment1, attachment2] } = parseMail(eml);
 70 | 
 71 |     expect(attachment1.content).to.deep.equal(attachment2.content);
 72 |     expect(attachment1.contentId).to.equal('<001110.102211@siebel.com>');
 73 |     expect(attachment1.contentType).to.equal('image/png');
 74 |     expect(attachment1.fileName).to.equal('');
 75 |     expect(attachment2.contentType).to.equal('image/png');
 76 |     expect(attachment2.fileName).to.equal('test.png');
 77 |   });
 78 | 
 79 |   it('returns an empty array for empty attachment body', async () => {
 80 |     const eml = await read_file_raw("multipart-empty-attachment");
 81 |     const { attachments: [attachment] } = parseMail(eml);
 82 | 
 83 |     expect(attachment.content).to.be.instanceOf(Uint8Array);
 84 |     expect(attachment.content).to.have.length(0);
 85 |     expect(attachment.contentType).to.equal('text/rfc822-headers');
 86 |     expect(attachment.fileName).to.equal('');
 87 |   });
 88 | 
 89 |   it('returns null for non-existent html body', async () => {
 90 |     const eml = `Content-Type: multipart/mixed; boundary="------------P7E1gxp6rCvfn0to5n3PZ2h0";
 91 |     protected-headers="v1"
 92 | From: Sender <sender@test.com>
 93 | To: receiver@test.com
 94 | Message-ID: <39b3134c-0fcd-4618-b1bd-2b20481bf2af>
 95 | Subject: Empty message test
 96 | 
 97 | --------------P7E1gxp6rCvfn0to5n3PZ2h0
 98 | Content-Type: text/plain; charset=UTF-8; format=flowed
 99 | Content-Transfer-Encoding: 7bit
100 | 
101 | 
102 | --------------P7E1gxp6rCvfn0to5n3PZ2h0--`
103 |     const { subject, body: { html, text } } = parseMail(eml);
104 | 
105 |     expect(subject).to.equal('Empty message test');
106 |     expect(text).to.equal('');
107 |     expect(html).to.be.null;
108 |   });
109 | 
110 |   it('decodes the subject', async () => {
111 |     const eml = await read_file_raw("multipart-encrypted-subject-utf8");
112 |     const { subject, body } = parseMail(eml);
113 | 
114 |     expect(subject).to.equal('subject with emojis 😃😇');
115 |     expect(body.text).to.equal('test utf8 in encrypted subject\n');
116 |   });
117 | 
118 |   it('parses addresses and date', async () => {
119 |     const eml = await read_file_raw("multipart-addresses");
120 |     const { from, to, cc, bcc, date } = parseMail(eml);
121 | 
122 |     expect(from).to.deep.equal({ name: 'Some One', email: 'someone@test.com' });
123 |     expect(to).to.deep.equal([{ name: '', email: 'receiver@test.com' }, { name: '', email: 'another_receiver@test.com' }]);
124 |     expect(cc).to.deep.equal([{ name: '', email: 'copy@test.com' }]);
125 |     expect(bcc).to.be.undefined;
126 |     expect(date).to.deep.equal(new Date('Sun, 12 Jun 2022 17:21:02 +0200'));
127 |   });
128 | 
129 |   it('parses address groups', async () => {
130 |     const eml = await read_file_raw("multipart-addresses-groups");
131 |     const { from, to, cc, bcc } = parseMail(eml);
132 | 
133 |     expect(from).to.deep.equal({ name: 'Some One', email: 'someone@test.com' });
134 |     expect(to).to.deep.equal([{ name: 'undisclosed-recipients', group: [] }]);
135 |     expect(cc).to.deep.equal([
136 |       { name: 'Group A', group: [{ name: 'AA', email: 'a@b.com' }, { name: 'AB', email: 'a@b.com' }] },
137 |       { name: 'Group B', group: [{ name: '', email: 'b@b.com' }] }
138 |     ]);
139 |     expect(bcc).to.be.undefined;
140 |   });
141 | });
142 | 


--------------------------------------------------------------------------------
/test/test_structured_headers.js:
--------------------------------------------------------------------------------
  1 | import { assert } from "chai";
  2 | import { headerparser } from "../lib/jsmime";
  3 | 
  4 | function smartDeepEqual(actual, expected) {
  5 |   assert.deepEqual(actual, expected);
  6 |   if (actual instanceof Map && expected instanceof Map) {
  7 |     assert.deepEqual(
  8 |       Array.from(actual.entries()),
  9 |       Array.from(expected.entries())
 10 |     );
 11 |   }
 12 | }
 13 | 
 14 | function arrayTest(data, fn) {
 15 |   fn.toString = function() {
 16 |     let text = Function.prototype.toString.call(this);
 17 |     text = text.replace(/data\[([0-9]*)\]/g, function(m, p) {
 18 |       return JSON.stringify(data[p]);
 19 |     });
 20 |     return text;
 21 |   };
 22 |   return it(data[0].toString(), fn);
 23 | }
 24 | 
 25 | function testHeader(header, tests) {
 26 |   describe(header, function() {
 27 |     tests.forEach(function(data) {
 28 |       arrayTest(data, function() {
 29 |         smartDeepEqual(
 30 |           headerparser.parseStructuredHeader(header, data[0]),
 31 |           data[1]
 32 |         );
 33 |       });
 34 |     });
 35 |   });
 36 | }
 37 | 
 38 | function makeCT(media, sub, params) {
 39 |   var object = new Map();
 40 |   object.mediatype = media;
 41 |   object.subtype = sub;
 42 |   object.type = media + "/" + sub;
 43 |   for (let k in params) {
 44 |     object.set(k, params[k]);
 45 |   }
 46 |   return object;
 47 | }
 48 | describe("Structured headers", function() {
 49 |   // Ad-hoc header tests
 50 |   testHeader("Content-Type", [
 51 |     ["text/plain", makeCT("text", "plain", {})],
 52 |     ["text/html", makeCT("text", "html", {})],
 53 |     [
 54 |       'text/plain; charset="UTF-8"',
 55 |       makeCT("text", "plain", { charset: "UTF-8" }),
 56 |     ],
 57 |     ["text/", makeCT("text", "", {})],
 58 |     ["text", makeCT("text", "plain", {})],
 59 |     ["image/", makeCT("image", "", {})],
 60 |     ["image", makeCT("text", "plain", {})],
 61 |     ["hacker/x-mailnews", makeCT("hacker", "x-mailnews", {})],
 62 |     ["hacker/x-mailnews;", makeCT("hacker", "x-mailnews", {})],
 63 |     ["HACKER/X-MAILNEWS", makeCT("hacker", "x-mailnews", {})],
 64 |     [
 65 |       "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
 66 |       makeCT(
 67 |         "application",
 68 |         "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
 69 |         {}
 70 |       ),
 71 |     ],
 72 |     [
 73 |       "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;\r" +
 74 |         '\n name="Presentation.pptx"',
 75 |       makeCT(
 76 |         "application",
 77 |         "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
 78 |         { name: "Presentation.pptx" }
 79 |       ),
 80 |     ],
 81 |     ["", makeCT("text", "plain", {})],
 82 |     ["                                        ", makeCT("text", "plain", {})],
 83 |     ["text/plain; c", makeCT("text", "plain", {})],
 84 |     ["text/plain; charset=", makeCT("text", "plain", { charset: "" })],
 85 |     ['text/plain; charset="', makeCT("text", "plain", { charset: "" })],
 86 |     ["text\\/enriched", makeCT("text\\", "enriched", {})],
 87 |     ['multipart/mixed ";" wtf=stupid', makeCT("multipart", "mixed", {})],
 88 |     [
 89 |       "multipart/mixed; wtf=stupid",
 90 |       makeCT("multipart", "mixed", { wtf: "stupid" }),
 91 |     ],
 92 |     [
 93 |       "text/plain; CHARSET=Big5",
 94 |       makeCT("text", "plain", { charset: "Big5" }),
 95 |     ],
 96 |     [
 97 |       'text/html; CHARSET="Big5"',
 98 |       makeCT("text", "html", { charset: "Big5" }),
 99 |     ],
100 |     ['text/html; CHARSET="Big5', makeCT("text", "html", { charset: "Big5" })],
101 |     [["text/html", "multipart/mixed"], makeCT("text", "html", {})],
102 |   ]);
103 |   testHeader("Content-Transfer-Encoding", [
104 |     ["", ""],
105 |     ["8bit", "8bit"],
106 |     ["8BIT", "8bit"],
107 |     ["QuOtEd-PrInTaBlE", "quoted-printable"],
108 |     ["Base64", "base64"],
109 |     ["7bit", "7bit"],
110 |     [["7bit", "8bit"], "7bit"],
111 |     ["x-uuencode", "x-uuencode"],
112 |   ]);
113 | 
114 |   // Non-ad-hoc header tests
115 |   let addressing_headers = [
116 |     "From",
117 |     "To",
118 |     "Cc",
119 |     "Bcc",
120 |     "Sender",
121 |     "Reply-To",
122 |     "Resent-Bcc",
123 |     "Resent-To",
124 |     "Resent-From",
125 |     "Resent-Cc",
126 |     "Resent-Sender",
127 |     "Approved",
128 |     "Disposition-Notification-To",
129 |     "Delivered-To",
130 |     "Return-Receipt-To",
131 |     "Resent-Reply-To",
132 |     "Mail-Reply-To",
133 |     "Mail-Followup-To",
134 |   ];
135 |   let address_tests = [
136 |     ["", []],
137 |     ["a@example.invalid", [{ name: "", email: "a@example.invalid" }]],
138 |     [
139 |       "John Doe <a@example.invalid>",
140 |       [{ name: "John Doe", email: "a@example.invalid" }],
141 |     ],
142 |     [
143 |       "John Doe <A@EXAMPLE.INVALID>",
144 |       [{ name: "John Doe", email: "A@EXAMPLE.INVALID" }],
145 |     ],
146 |     [
147 |       "=?UTF-8?B?5bGx55Sw5aSq6YOO?= <a@example.invalid>",
148 |       [{ name: "\u5c71\u7530\u592a\u90ce", email: "a@example.invalid" }],
149 |     ],
150 |     [
151 |       "undisclosed-recipients:;",
152 |       [{ name: "undisclosed-recipients", group: [] }],
153 |     ],
154 |     [
155 |       "world: a@example.invalid, b@example.invalid;",
156 |       [
157 |         {
158 |           name: "world",
159 |           group: [
160 |             { name: "", email: "a@example.invalid" },
161 |             { name: "", email: "b@example.invalid" },
162 |           ],
163 |         },
164 |       ],
165 |     ],
166 |     // TODO when we support IDN:
167 |     // This should be \u4f8b.invalid instead (Japanese kanji for "example")
168 |     [
169 |       "\u5c71\u7530\u592a\u90ce <a@xn--fsq.invalid>",
170 |       [{ name: "\u5c71\u7530\u592a\u90ce", email: "a@xn--fsq.invalid" }],
171 |     ],
172 |     [
173 |       "\u5c71\u7530\u592a\u90ce <a@\u4f8b.invalid>",
174 |       [{ name: "\u5c71\u7530\u592a\u90ce", email: "a@\u4f8b.invalid" }],
175 |     ],
176 |     [
177 |       "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb@\u4f8b.invalid",
178 |       [
179 |         {
180 |           name: "",
181 |           email:
182 |             "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb@\u4f8b.invalid",
183 |         },
184 |       ],
185 |     ],
186 |     [
187 |       ["a@example.invalid", "b@example.invalid"],
188 |       [
189 |         { name: "", email: "a@example.invalid" },
190 |         { name: "", email: "b@example.invalid" },
191 |       ],
192 |     ],
193 |   ];
194 |   addressing_headers.forEach(function(header) {
195 |     testHeader(header, address_tests);
196 |   });
197 | 
198 |   let date_headers = [
199 |     "Date",
200 |     "Expires",
201 |     "Injection-Date",
202 |     "NNTP-Posting-Date",
203 |     "Resent-Date",
204 |   ];
205 |   let date_tests = [
206 |     ["Thu, 06 Sep 2012 08:08:21 -0700", new Date("2012-09-06T08:08:21-0700")],
207 |     ["This is so not a date", new Date(NaN)],
208 |   ];
209 |   date_headers.forEach(function(header) {
210 |     testHeader(header, date_tests);
211 |   });
212 | 
213 |   let multiple_unstructured_headers = ["In-Reply-To", "References"];
214 |   let multiple_unstructured_tests = [
215 |     ["<asdasdasd@asdasdasd.com>", "<asdasdasd@asdasdasd.com>"],
216 |     ["<asd@asd.com> <asdf@asdf.com>", "<asd@asd.com> <asdf@asdf.com>"],
217 | 
218 |     // This test is needed for clients sending non-compliant headers, see bug 1154521
219 |     [
220 |       "<asd@asd.com>,<asdf@asdf.com>,<asdfg@asdfg.com>",
221 |       "<asd@asd.com> <asdf@asdf.com> <asdfg@asdfg.com>",
222 |     ],
223 |     // Test for bug 1197686
224 |     [
225 |       "<asd@asd.com><asdf@asdf.com><asdfg@asdfg.com>",
226 |       "<asd@asd.com> <asdf@asdf.com> <asdfg@asdfg.com>",
227 |     ],
228 |   ];
229 |   multiple_unstructured_headers.forEach(function(header) {
230 |     testHeader(header, multiple_unstructured_tests);
231 |   });
232 | 
233 |   let unstructured_headers = [
234 |     "Comments",
235 |     "Content-Description",
236 |     "Keywords",
237 |     "Subject",
238 |   ];
239 |   let unstructured_tests = [
240 |     ["", ""],
241 |     ["This is a subject", "This is a subject"],
242 |     [["Subject 1", "Subject 2"], "Subject 1"],
243 |     [
244 |       "=?UTF-8?B?56eB44Gv5Lu25ZCN5Y2I5YmN?=",
245 |       "\u79c1\u306f\u4ef6\u540d\u5348\u524d",
246 |     ],
247 |   ];
248 |   unstructured_headers.forEach(function(header) {
249 |     testHeader(header, unstructured_tests);
250 |   });
251 | });
252 | 


--------------------------------------------------------------------------------
/test/test_header_emitter.js:
--------------------------------------------------------------------------------
  1 | 
  2 | import { assert } from 'chai';
  3 | import { headeremitter } from '../lib/jsmime';
  4 | 
  5 | import { MockDate } from './utils';
  6 | 
  7 | function arrayTest(data, fn) {
  8 |   fn.toString = function() {
  9 |     let text = Function.prototype.toString.call(this);
 10 |     text = text.replace(/data\[([0-9]*)\]/g, function(m, p) {
 11 |       return JSON.stringify(data[p]);
 12 |     });
 13 |     return text;
 14 |   };
 15 |   return it(JSON.stringify(data[0]), fn);
 16 | }
 17 | 
 18 | describe("headeremitter", function() {
 19 |   describe("addAddresses", function() {
 20 |     let handler = {
 21 |       reset(expected) {
 22 |         this.output = "";
 23 |         this.expected = expected;
 24 |       },
 25 |       deliverData(data) {
 26 |         this.output += data;
 27 |       },
 28 |       deliverEOF() {
 29 |         assert.equal(this.output, this.expected + "\r\n");
 30 |         for (let line of this.output.split("\r\n")) {
 31 |           assert.ok(line.length <= 30, "Line is too long");
 32 |         }
 33 |       },
 34 |     };
 35 |     let header_tests = [
 36 |       [[{ name: "", email: "" }], ""],
 37 |       [[{ name: "", email: "a@example.com" }], "a@example.com"],
 38 |       [
 39 |         [{ name: "John Doe", email: "a@example.com" }],
 40 |         "John Doe <a@example.com>",
 41 |       ],
 42 |       [
 43 |         [
 44 |           { name: "", email: "a@b.c" },
 45 |           { name: "", email: "b@b.c" },
 46 |         ],
 47 |         "a@b.c, b@b.c",
 48 |       ],
 49 |       [
 50 |         [
 51 |           { name: "JD", email: "a@a.c" },
 52 |           { name: "SD", email: "b@b.c" },
 53 |         ],
 54 |         "JD <a@a.c>, SD <b@b.c>",
 55 |       ],
 56 |       [
 57 |         [
 58 |           { name: "John Doe", email: "a@example.com" },
 59 |           { name: "Sally Doe", email: "b@example.com" },
 60 |         ],
 61 |         "John Doe <a@example.com>,\r\n Sally Doe <b@example.com>",
 62 |       ],
 63 |       [
 64 |         [
 65 |           {
 66 |             name: "My name is really long and I split somewhere",
 67 |             email: "a@a.c",
 68 |           },
 69 |         ],
 70 |         "My name is really long and I\r\n split somewhere <a@a.c>",
 71 |       ],
 72 |       // Note that the name is 29 chars here, so adding the email needs a break.
 73 |       [
 74 |         [{ name: "My name is really really long", email: "a@a.c" }],
 75 |         "My name is really really long\r\n <a@a.c>",
 76 |       ],
 77 |       [
 78 |         [
 79 |           { name: "", email: "a@a.c" },
 80 |           { name: "This name is long", email: "b@b.c" },
 81 |         ],
 82 |         "a@a.c,\r\n This name is long <b@b.c>",
 83 |       ],
 84 |       [
 85 |         [
 86 |           { name: "", email: "a@a.c" },
 87 |           { name: "This name is also long", email: "b@b.c" },
 88 |         ],
 89 |         "a@a.c,\r\n This name is also long\r\n <b@b.c>",
 90 |       ],
 91 |       [[{ name: "", email: "hi!bad@all.com" }], '"hi!bad"@all.com'],
 92 |       [[{ name: "", email: '"hi!bad"@all.com' }], '"hi!bad"@all.com'],
 93 |       [[{ name: "Doe, John", email: "a@a.com" }], '"Doe, John" <a@a.com>'],
 94 |       // This one violates the line length, so it underquotes instead.
 95 |       [
 96 |         [
 97 |           {
 98 |             name: "A really, really long name to quote",
 99 |             email: "a@example.com",
100 |           },
101 |         ],
102 |         'A "really," really long name\r\n to quote <a@example.com>',
103 |       ],
104 |       [
105 |         [
106 |           {
107 |             name: "Group",
108 |             group: [
109 |               { name: "", email: "a@a.c" },
110 |               { name: "", email: "b@b.c" },
111 |             ],
112 |           },
113 |         ],
114 |         "Group: a@a.c, b@b.c;",
115 |       ],
116 |       [[{ name: "No email address", email: "" }], "No email address"],
117 |       [
118 |         [{ name: "]user[ domain", email: "user@d.com" }],
119 |         '"]user[ domain" <user@d.com>',
120 |       ],
121 |       [
122 |         [
123 |           {
124 |             name: "Group",
125 |             group: [
126 |               { name: "]u[ d", email: "a@a.c" },
127 |               { name: "]u[ c", email: "b@b.c" },
128 |             ],
129 |           },
130 |         ],
131 |         'Group: "]u[ d" <a@a.c>,\r\n "]u[ c" <b@b.c>;',
132 |       ],
133 |       [
134 |         [{ name: "user@domain", email: "user@d.com" }],
135 |         '"user@domain" <user@d.com>',
136 |       ],
137 |       [
138 |         [
139 |           {
140 |             name: "Group",
141 |             group: [
142 |               { name: "u@d", email: "a@a.c" },
143 |               { name: "u@c", email: "b@b.c" },
144 |             ],
145 |           },
146 |         ],
147 |         'Group: "u@d" <a@a.c>,\r\n "u@c" <b@b.c>;',
148 |       ],
149 |     ];
150 |     header_tests.forEach(function(data) {
151 |       arrayTest(data, function() {
152 |         let emitter = headeremitter.makeStreamingEmitter(handler, {
153 |           softMargin: 30,
154 |           useASCII: false,
155 |         });
156 |         handler.reset(data[1]);
157 |         emitter.addAddresses(data[0]);
158 |         emitter.finish(true);
159 |       });
160 |     });
161 |   });
162 |   describe("addAddresses (RFC 2047)", function() {
163 |     let handler = {
164 |       reset(expected) {
165 |         this.output = "";
166 |         this.expected = expected;
167 |       },
168 |       deliverData(data) {
169 |         this.output += data;
170 |       },
171 |       deliverEOF() {
172 |         assert.equal(this.output, this.expected + "\r\n");
173 |         for (let line of this.output.split("\r\n")) {
174 |           assert.ok(line.length <= 30, "Line is too long");
175 |         }
176 |       },
177 |     };
178 |     let header_tests = [
179 |       [[{ name: "\u0436", email: "a@a.c" }], "=?UTF-8?B?0LY=?= <a@a.c>"],
180 |       [
181 |         [{ name: "dioxyg\u00e8ne", email: "a@a.c" }],
182 |         "=?UTF-8?Q?dioxyg=C3=A8ne?=\r\n <a@a.c>",
183 |       ],
184 |       // Prefer QP if base64 and QP are exactly the same length
185 |       [
186 |         [{ name: "oxyg\u00e8ne", email: "a@a.c" }],
187 |         // =?UTF-8?B?b3h5Z8OobmU=?=
188 |         "=?UTF-8?Q?oxyg=C3=A8ne?=\r\n <a@a.c>",
189 |       ],
190 |       [
191 |         [
192 |           {
193 |             name: "\ud83d\udca9\ud83d\udca9\ud83d\udca9\ud83d\udca9",
194 |             email: "a@a.c",
195 |           },
196 |         ],
197 |         "=?UTF-8?B?8J+SqfCfkqnwn5Kp?=\r\n =?UTF-8?B?8J+SqQ==?= <a@a.c>",
198 |       ],
199 |       // Bug 1088975: Since the encoded-word should be recognized as an atom,
200 |       // encode commas.
201 |       [
202 |         [{ name: "B\u00fcg 1088975, FirstName", email: "a@b.c" }],
203 |         "=?UTF-8?Q?B=C3=BCg_1088975?=\r\n" +
204 |           " =?UTF-8?Q?=2C_FirstName?=\r\n <a@b.c>",
205 |       ],
206 |     ];
207 |     header_tests.forEach(function(data) {
208 |       arrayTest(data, function() {
209 |         let emitter = headeremitter.makeStreamingEmitter(handler, {
210 |           softMargin: 30,
211 |           useASCII: true,
212 |         });
213 |         handler.reset(data[1]);
214 |         emitter.addAddresses(data[0]);
215 |         emitter.finish(true);
216 |       });
217 |     });
218 |   });
219 |   describe("addUnstructured (RFC 2047)", function() {
220 |     let handler = {
221 |       reset(expected) {
222 |         this.output = "";
223 |         this.expected = expected;
224 |       },
225 |       deliverData(data) {
226 |         this.output += data;
227 |       },
228 |       deliverEOF() {
229 |         assert.equal(this.output, this.expected + "\r\n");
230 |         for (let line of this.output.split("\r\n")) {
231 |           assert.ok(line.length <= 30, "Line is too long");
232 |         }
233 |       },
234 |     };
235 |     let header_tests = [
236 |       ["My house   burned down!", "My house burned down!"],
237 | 
238 |       // Which of the 32 "special" characters need to be encoded in QP encoding?
239 |       // Note: Encoding is forced by adding a \x7f at the end.
240 |       // These 5 don't need encoding:
241 |       [" ! * + - / \x7f", "=?UTF-8?Q?_!_*_+_-_/_=7F?="],
242 | 
243 |       // Bug 1438590: RFC2047 [5. (3)] requests the
244 |       // encoding of these 27 "special" characters:
245 |       // " # $ % & ' ( ) , . : ; < = > ? @ [ \ ] ^ _ ` { | } ~.
246 |       // Note: If there are enough characters for padding,
247 |       // QP is used and not base64.
248 |       ['Test " # \x7f', "=?UTF-8?Q?Test_=22_=23_=7F?="],
249 |       ["Test $ % \x7f", "=?UTF-8?Q?Test_=24_=25_=7F?="],
250 |       ["Test & ' \x7f", "=?UTF-8?Q?Test_=26_=27_=7F?="],
251 |       ["Test ( ) \x7f", "=?UTF-8?Q?Test_=28_=29_=7F?="],
252 |       ["Test , . \x7f", "=?UTF-8?Q?Test_=2C_=2E_=7F?="],
253 |       ["Test : ; \x7f", "=?UTF-8?Q?Test_=3A_=3B_=7F?="],
254 |       ["Test < = \x7f", "=?UTF-8?Q?Test_=3C_=3D_=7F?="],
255 |       ["Test > ? \x7f", "=?UTF-8?Q?Test_=3E_=3F_=7F?="],
256 |       ["Test @ [ \x7f", "=?UTF-8?Q?Test_=40_=5B_=7F?="],
257 |       ["Test \\ ] \x7f", "=?UTF-8?Q?Test_=5C_=5D_=7F?="],
258 |       ["Test ^ _ \x7f", "=?UTF-8?Q?Test_=5E_=5F_=7F?="],
259 |       ["Test ` { \x7f", "=?UTF-8?Q?Test_=60_=7B_=7F?="],
260 |       ["Test | } \x7f", "=?UTF-8?Q?Test_=7C_=7D_=7F?="],
261 |       ["Test ~ \x7f", "=?UTF-8?Q?Test_=7E_=7F?="],
262 | 
263 |       // But the 32 printable "special" characters don't need it in the first place!
264 |       [
265 |         "! \" # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \\ ] ^ _ ` { | } ~",
266 |         "! \" # $ % & ' ( ) * + , - . /\r\n" +
267 |           " : ; < = > ? @ [ \\ ] ^ _ ` { |\r\n" +
268 |           " } ~",
269 |       ],
270 | 
271 |       // Test to make sure 2047-encoding chooses the right values.
272 |       ["\u001f", "=?UTF-8?Q?=1F?="],
273 |       ["\u001fa", "=?UTF-8?Q?=1Fa?="],
274 |       ["\u001faa", "=?UTF-8?B?H2Fh?="],
275 |       ["\u001faaa", "=?UTF-8?Q?=1Faaa?="],
276 |       ["\u001faaa\u001f", "=?UTF-8?B?H2FhYR8=?="],
277 |       ["\u001faaa\u001fa", "=?UTF-8?B?H2FhYR9h?="],
278 |       ["\u001faaa\u001faa", "=?UTF-8?Q?=1Faaa=1Faa?="],
279 |       ["\u001faaa\u001faa\u001faaaa", "=?UTF-8?B?H2FhYR9hYR9hYWFh?="],
280 | 
281 |       // Make sure line breaking works right at the edge cases
282 |       ["\u001faaa\u001faaaaaaaaa", "=?UTF-8?Q?=1Faaa=1Faaaaaaaaa?="],
283 |       [
284 |         "\u001faaa\u001faaaaaaaaaa",
285 |         "=?UTF-8?Q?=1Faaa=1Faaaaaaaaa?=\r\n =?UTF-8?Q?a?=",
286 |       ],
287 | 
288 |       // Choose base64/qp independently for each word
289 |       [
290 |         "\ud83d\udca9\ud83d\udca9\ud83d\udca9a",
291 |         "=?UTF-8?B?8J+SqfCfkqnwn5Kp?=\r\n =?UTF-8?Q?a?=",
292 |       ],
293 | 
294 |       // Don't split a surrogate character!
295 |       [
296 |         "a\ud83d\udca9\ud83d\udca9\ud83d\udca9a",
297 |         "=?UTF-8?B?YfCfkqnwn5Kp?=\r\n =?UTF-8?B?8J+SqWE=?=",
298 |       ],
299 | 
300 |       // Spacing a UTF-8 string
301 |       [
302 |         "L'oxyg\u00e8ne est un \u00e9l\u00e9ment chimique du groupe des " +
303 |           "chalcog\u00e8nes",
304 |         //          1         2         3
305 |         // 123456789012345678901234567890
306 |         "=?UTF-8?B?TCdveHlnw6huZSBl?=\r\n" +
307 |           " =?UTF-8?B?c3QgdW4gw6lsw6lt?=\r\n" +
308 |           " =?UTF-8?Q?ent_chimique_du_g?=\r\n" +
309 |           " =?UTF-8?Q?roupe_des_chalcog?=\r\n" +
310 |           " =?UTF-8?B?w6huZXM=?=",
311 |       ],
312 |     ];
313 |     header_tests.forEach(function(data) {
314 |       arrayTest(data, function() {
315 |         let emitter = headeremitter.makeStreamingEmitter(handler, {
316 |           softMargin: 30,
317 |           useASCII: true,
318 |         });
319 |         handler.reset(data[1]);
320 |         emitter.addUnstructured(data[0]);
321 |         emitter.finish(true);
322 |       });
323 |     });
324 |   });
325 |   describe("addDate", function() {
326 |     let handler = {
327 |       reset(expected) {
328 |         this.output = "";
329 |         this.expected = expected;
330 |       },
331 |       deliverData(data) {
332 |         this.output += data;
333 |       },
334 |       deliverEOF() {
335 |         assert.equal(this.output, this.expected + "\r\n");
336 |       },
337 |     };
338 |     let header_tests = [
339 |       // Test basic day/month names
340 |       ["2000-01-01T00:00:00Z", "Sat, 1 Jan 2000 00:00:00 +0000"],
341 |       ["2000-02-01T00:00:00Z", "Tue, 1 Feb 2000 00:00:00 +0000"],
342 |       ["2000-03-01T00:00:00Z", "Wed, 1 Mar 2000 00:00:00 +0000"],
343 |       ["2000-04-01T00:00:00Z", "Sat, 1 Apr 2000 00:00:00 +0000"],
344 |       ["2000-05-01T00:00:00Z", "Mon, 1 May 2000 00:00:00 +0000"],
345 |       ["2000-06-01T00:00:00Z", "Thu, 1 Jun 2000 00:00:00 +0000"],
346 |       ["2000-07-01T00:00:00Z", "Sat, 1 Jul 2000 00:00:00 +0000"],
347 |       ["2000-08-01T00:00:00Z", "Tue, 1 Aug 2000 00:00:00 +0000"],
348 |       ["2000-09-01T00:00:00Z", "Fri, 1 Sep 2000 00:00:00 +0000"],
349 |       ["2000-10-01T00:00:00Z", "Sun, 1 Oct 2000 00:00:00 +0000"],
350 |       ["2000-11-01T00:00:00Z", "Wed, 1 Nov 2000 00:00:00 +0000"],
351 |       ["2000-12-01T00:00:00Z", "Fri, 1 Dec 2000 00:00:00 +0000"],
352 | 
353 |       // Test timezone offsets
354 |       ["2000-06-01T12:00:00Z", "Thu, 1 Jun 2000 12:00:00 +0000"],
355 |       ["2000-06-01T12:00:00+0100", "Thu, 1 Jun 2000 12:00:00 +0100"],
356 |       ["2000-06-01T12:00:00+0130", "Thu, 1 Jun 2000 12:00:00 +0130"],
357 |       ["2000-06-01T12:00:00-0100", "Thu, 1 Jun 2000 12:00:00 -0100"],
358 |       ["2000-06-01T12:00:00-0130", "Thu, 1 Jun 2000 12:00:00 -0130"],
359 |       ["2000-06-01T12:00:00+1345", "Thu, 1 Jun 2000 12:00:00 +1345"],
360 |       ["2000-06-01T12:00:00-1200", "Thu, 1 Jun 2000 12:00:00 -1200"],
361 |       ["2000-06-01T12:00:00+1337", "Thu, 1 Jun 2000 12:00:00 +1337"],
362 |       ["2000-06-01T12:00:00+0101", "Thu, 1 Jun 2000 12:00:00 +0101"],
363 |       ["2000-06-01T12:00:00-1337", "Thu, 1 Jun 2000 12:00:00 -1337"],
364 | 
365 |       // Try some varying hour, minute, and second amounts, to double-check
366 |       // padding and time dates.
367 |       ["2000-06-01T01:02:03Z", "Thu, 1 Jun 2000 01:02:03 +0000"],
368 |       ["2000-06-01T23:13:17Z", "Thu, 1 Jun 2000 23:13:17 +0000"],
369 |       ["2000-06-01T00:05:04Z", "Thu, 1 Jun 2000 00:05:04 +0000"],
370 |       ["2000-06-01T23:59:59Z", "Thu, 1 Jun 2000 23:59:59 +0000"],
371 |       ["2000-06-01T13:17:40Z", "Thu, 1 Jun 2000 13:17:40 +0000"],
372 |       ["2000-06-01T11:15:34Z", "Thu, 1 Jun 2000 11:15:34 +0000"],
373 |       ["2000-06-01T04:09:09Z", "Thu, 1 Jun 2000 04:09:09 +0000"],
374 |       ["2000-06-01T04:10:10Z", "Thu, 1 Jun 2000 04:10:10 +0000"],
375 |       ["2000-06-01T09:13:17Z", "Thu, 1 Jun 2000 09:13:17 +0000"],
376 |       ["2000-06-01T13:12:14Z", "Thu, 1 Jun 2000 13:12:14 +0000"],
377 |       ["2000-06-01T14:16:48Z", "Thu, 1 Jun 2000 14:16:48 +0000"],
378 | 
379 |       // Try varying month, date, and year values.
380 |       ["2000-01-31T00:00:00Z", "Mon, 31 Jan 2000 00:00:00 +0000"],
381 |       ["2000-02-28T00:00:00Z", "Mon, 28 Feb 2000 00:00:00 +0000"],
382 |       ["2000-02-29T00:00:00Z", "Tue, 29 Feb 2000 00:00:00 +0000"],
383 |       ["2001-02-28T00:00:00Z", "Wed, 28 Feb 2001 00:00:00 +0000"],
384 |       ["2000-03-31T00:00:00Z", "Fri, 31 Mar 2000 00:00:00 +0000"],
385 |       ["2000-04-30T00:00:00Z", "Sun, 30 Apr 2000 00:00:00 +0000"],
386 |       ["2000-05-31T00:00:00Z", "Wed, 31 May 2000 00:00:00 +0000"],
387 |       ["2000-06-30T00:00:00Z", "Fri, 30 Jun 2000 00:00:00 +0000"],
388 |       ["2000-07-31T00:00:00Z", "Mon, 31 Jul 2000 00:00:00 +0000"],
389 |       ["2000-08-31T00:00:00Z", "Thu, 31 Aug 2000 00:00:00 +0000"],
390 |       ["2000-09-30T00:00:00Z", "Sat, 30 Sep 2000 00:00:00 +0000"],
391 |       ["2000-10-31T00:00:00Z", "Tue, 31 Oct 2000 00:00:00 +0000"],
392 |       ["2000-11-30T00:00:00Z", "Thu, 30 Nov 2000 00:00:00 +0000"],
393 |       ["2000-12-31T00:00:00Z", "Sun, 31 Dec 2000 00:00:00 +0000"],
394 |       ["1900-01-01T00:00:00Z", "Mon, 1 Jan 1900 00:00:00 +0000"],
395 |       ["9999-12-31T23:59:59Z", "Fri, 31 Dec 9999 23:59:59 +0000"],
396 | 
397 |       // Tests that are not actually missing:
398 |       // We don't actually need to test daylight savings time issues, so long as
399 |       // getTimezoneOffset is correct. We've confirmed black-box that the value
400 |       // is being directly queried on every instance, since we have tests that
401 |       // make MockDate.getTimezoneOffset return different values.
402 |       // In addition, ES6 Date objects don't support leap seconds. Invalid dates
403 |       // per RFC 5322 are handled in a later run of code.
404 |     ];
405 |     header_tests.forEach(function(data) {
406 |       arrayTest(data, function() {
407 |         let emitter = headeremitter.makeStreamingEmitter(handler, {});
408 |         handler.reset(data[1]);
409 |         emitter.addDate(new MockDate(data[0]));
410 |         emitter.finish(true);
411 |       });
412 |     });
413 | 
414 |     // An invalid date should throw an error instead of make a malformed header.
415 |     it("Invalid dates", function() {
416 |       let emitter = headeremitter.makeStreamingEmitter(handler, {});
417 |       assert.throws(function() {
418 |         emitter.addDate(new Date(NaN));
419 |       }, /Cannot encode an invalid date/);
420 |       assert.throws(function() {
421 |         emitter.addDate(new Date("1850-01-01"));
422 |       }, /Date year is out of encodable range/);
423 |       assert.throws(function() {
424 |         emitter.addDate(new Date("10000-01-01"));
425 |       }); // Firefox considers the date invalid, Chrome does not, resulting in different error messages
426 |     });
427 | 
428 |     // Test preferred breaking for the date header.
429 |     it("Break spot", function() {
430 |       let emitter = headeremitter.makeStreamingEmitter(handler, {
431 |         softMargin: 30,
432 |       });
433 |       handler.reset("Overly-Long-Date:\r\n Sat, 1 Jan 2000 00:00:00 +0000");
434 |       emitter.addHeaderName("Overly-Long-Date");
435 |       emitter.addDate(new MockDate("2000-01-01T00:00:00Z"));
436 |       emitter.finish();
437 |     });
438 | 
439 |     it("Correctness of date", function() {
440 |       let emitter = headeremitter.makeStreamingEmitter(handler, {});
441 |       handler.reset();
442 |       let now = new Date();
443 |       emitter.addDate(now);
444 |       emitter.finish();
445 |       // All engines can parse the date strings we produce
446 |       let reparsed = new Date(handler.output);
447 | 
448 |       // Now and reparsed should be correct to second-level precision.
449 |       assert.equal(reparsed.getMilliseconds(), 0);
450 |       assert.equal(now.getTime() - now.getMilliseconds(), reparsed.getTime());
451 |     });
452 |   });
453 | 
454 |   describe("Header lengths", function() {
455 |     let handler = {
456 |       reset(expected) {
457 |         this.output = "";
458 |         this.expected = expected;
459 |       },
460 |       deliverData(data) {
461 |         this.output += data;
462 |       },
463 |       deliverEOF() {
464 |         assert.equal(this.output, this.expected + "\r\n");
465 |       },
466 |     };
467 |     let header_tests = [
468 |       [
469 |         [{ name: "Supercalifragilisticexpialidocious", email: "a@b.c" }],
470 |         "Supercalifragilisticexpialidocious\r\n <a@b.c>",
471 |       ],
472 |       [
473 |         [
474 |           {
475 |             email:
476 |               "supercalifragilisticexpialidocious@" +
477 |               "the.longest.domain.name.in.the.world.invalid",
478 |           },
479 |         ],
480 |         "supercalifragilisticexpialidocious\r\n" +
481 |           " @the.longest.domain.name.in.the.world.invalid",
482 |       ],
483 |       [
484 |         [
485 |           {
486 |             name:
487 |               "Lopadotemachoselachogaleokranioleipsanodrimhypotrimmatosilphi" +
488 |               "paraomelitokatakechymenokichlepikossyphophattoperisteralektryonoptek" +
489 |               "ephalliokigklopeleiolagoiosiraiobaphetraganopterygon",
490 |             email: "a@b.c",
491 |           },
492 |         ],
493 |         new Error(),
494 |       ],
495 |     ];
496 |     header_tests.forEach(function(data) {
497 |       arrayTest(data, function() {
498 |         let emitter = headeremitter.makeStreamingEmitter(handler, {
499 |           softMargin: 30,
500 |           hardMargin: 50,
501 |           useASCII: false,
502 |         });
503 |         handler.reset(data[1]);
504 |         if (data[1] instanceof Error) {
505 |           assert.throws(function() {
506 |             emitter.addAddresses(data[0]);
507 |           }, /Cannot encode/);
508 |         } else {
509 |           assert.doesNotThrow(function() {
510 |             emitter.addAddresses(data[0]);
511 |           });
512 |           emitter.finish(true);
513 |         }
514 |       });
515 |     });
516 |   });
517 | });
518 | 


--------------------------------------------------------------------------------
/lib/mailParser.js:
--------------------------------------------------------------------------------
  1 | /* This Source Code Form is subject to the terms of the Mozilla Public
  2 |  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  3 |  * You can obtain one at http://mozilla.org/MPL/2.0/. */
  4 | 
  5 | import headerParser from './headerParser';
  6 | import RawMimeParser from './rawMimeParser';
  7 | import { concatUint8Arrays, stringToUint8Array, uint8ArrayToString } from './utils';
  8 | 
  9 | // Emitter helpers, for internal functions later on.
 10 | var ExtractMimeMsgEmitter = {
 11 |   getAttachmentName(part) {
 12 |     if (!part || !part["headers"]) {
 13 |       return "";
 14 |     }
 15 | 
 16 |     if (part.headers["content-disposition"]) {
 17 |       let filename = MimeParser.getParameter(
 18 |         part.headers["content-disposition"][0],
 19 |         "filename"
 20 |       );
 21 |       if (filename) {
 22 |         return filename;
 23 |       }
 24 |     }
 25 | 
 26 |     if (part.headers["content-type"]) {
 27 |       let name = MimeParser.getParameter(
 28 |         part.headers["content-type"][0],
 29 |         "name"
 30 |       );
 31 |       if (name) {
 32 |         return name;
 33 |       }
 34 |     }
 35 | 
 36 |     return "";
 37 |   },
 38 | 
 39 |   // All parts of content-disposition = "attachment" are returned as attachments.
 40 |   // For content-disposition = "inline", all parts except those with content-type
 41 |   // text/plain, text/html and text/enriched are returned as attachments.
 42 |   isAttachment(part) {
 43 |     if (!part) {
 44 |       return false;
 45 |     }
 46 | 
 47 |     let contentType = part.contentType || "text/plain";
 48 |     if (contentType.search(/^multipart\//i) === 0) {
 49 |       return false;
 50 |     }
 51 | 
 52 |     let contentDisposition = "";
 53 |     if (
 54 |       Array.isArray(part.headers["content-disposition"]) &&
 55 |       part.headers["content-disposition"].length > 0
 56 |     ) {
 57 |       contentDisposition = part.headers["content-disposition"][0];
 58 |     }
 59 | 
 60 |     if (
 61 |       contentDisposition.search(/^attachment/i) === 0 ||
 62 |       contentType.search(/^text\/plain|^text\/html|^text\/enriched/i) === -1
 63 |     ) {
 64 |       return true;
 65 |     }
 66 | 
 67 |     return false;
 68 |   },
 69 | 
 70 |   /** JSMime API **/
 71 |   startMessage() {
 72 |     this.mimeTree = {
 73 |       partName: "",
 74 |       contentType: "message/rfc822",
 75 |       parts: [],
 76 |       size: 0,
 77 |       headers: {},
 78 |       rawHeaderText: "",
 79 |       allAttachments: [],
 80 |       // keep track of encountered body parts, based on content-type
 81 |       bodyParts: { text: [], html: [] },
 82 |       // No support for encryption.
 83 |       isEncrypted: false,
 84 |     };
 85 |     // partsPath is a hierarchical stack of parts from the root to the
 86 |     // current part.
 87 |     this.partsPath = [this.mimeTree];
 88 |     this.options = this.options || {};
 89 |   },
 90 | 
 91 |   endMessage() {
 92 |     // Prepare the mimeMsg object, which is the final output of the emitter.
 93 |     this.mimeMsg = null;
 94 |     if (this.mimeTree.parts.length == 0) {
 95 |       return;
 96 |     }
 97 | 
 98 |     // Check if only a specific mime part has been requested.
 99 |     if (this.options.getMimePart) {
100 |       if (this.mimeTree.parts[0].partName == this.options.getMimePart) {
101 |         this.mimeMsg = this.mimeTree.parts[0];
102 |         this.mimeMsg.bodyAsTypedArray = stringToUint8Array(
103 |           this.mimeMsg.body
104 |         );
105 |       }
106 |       return;
107 |     }
108 | 
109 |     this.mimeMsg = this.mimeTree;
110 |   },
111 | 
112 |   startPart(partNum, headerMap) {
113 |     let utf8Encoder = new TextEncoder();
114 | 
115 |     let contentType = headerMap.contentType && headerMap.contentType.type
116 |       ? headerMap.contentType.type
117 |       : "text/plain";
118 | 
119 |     let rawHeaderText = headerMap.rawHeaderText;
120 | 
121 |     let headers = {};
122 |     for (let [headerName, headerValue] of headerMap._rawHeaders) {
123 |       // MsgHdrToMimeMessage always returns an array, even for single values.
124 |       let valueArray = Array.isArray(headerValue) ? headerValue : [headerValue];
125 |       // Return a binary string, to mimic MsgHdrToMimeMessage.
126 |       headers[headerName] = valueArray.map(value => {
127 |         let utf8ByteArray = utf8Encoder.encode(value);
128 |         return uint8ArrayToString(utf8ByteArray);
129 |       });
130 |     }
131 | 
132 |     // Get the most recent part from the hierarchical parts stack, which is the
133 |     // parent of the new part to by added.
134 |     let currentPart = this.partsPath[this.partsPath.length - 1];
135 | 
136 |     // Add a leading 1 to the partNum.
137 |     let partName = "1" + (partNum !== "" ? "." : "") + partNum;
138 |     if (partName == "1") {
139 |       // MsgHdrToMimeMessage differentiates between the message headers and the
140 |       // headers of the first part. jsmime.js however returns all headers of
141 |       // the message in the first part.
142 | 
143 |       // Move rawHeaderText and add the content-* headers back to the new/first
144 |       // part.
145 |       currentPart.rawHeaderText = rawHeaderText;
146 |       rawHeaderText = rawHeaderText
147 |         .split(/\n(?![ \t])/)
148 |         .filter(h => h.toLowerCase().startsWith("content-"))
149 |         .join("\n")
150 |         .trim();
151 | 
152 |       // Move all headers and add the content-* headers back to the new/first
153 |       // part.
154 |       currentPart.headers = headers;
155 |       headers = Object.fromEntries(
156 |         Object.entries(headers).filter(h => h[0].startsWith("content-"))
157 |       );
158 |     }
159 | 
160 |     // Add default content-type header.
161 |     if (!headers["content-type"]) {
162 |       headers["content-type"] = ["text/plain"];
163 |     }
164 | 
165 |     let newPart = {
166 |       partName,
167 |       rawBody: null, // Uint8Array
168 |       body: '', // string, coerced based on options
169 |       headers,
170 |       rawHeaderText,
171 |       contentType,
172 |       size: 0,
173 |       parts: [],
174 |       // No support for encryption.
175 |       isEncrypted: false,
176 |     };
177 | 
178 |     // Add nested new part.
179 |     currentPart.parts.push(newPart);
180 |     // Update the newly added part to be current part.
181 |     this.partsPath.push(newPart);
182 |   },
183 | 
184 |   endPart(partNum) {
185 |     let deleteBody = false;
186 |     // Get the most recent part from the hierarchical parts stack.
187 |     let currentPart = this.partsPath[this.partsPath.length - 1];
188 | 
189 |     // Add size.
190 |     let size = currentPart.body.length;
191 |     currentPart.size += size;
192 | 
193 |     if (this.isAttachment(currentPart)) {
194 |       currentPart.fileName = this.getAttachmentName(currentPart);
195 |       const contentDispositionHeader = currentPart.headers["content-disposition"] && currentPart.headers["content-disposition"][0];
196 |       const contentIdHeader = currentPart.headers["content-id"] && currentPart.headers["content-id"][0];
197 | 
198 |       // the content-disposition header, as parsed by jsmime, also contains the filename
199 |       currentPart.contentDisposition = contentDispositionHeader ? contentDispositionHeader.split(';').shift() : undefined;
200 |       currentPart.contentId = contentIdHeader || undefined;
201 | 
202 |       if (this.options.includeAttachments) {
203 |         this.mimeTree.allAttachments.push(currentPart);
204 |       } else {
205 |         deleteBody = true;
206 |       }
207 |     } else if (currentPart.rawBody) {
208 |       delete currentPart.rawBody; // drop Uint8Array data outside of attachments, to free up memory
209 | 
210 |       const bodyType = currentPart.contentType || 'text/plain';
211 |       switch(bodyType) {
212 |         case 'text/html':
213 |           this.mimeTree.bodyParts.html.push(currentPart.body);
214 |           break;
215 |         case 'text/plain':
216 |           this.mimeTree.bodyParts.text.push(currentPart.body);
217 |           break;
218 |         // no support for rich text
219 |       }
220 |     }
221 | 
222 |     if (deleteBody) {
223 |       delete currentPart.body;
224 |       delete currentPart.rawBody;
225 |     }
226 | 
227 |     // Remove content-disposition and content-transfer-encoding headers.
228 |     currentPart.headers = Object.fromEntries(
229 |       Object.entries(currentPart.headers).filter(
230 |         h =>
231 |           !["content-disposition", "content-transfer-encoding"].includes(h[0])
232 |       )
233 |     );
234 | 
235 |     // Set the parent of this part to be the new current part.
236 |     this.partsPath.pop();
237 | 
238 |     // Add the size of this part to its parent as well.
239 |     currentPart = this.partsPath[this.partsPath.length - 1];
240 |     currentPart.size += size;
241 |   },
242 | 
243 |   /**
244 |    * The data parameter is either a string or a Uint8Array.
245 |    */
246 |   deliverPartData(partNum, data, rawData) {
247 |     // Get the most recent part from the hierarchical parts stack.
248 |     let currentPart = this.partsPath[this.partsPath.length - 1];
249 | 
250 |     if (typeof data === "string") {
251 |       currentPart.body += data;
252 |     } else {
253 |       currentPart.body += uint8ArrayToString(data);
254 |     }
255 | 
256 |     // we keep both raw and string data as at this point we do not know whether the part is an attachment
257 |     if (currentPart.rawBody === null) {
258 |       currentPart.rawBody = rawData;
259 |     } else {
260 |       currentPart.rawBody = concatUint8Arrays([currentPart.rawBody, rawData])
261 |     }
262 |   },
263 | };
264 | 
// Emitter that records only the headers reported for the empty part number.
var ExtractHeadersEmitter = {
  /**
   * Store the headers of the part whose number is the empty string on
   * `this.headers`; headers of every other part are ignored.
   *
   * @param partNum - Part number reported by the parser.
   * @param headers - Headers reported for that part.
   */
  startPart(partNum, headers) {
    if (partNum != "") {
      return;
    }
    this.headers = headers;
  },
};
272 | 
// Emitter that records the headers and the concatenated body data reported
// for the empty part number.
var ExtractHeadersAndBodyEmitter = {
  // Initial (shared) value; the first append creates an own property on the
  // object the method is called on.
  body: "",
  // Header capture is identical to ExtractHeadersEmitter.
  startPart: ExtractHeadersEmitter.startPart,
  /**
   * Append a chunk of data to `this.body`, but only for the part whose number
   * is the empty string.
   *
   * @param partNum - Part number reported by the parser.
   * @param data - Chunk to append (coerced by string concatenation).
   */
  deliverPartData(partNum, data) {
    if (partNum != "") {
      return;
    }
    this.body += data;
  },
};
282 | 
283 | export const MimeParser = {
284 |   /***
285 |    * Determine an arbitrary "parameter" part of a mail header.
286 |    *
287 |    * @param {string} headerStr - The string containing all parts of the header.
288 |    * @param {string} parameter - The parameter we are looking for.
289 |    *
290 |    *
291 |    * 'multipart/signed; protocol="xyz"', 'protocol' --> returns "xyz"
292 |    *
293 |    * @return {string} String containing the value of the parameter; or "".
294 |    */
295 | 
296 |   getParameter(headerStr, parameter) {
297 |     parameter = parameter.toLowerCase();
298 |     headerStr = headerStr.replace(/[\r\n]+[ \t]+/g, "");
299 | 
300 |     let hdrMap = headerParser.parseParameterHeader(
301 |       ";" + headerStr,
302 |       true,
303 |       true
304 |     );
305 | 
306 |     for (let [key, value] of hdrMap.entries()) {
307 |       if (parameter == key.toLowerCase()) {
308 |         return value;
309 |       }
310 |     }
311 | 
312 |     return "";
313 |   },
314 | 
315 |   /**
316 |    * Triggers an synchronous parse of the given input.
317 |    *
318 |    * The input is a string that is immediately parsed, calling all functions on
319 |    * the emitter before this function returns.
320 |    *
321 |    * @param {BinaryString} input   A string or input stream of text to parse.
322 |    * @param emitter The emitter to receive callbacks on.
323 |    * @param opts    A set of options for the parser.
324 |    */
325 |   parseSync(input, emitter, opts) {
326 |     // We only support string parsing if we are trying to do this parse
327 |     // synchronously.
328 |     if (typeof input != "string") {
329 |       throw new Error("input is not a recognizable type!");
330 |     }
331 |     var parser = new RawMimeParser(emitter, opts);
332 |     parser.deliverData(input);
333 |     parser.deliverEOF();
334 |   },
335 | 
336 |   /**
337 |    * Returns a stream listener that feeds data into a parser.
338 |    *
339 |    * In addition to the functions on the emitter that the parser may use, the
340 |    * generated stream listener will also make calls to onStartRequest and
341 |    * onStopRequest on the emitter (if they exist).
342 |    *
343 |    * @param emitter The emitter to receive callbacks on.
344 |    * @param opts    A set of options for the parser.
345 |    */
346 |   // makeStreamListenerParser(emitter, opts) {
347 |   //   var StreamListener = {
348 |   //     onStartRequest(aRequest) {
349 |   //       try {
350 |   //         if ("onStartRequest" in emitter) {
351 |   //           emitter.onStartRequest(aRequest);
352 |   //         }
353 |   //       } finally {
354 |   //         this._parser.resetParser();
355 |   //       }
356 |   //     },
357 |   //     onStopRequest(aRequest, aStatus) {
358 |   //       this._parser.deliverEOF();
359 |   //       if ("onStopRequest" in emitter) {
360 |   //         emitter.onStopRequest(aRequest, aStatus);
361 |   //       }
362 |   //     },
363 |   //     onDataAvailable(aRequest, aStream, aOffset, aCount) {
364 |   //       var scriptIn = Cc[
365 |   //         "@mozilla.org/scriptableinputstream;1"
366 |   //       ].createInstance(Ci.nsIScriptableInputStream);
367 |   //       scriptIn.init(aStream);
368 |   //       // Use readBytes instead of read to handle embedded NULs properly.
369 |   //       this._parser.deliverData(scriptIn.readBytes(aCount));
370 |   //     },
371 |   //     QueryInterface: ChromeUtils.generateQI([
372 |   //       "nsIStreamListener",
373 |   //       "nsIRequestObserver",
374 |   //     ]),
375 |   //   };
376 |   //   setDefaultParserOptions(opts);
377 |   //   StreamListener._parser = new RawMimeParser(emitter, opts);
378 |   //   return StreamListener;
379 |   // },
380 | 
381 |   /**
382 |    * Returns a new raw MIME parser.
383 |    *
384 |    * Prefer one of the other methods where possible, since the input here must
385 |    * be driven manually.
386 |    *
387 |    * @param emitter The emitter to receive callbacks on.
388 |    * @param opts    A set of options for the parser.
389 |    */
390 |   makeParser(emitter, opts) {
391 |     return new RawMimeParser(emitter, opts);
392 |   },
393 | 
394 |   /**
395 |    * Returns a mimeMsg object for the given input. The returned object tries to
396 |    * be compatible with the return value of MsgHdrToMimeMessage. Differences:
397 |    *  - no support for encryption
398 |    *  - calculated sizes differ slightly
399 |    *  - allAttachments includes the content and not a URL
400 |    *  - does not eat TABs in headers, if they follow a CRLF
401 |    *
402 |    * The input is any type of input that would be accepted by parseSync.
403 |    *
404 |    * @param {BinaryString} input   A string of text to parse.
405 |    */
406 |   extractMimeMsg(input, options = {}) {
407 |     var emitter = Object.create(ExtractMimeMsgEmitter);
408 |     // Set default options.
409 |     emitter.options = {
410 |       includeAttachments: true,
411 |       getMimePart: "",
412 |     };
413 |     // Override default options.
414 |     for (let option of Object.keys(options)) {
415 |       emitter.options[option] = options[option];
416 |     }
417 | 
418 |     MimeParser.parseSync(input, emitter, {
419 |       // jsmime does not use the "1." prefix for the partName.
420 |       pruneat: emitter.options.getMimePart
421 |         .split(".")
422 |         .slice(1)
423 |         .join("."),
424 |       bodyformat: "decode",
425 |       stripcontinuations: true,
426 |       strformat: "unicode",
427 |     });
428 |     return emitter.mimeMsg;
429 |   },
430 | 
431 |   /**
432 |    * Returns a dictionary of headers for the given input.
433 |    *
434 |    * The input is any type of input that would be accepted by parseSync. What
435 |    * is returned is a JS object that represents the headers of the entire
436 |    * envelope as would be received by startPart when partNum is the empty
437 |    * string.
438 |    *
439 |    * @param input   A string of text to parse.
440 |    */
441 |   extractHeaders(input) {
442 |     var emitter = Object.create(ExtractHeadersEmitter);
443 |     MimeParser.parseSync(input, emitter, { pruneat: "", bodyformat: "none" });
444 |     return emitter.headers;
445 |   },
446 | 
447 |   /**
448 |    * Returns the headers and body for the given input message.
449 |    *
450 |    * The return value is an array whose first element is the dictionary of
451 |    * headers (as would be returned by extractHeaders) and whose second element
452 |    * is a binary string of the entire body of the message.
453 |    *
454 |    * @param input   A string of text to parse.
455 |    */
456 |   extractHeadersAndBody(input) {
457 |     var emitter = Object.create(ExtractHeadersAndBodyEmitter);
458 |     MimeParser.parseSync(input, emitter, { pruneat: "", bodyformat: "raw" });
459 |     return [emitter.headers, emitter.body];
460 |   },
461 | 
  // Parameters for parseHeaderField
  //
  // A flags value is one header type (HEADER_UNSTRUCTURED, HEADER_PARAMETER or
  // HEADER_ADDRESS) held in the low 4 bits, optionally or'd with any of the
  // HEADER_OPTION_* bits above them. parseHeaderField throws for any other
  // value in the low 4 bits.

  /**
   * Parse the header as if it were unstructured.
   *
   * This results in the same string if no other options are specified. If other
   * options are specified, this causes the string to be modified appropriately
   * (raw-charset conversion and/or RFC 2047 decoding).
   */
  HEADER_UNSTRUCTURED: 0x00,
  /**
   * Parse the header as if it were in the form text; attr=val; attr=val.
   *
   * Such headers include Content-Type, Content-Disposition, and most other
   * headers used by MIME as opposed to messages. parseHeaderField hands these
   * to headerParser.parseParameterHeader.
   */
  HEADER_PARAMETER: 0x02,
  /**
   * Parse the header as if it were a sequence of mailboxes.
   *
   * parseHeaderField hands these to headerParser.parseAddressingHeader.
   */
  HEADER_ADDRESS: 0x03,

  /**
   * This decodes parameter values according to RFC 2231.
   *
   * This flag means nothing if HEADER_PARAMETER is not specified: it is only
   * consulted when parsing that header type.
   */
  HEADER_OPTION_DECODE_2231: 0x10,
  /**
   * This decodes the inline encoded-words that are in RFC 2047.
   *
   * It is honoured by all three header types.
   */
  HEADER_OPTION_DECODE_2047: 0x20,
  /**
   * This converts the header from a raw string to proper Unicode.
   *
   * When set, parseHeaderField first passes the text and the caller's charset
   * through headerParser.convert8BitHeader, before any other parsing.
   */
  HEADER_OPTION_ALLOW_RAW: 0x40,

  // Convenience for all three of the above options
  // (HEADER_OPTION_DECODE_2231 | HEADER_OPTION_DECODE_2047 |
  // HEADER_OPTION_ALLOW_RAW).
  HEADER_OPTION_ALL_I18N: 0x70,
501 |   /**
502 |    * Parse a header field according to the specification given by flags.
503 |    *
504 |    * Permissible flags begin with one of the HEADER_* flags, which may be or'd
505 |    * with any of the HEADER_OPTION_* flags to modify the result appropriately.
506 |    *
507 |    * If the option HEADER_OPTION_ALLOW_RAW is passed, the charset parameter, if
508 |    * present, is the charset to fallback to if the header is not decodable as
509 |    * UTF-8 text. If HEADER_OPTION_ALLOW_RAW is passed but the charset parameter
510 |    * is not provided, then no fallback decoding will be done. If
511 |    * HEADER_OPTION_ALLOW_RAW is not passed, then no attempt will be made to
512 |    * convert charsets.
513 |    *
514 |    * @param text    The value of a MIME or message header to parse.
515 |    * @param flags   A set of flags that controls interpretation of the header.
516 |    * @param charset A default charset to assume if no information may be found.
517 |    */
518 |   parseHeaderField(text, flags, charset) {
519 |     // If we have a raw string, convert it to Unicode first
520 |     if (flags & MimeParser.HEADER_OPTION_ALLOW_RAW) {
521 |       text = headerParser.convert8BitHeader(text, charset);
522 |     }
523 | 
524 |     // The low 4 bits indicate the type of the header we are parsing. All of the
525 |     // higher-order bits are flags.
526 |     switch (flags & 0x0f) {
527 |       case MimeParser.HEADER_UNSTRUCTURED:
528 |         if (flags & MimeParser.HEADER_OPTION_DECODE_2047) {
529 |           text = headerParser.decodeRFC2047Words(text);
530 |         }
531 |         return text;
532 |       case MimeParser.HEADER_PARAMETER:
533 |         return headerParser.parseParameterHeader(
534 |           text,
535 |           (flags & MimeParser.HEADER_OPTION_DECODE_2047) != 0,
536 |           (flags & MimeParser.HEADER_OPTION_DECODE_2231) != 0
537 |         );
538 |       case MimeParser.HEADER_ADDRESS:
539 |         return headerParser.parseAddressingHeader(
540 |           text,
541 |           (flags & MimeParser.HEADER_OPTION_DECODE_2047) != 0
542 |         );
543 |       default:
544 |         throw new Error("Illegal type of header field");
545 |     }
546 |   },
547 | };
548 | 
/**
 * Parse MIME message
 *
 * String input is UTF-8 encoded first; the bytes are then turned back into a
 * string by `uint8ArrayToString` and handed to `MimeParser.extractMimeMsg`.
 * The returned object carries the extracted `headers`, the simplified
 * `attachments`, the joined `body` (`html` / `text`, each `null` when there is
 * no such part) and, for a fixed set of well-known headers that are present,
 * a top-level property holding the structured value of that header.
 *
 * @param {String|Uint8Array} data - MIME message to parse
 * @returns {Object} parsed content (see TS definitions for more details)
 */
export function parseMail(data) {
  const encoded = (typeof data === 'string') ? new TextEncoder().encode(data) : data;

  const { headers, allAttachments, bodyParts } = MimeParser.extractMimeMsg(uint8ArrayToString(encoded));
  // these fields can only contain a single value
  const singleKeys = new Set([
    'message-id',
    'content-id',
    'from',
    'sender',
    'in-reply-to',
    'reply-to',
    'subject',
    'date',
    'content-disposition',
    'content-type',
    'content-transfer-encoding',
    'priority',
    'mime-version',
    'content-description',
    'precedence',
    'errors-to'
  ]);

  const mail = {
    headers,
    // drop some fields for each attachment.
    // also convert a `null` rawBody to an empty array (edge-case when passing only the attachment headers as `data`)
    attachments: allAttachments.map(
      ({ parts, partName, body, isEncrypted, rawBody, ...rest }) => ({ ...rest, content: rawBody || new Uint8Array() })
    ),
    // join all body parts and normalise EOL to \n
    body: {
      html: bodyParts.html.length ? bodyParts.html.join('<br>\n').replace(/\r?\n/g, '\n') : null,
      text: bodyParts.text.length ? bodyParts.text.join('\n').replace(/\r?\n/g, '\n') : null
    },
  };

  // copy some headers into top-level object; each key is listed (and therefore parsed) exactly once
  const topLevelKeys = ['subject', 'date', 'to', 'from', 'cc', 'bcc', 'message-id', 'in-reply-to', 'reply-to'];
  topLevelKeys.forEach(key => {
    if (!headers[key]) return;
    const maybeArrayValue = headerParser.parseStructuredHeader(key, headers[key]);
    // when a single-valued header parses to an array, the last entry is the one kept
    mail[key] = singleKeys.has(key) && Array.isArray(maybeArrayValue)
      ? maybeArrayValue[maybeArrayValue.length - 1]
      : maybeArrayValue;
  });

  return mail;
}
603 | 


--------------------------------------------------------------------------------
/test/test_mime_tree.js:
--------------------------------------------------------------------------------
  1 | import { assert } from "chai";
  2 | import { read_file } from "./utils";
  3 | import { MimeParser, headerparser } from "../lib/jsmime";
  4 | 
  5 | function arrayTest(data, fn) {
  6 |   fn.toString = function() {
  7 |     let text = Function.prototype.toString.call(this);
  8 |     text = text.replace(/data\[([0-9]*)\]/g, function(m, p) {
  9 |       return JSON.stringify(data[p]);
 10 |     });
 11 |     return text;
 12 |   };
 13 |   return it(data[0], fn);
 14 | }
 15 | 
 16 | // Returns and deletes object[field] if present, or undefined if not.
 17 | function extract_field(object, field) {
 18 |   if (field in object) {
 19 |     var result = object[field];
 20 |     delete object[field];
 21 |     return result;
 22 |   }
 23 |   return undefined;
 24 | }
 25 | 
 26 | /**
 27 |  * Helper for body tests.
 28 |  *
 29 |  * Some extra options are listed too:
 30 |  * _split: The contents of the file will be passed in packets split by this
 31 |  *         regex. Be sure to include the split delimiter in a group so that they
 32 |  *         are included in the output packets!
 33 |  * _eol: The CRLFs in the input file will be replaced with the given line
 34 |  *       ending instead.
 35 |  * @param test     The name of test
 36 |  * @param file     The name of the file to read (relative to mailnews/data)
 37 |  * @param opts     Options for the mime parser, as well as a few extras detailed
 38 |  *                 above.
 39 |  * @param partspec An array of [partnum, line start, line end] detailing the
 40 |  *                 expected parts in the body. It will be expected that the
 41 |  *                 accumulated body part data for partnum would be the contents
 42 |  *                 of the file from [line start, line end) [1-based lines]
 43 |  */
 44 | function make_body_test(test, file, opts, partspec) {
 45 |   var results = Promise.all(
 46 |     partspec.map(p => Promise.all([p[0], read_file(file, p[1], p[2])]))
 47 |   );
 48 |   var eol = extract_field(opts, "_eol");
 49 |   var msgtext = read_file(file).then(function(msgcontents) {
 50 |     var packetize = extract_field(opts, "_split");
 51 |     if (packetize !== undefined) {
 52 |       msgcontents = msgcontents.split(packetize);
 53 |     }
 54 |     if (eol !== undefined) {
 55 |       msgcontents = msgcontents.replace(/\r\n/g, eol);
 56 |     }
 57 |     return msgcontents;
 58 |   });
 59 |   if (eol !== undefined) {
 60 |     results = results.then(function(results_) {
 61 |       for (let part of results_) {
 62 |         part[1] = part[1].replace(/\r\n/g, eol);
 63 |       }
 64 |       return results_;
 65 |     });
 66 |   }
 67 |   return [test, msgtext, opts, results];
 68 | }
 69 | 
 70 | /**
 71 |  * Execute a single MIME tree test.
 72 |  *
 73 |  * @param message  Either the text of the message, an array of textual message
 74 |  *                 part data (imagine coming on different TCP packets), or a
 75 |  *                 promise that resolves to any of the above.
 76 |  * @param opts     A set of options for the parser and for the test.
 77 |  * @param results  The expected results of the call. This may either be a
 78 |  *                 dictionary of part number -> header -> values (to check
 79 |  *                 headers), or an array of [partnum, partdata] for expected
 80 |  *                 results to deliverPartData, or a promise for the above.
 81 |  * @return         A promise containing the results of the test.
 82 |  */
 83 | function testParser(message, opts, results) {
 84 |   var uncheckedValues;
 85 |   var checkingHeaders;
 86 |   var calls = 0;
 87 |   var fusingParts = extract_field(opts, "_nofuseparts") === undefined;
 88 |   var emitter = {
 89 |     stack: [],
 90 |     startMessage: function emitter_startMsg() {
 91 |       assert.equal(this.stack.length, 0);
 92 |       calls++;
 93 |       this.partData = "";
 94 |     },
 95 |     endMessage: function emitter_endMsg() {
 96 |       assert.equal(this.stack.length, 0);
 97 |       calls++;
 98 |     },
 99 |     startPart: function emitter_startPart(partNum, headers) {
100 |       this.stack.push(partNum);
101 |       if (checkingHeaders) {
102 |         assert.ok(partNum in uncheckedValues);
103 |         // Headers is a map, convert it to an object.
104 |         var objmap = {};
105 |         for (let pair of headers) {
106 |           objmap[pair[0]] = pair[1];
107 |         }
108 |         var expected = uncheckedValues[partNum];
109 |         var convresults = {};
110 |         for (let key in expected) {
111 |           try {
112 |             convresults[key] = headerparser.parseStructuredHeader(
113 |               key,
114 |               expected[key]
115 |             );
116 |           } catch (e) {
117 |             convresults[key] = expected[key];
118 |           }
119 |         }
120 |         assert.deepEqual(objmap, convresults);
121 |         if (fusingParts) {
122 |           assert.equal(this.partData, "");
123 |         }
124 |         delete uncheckedValues[partNum];
125 |       }
126 |     },
127 |     deliverPartData: function emitter_partData(partNum, data) {
128 |       assert.equal(this.stack[this.stack.length - 1], partNum);
129 |       if (!checkingHeaders) {
130 |         if (fusingParts) {
131 |           this.partData += data;
132 |         } else {
133 |           let check = uncheckedValues.shift();
134 |           assert.equal(partNum, check[0]);
135 |           assert.equal(data, check[1]);
136 |         }
137 |       }
138 |     },
139 |     endPart: function emitter_endPart(partNum) {
140 |       if (this.partData != "") {
141 |         let check = uncheckedValues.shift();
142 |         assert.equal(partNum, check[0]);
143 |         assert.equal(this.partData, check[1]);
144 |         this.partData = "";
145 |       }
146 |       assert.equal(this.stack.pop(), partNum);
147 |     },
148 |   };
149 |   opts.onerror = function(e) {
150 |     throw e;
151 |   };
152 | 
153 |   return Promise.all([message, results]).then(function(vals) {
154 |     let [message_, results_] = vals;
155 |     // Clone the results array into uncheckedValues
156 |     if (Array.isArray(results_)) {
157 |       uncheckedValues = Array.from(results_);
158 |       checkingHeaders = false;
159 |     } else {
160 |       uncheckedValues = {};
161 |       for (let key in results_) {
162 |         uncheckedValues[key] = results_[key];
163 |       }
164 |       checkingHeaders = true;
165 |     }
166 |     if (!Array.isArray(message_)) {
167 |       message_ = [message_];
168 |     }
169 |     var parser = new MimeParser(emitter, opts);
170 |     message_.forEach(function(packet) {
171 |       parser.deliverData(packet);
172 |     });
173 |     parser.deliverEOF();
174 |     assert.equal(calls, 2);
175 |     if (!checkingHeaders) {
176 |       assert.equal(0, uncheckedValues.length);
177 |     } else {
178 |       assert.deepEqual({}, uncheckedValues);
179 |     }
180 |   });
181 | }
182 | 
183 | describe("MimeParser", function() {
184 |   // This is the expected part specifier for the multipart-complex1 test file,
185 |   // specified here because it is used in several cases.
186 |   let mpart_complex1 = [
187 |     ["1", 8, 10],
188 |     ["2", 14, 16],
189 |     ["3.1", 22, 24],
190 |     ["4", 29, 31],
191 |     ["5", 33, 35],
192 |   ];
193 | 
194 |   describe("Simple tests", function() {
195 |     let parser_tests = [
196 |       // The following tests are either degenerate or error cases that should
197 |       // work
198 |       ["Empty string", "", {}, { "": {} }],
199 |       ["No value for header", "Header", {}, { "": { Header: [""] } }],
200 |       [
201 |         "No trailing newline",
202 |         "To: eof@example.net",
203 |         {},
204 |         { "": { To: ["eof@example.net"] } },
205 |       ],
206 |       [
207 |         "Header no val",
208 |         "To: eof@example.net\r\n",
209 |         {},
210 |         { "": { To: ["eof@example.net"] } },
211 |       ],
212 |       ["No body no headers", "\r\n\r\n", {}, { "": {} }],
213 |       ["Body no headers", "\r\n\r\nA", {}, { "": {} }],
214 |       // Basic cases for headers
215 |       [
216 |         "Multiparts get headers",
217 |         read_file("multipart-complex1"),
218 |         {},
219 |         {
220 |           "": { "Content-Type": ['multipart/mixed; boundary="boundary"'] },
221 |           "1": {
222 |             "Content-Type": ["application/octet-stream"],
223 |             "Content-Transfer-Encoding": ["base64"],
224 |           },
225 |           "2": {
226 |             "Content-Type": ["image/png"],
227 |             "Content-Transfer-Encoding": ["base64"],
228 |           },
229 |           "3": {
230 |             "Content-Type": ['multipart/related; boundary="boundary2"'],
231 |           },
232 |           "3.1": { "Content-Type": ["text/html"] },
233 |           "4": { "Content-Type": ["text/plain"] },
234 |           "5": {},
235 |         },
236 |       ],
237 |     ];
238 |     parser_tests.forEach(function(data) {
239 |       arrayTest(data, function() {
240 |         return testParser(data[1], data[2], data[3]);
241 |       });
242 |     });
243 |   });
244 | 
245 |   describe("Body tests", function() {
246 |     let parser_tests = [
247 |       // Body tests from data
248 |       // (Note: line numbers are 1-based. Also, to capture trailing EOF, add 2
249 |       // to the last line number of the file).
250 |       make_body_test("Basic body", "basic1", {}, [["", 3, 5]]),
251 |       make_body_test("Basic multipart", "multipart1", {}, [["1", 10, 12]]),
252 |       make_body_test("Basic multipart", "multipart2", {}, [["1", 8, 11]]),
253 |       make_body_test(
254 |         "Complex multipart",
255 |         "multipart-complex1",
256 |         {},
257 |         mpart_complex1
258 |       ),
259 |       make_body_test("Truncated multipart", "multipart-complex2", {}, [
260 |         ["1.1.1.1", 21, 25],
261 |         ["2", 27, 57],
262 |         ["3", 60, 62],
263 |       ]),
264 |       make_body_test("No LF multipart", "multipartmalt-detach", {}, [
265 |         ["1", 20, 21],
266 |         ["2.1", 27, 38],
267 |         ["2.2", 42, 43],
268 |         ["2.3", 47, 48],
269 |         ["3", 53, 54],
270 |       ]),
271 |       make_body_test("Raw body", "multipart1", { bodyformat: "raw" }, [
272 |         ["", 4, 14],
273 |       ]),
274 |       [
275 |         "Base64 decode 1",
276 |         read_file("base64-1"),
277 |         { bodyformat: "decode" },
278 |         [
279 |           [
280 |             "",
281 |             "\r\nHello, world! (Again...)\r\n\r\nLet's see how well base64 " +
282 |               "text is handled.                            Yay, lots of space" +
283 |               "s! There's even a CRLF at the end and one at the beginning, bu" +
284 |               "t the output shouldn't have it.\r\n",
285 |           ],
286 |         ],
287 |       ],
288 |       [
289 |         "Base64 decode 2",
290 |         read_file("base64-2"),
291 |         { bodyformat: "decode" },
292 |         [
293 |           [
294 |             "",
295 |             "<html><body>This is base64 encoded HTML text, and the tags sho" +
296 |               "uldn't be stripped.\r\n<b>Bold text is bold!</b></body></html>" +
297 |               "\r\n",
298 |           ],
299 |         ],
300 |       ],
301 |       [
302 |         "Base64 decode line issues",
303 |         read_file("base64-2").then(function(s) {
304 |           return s.split(/(\r\n)/);
305 |         }),
306 |         { bodyformat: "decode" },
307 |         [
308 |           [
309 |             "",
310 |             "<html><body>This is base64 encoded HTML text, and the tags sho" +
311 |               "uldn't be stripped.\r\n<b>Bold text is bold!</b></body></html>" +
312 |               "\r\n",
313 |           ],
314 |         ],
315 |       ],
316 |       make_body_test("Base64 nodecode", "base64-1", {}, [["", 4, 9]]),
317 |       [
318 |         "QP decode",
319 |         read_file("bug505221"),
320 |         { pruneat: "1", bodyformat: "decode" },
321 |         [
322 |           [
323 |             "1",
324 |             '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"' +
325 |               '>\r\n<HTML><HEAD>\r\n<META HTTP-EQUIV="Content-Type" CONTENT=' +
326 |               '"text/html; charset=us-ascii">\r\n\r\n\r\n<META content="MSHT' +
327 |               'ML 6.00.6000.16735" name=GENERATOR></HEAD>\r\n<BODY> bbb\r\n<' +
328 |               "/BODY></HTML>",
329 |           ],
330 |         ],
331 |       ],
332 |       [
333 |         "Nested messages",
334 |         read_file("message-encoded"),
335 |         { bodyformat: "decode" },
336 |         [
337 |           ["1$", "This is a plain-text message."],
338 |           ["2$", "I am a plain-text message."],
339 |           ["3$", "I am an encoded plain-text message."],
340 |         ],
341 |       ],
342 |       [
343 |         "Nested message headers",
344 |         read_file("message-encoded"),
345 |         {},
346 |         {
347 |           "": {
348 |             "Content-Type": ['multipart/mixed; boundary="iamaboundary"'],
349 |           },
350 |           "1": { "Content-Type": ["message/rfc822"] },
351 |           "1$": { Subject: ["I am a subject"] },
352 |           "2": {
353 |             "Content-Type": ["message/global"],
354 |             "Content-Transfer-Encoding": ["base64"],
355 |           },
356 |           "2$": { Subject: ["\u79c1\u306f\u3001\u4ef6\u540d\u5348\u524d"] },
357 |           "3": {
358 |             "Content-Type": ["message/news"],
359 |             "Content-Transfer-Encoding": ["quoted-printable"],
360 |           },
361 |           "3$": { Subject: ["\u79c1\u306f\u3001\u4ef6\u540d\u5348\u524d"] },
362 |         },
363 |       ],
364 |     ];
365 |     parser_tests.forEach(function(data) {
366 |       arrayTest(data, function() {
367 |         return testParser(data[1], data[2], data[3]);
368 |       });
369 |     });
370 |   });
371 | 
372 |   describe("Torture tests", function() {
373 |     // Generate a very long message for tests
374 |     let teststr = "a";
375 |     for (let i = 0; i < 16; i++) {
376 |       teststr += teststr;
377 |     }
378 |     let parser_tests = [
379 |       [
380 |         "Base64 very long decode",
381 |         "Content-Transfer-Encoding: base64\r\n\r\n" + btoa(teststr) + "\r\n",
382 |         { bodyformat: "decode" },
383 |         [["", teststr]],
384 |       ],
385 |       make_body_test("Torture regular body", "mime-torture", {}, [
386 |         ["1", 17, 21],
387 |         ["2$.1", 58, 75],
388 |         ["2$.2.1", 83, 97],
389 |         ["2$.3", 102, 130],
390 |         ["3$", 155, 7742],
391 |         ["4", 7747, 8213],
392 |         ["5", 8218, 8242],
393 |         ["6$.1.1", 8284, 8301],
394 |         ["6$.1.2", 8306, 8733],
395 |         ["6$.2.1", 8742, 9095],
396 |         ["6$.2.2", 9100, 9354],
397 |         ["6$.2.3", 9357, 11794],
398 |         ["6$.2.4", 11797, 12155],
399 |         ["6$.3", 12161, 12809],
400 |         ["7$.1", 12844, 12845],
401 |         ["7$.2", 12852, 13286],
402 |         ["7$.3", 13288, 13297],
403 |         ["8$.1", 13331, 13358],
404 |         ["8$.2", 13364, 13734],
405 |         ["9$", 13757, 20179],
406 |         ["10", 20184, 21200],
407 |         ["11$.1", 21223, 22031],
408 |         ["11$.2", 22036, 22586],
409 |         ["12$.1", 22607, 23469],
410 |         ["12$.2", 23474, 23774],
411 |         ["12$.3$.1", 23787, 23795],
412 |         ["12$.3$.2.1", 23803, 23820],
413 |         ["12$.3$.2.2", 23825, 24633],
414 |         ["12$.3$.3", 24640, 24836],
415 |         ["12$.3$.4$", 24848, 25872],
416 |       ]),
417 |       make_body_test("Torture pruneat", "mime-torture", { pruneat: "4" }, [
418 |         ["4", 7747, 8213],
419 |       ]),
420 | 
421 |       // Test packetization problems
422 |       make_body_test(
423 |         "Large packets",
424 |         "multipart-complex1",
425 |         { _split: /(.{30})/ },
426 |         mpart_complex1
427 |       ),
428 |       make_body_test(
429 |         "Split on newline",
430 |         "multipart-complex1",
431 |         { _split: /(\r\n)/ },
432 |         mpart_complex1
433 |       ),
434 |       make_body_test(
435 |         "Pathological splitting",
436 |         "multipart-complex1",
437 |         { _split: "" },
438 |         mpart_complex1
439 |       ),
440 | 
441 |       // Non-CRLF line endings?
442 |       make_body_test(
443 |         "LF-based messages",
444 |         "multipart-complex1",
445 |         { _eol: "\n" },
446 |         mpart_complex1
447 |       ),
448 |       make_body_test(
449 |         "CR-based messages",
450 |         "multipart-complex1",
451 |         { _eol: "\r" },
452 |         mpart_complex1
453 |       ),
454 |     ];
455 |     parser_tests.forEach(function(data) {
456 |       arrayTest(data, function() {
457 |         return testParser(data[1], data[2], data[3]);
458 |       });
459 |     });
460 |   });
461 | 
462 |   describe("Header tests", function() {
463 |     let parser_tests = [
464 |       // Basic cases for headers
465 |       [
466 |         "Multiparts get headers",
467 |         read_file("multipart-complex1"),
468 |         {},
469 |         {
470 |           "": { "Content-Type": ['multipart/mixed; boundary="boundary"'] },
471 |           "1": {
472 |             "Content-Type": ["application/octet-stream"],
473 |             "Content-Transfer-Encoding": ["base64"],
474 |           },
475 |           "2": {
476 |             "Content-Type": ["image/png"],
477 |             "Content-Transfer-Encoding": ["base64"],
478 |           },
479 |           "3": {
480 |             "Content-Type": ['multipart/related; boundary="boundary2"'],
481 |           },
482 |           "3.1": { "Content-Type": ["text/html"] },
483 |           "4": { "Content-Type": ["text/plain"] },
484 |           "5": {},
485 |         },
486 |       ],
487 |       // 'From ' is not an [iterable] header
488 |       [
489 |         "Exclude mbox delimiter",
490 |         read_file("bugmail11"),
491 |         {},
492 |         {
493 |           "": {
494 |             "X-Mozilla-Status": ["0001"],
495 |             "X-Mozilla-Status2": ["00000000"],
496 |             "X-Mozilla-Keys": [""],
497 |             "Return-Path": [
498 |               "<example@example.com>",
499 |               "<bugzilla-daemon@mozilla.org>",
500 |             ],
501 |             "Delivered-To": ["bugmail@example.org"],
502 |             Received: [
503 |               "by 10.114.166.12 with SMTP id o12cs163262wae;" +
504 |                 "        Fri, 11 Apr 2008 07:17:31 -0700 (PDT)",
505 |               "by 10.115.60.1 with SMTP id n1mr214763wak.181.1207923450166;" +
506 |                 "        Fri, 11 Apr 2008 07:17:30 -0700 (PDT)",
507 |               "from webapp-out.mozilla.org (webapp01.sj.mozilla.com [63.245.208.1" +
508 |                 "46])        by mx.google.com with ESMTP id n38si6807242wag.2.2008." +
509 |                 "04.11.07.17.29;        Fri, 11 Apr 2008 07:17:30 -0700 (PDT)",
510 |               "from mrapp51.mozilla.org (mrapp51.mozilla.org [127.0.0.1])" +
511 |                 "\tby webapp-out.mozilla.org (8.13.8/8.13.8) with ESMTP id m3BEHTGU" +
512 |                 "030132\tfor <bugmail@example.org>; Fri, 11 Apr 2008 07:17:29 -0700",
513 |               "(from root@localhost)" +
514 |                 "\tby mrapp51.mozilla.org (8.13.8/8.13.8/Submit) id m3BEHTk4030129;" +
515 |                 "\tFri, 11 Apr 2008 07:17:29 -0700",
516 |             ],
517 |             "Received-Spf": [
518 |               "neutral (google.com: 63.245.208.146 is neither perm" +
519 |                 "itted nor denied by best guess record for domain of bugzilla-daemo" +
520 |                 "n@mozilla.org) client-ip=63.245.208.146;",
521 |             ],
522 |             "Authentication-Results": [
523 |               "mx.google.com; spf=neutral (google.com: 6" +
524 |                 "3.245.208.146 is neither permitted nor denied by best guess record" +
525 |                 " for domain of bugzilla-daemon@mozilla.org) smtp.mail=bugzilla-dae" +
526 |                 "mon@mozilla.org",
527 |             ],
528 |             Date: ["Fri, 11 Apr 2008 07:17:29 -0700"],
529 |             "Message-ID": [
530 |               "<200804111417.m3BEHTk4030129@mrapp51.mozilla.org>",
531 |             ],
532 |             From: ["bugzilla-daemon@mozilla.org"],
533 |             To: ["bugmail@example.org"],
534 |             Subject: ["Bugzilla: confirm account creation"],
535 |             "X-Bugzilla-Type": ["admin"],
536 |             "Content-Type": ['text/plain; charset="UTF-8"'],
537 |             "MIME-Version": ["1.0"],
538 |           },
539 |         },
540 |       ],
541 |     ];
542 |     parser_tests.forEach(function(data) {
543 |       arrayTest(data, function() {
544 |         return testParser(data[1], data[2], data[3]);
545 |       });
546 |     });
547 |   });
548 | 
549 |   describe("Charset tests", function() {
550 |     function buildTree(file, options) {
551 |       var tree = new Map();
552 |       var emitter = {
553 |         startPart(part, headers) {
554 |           tree.set(part, { headers, body: null });
555 |         },
556 |         deliverPartData(part, data) {
557 |           var obj = tree.get(part);
558 |           if (obj.body === null) {
559 |             obj.body = data;
560 |           } else if (typeof obj.body === "string") {
561 |             obj.body += data;
562 |           } else {
563 |             var newData = new Uint8Array(obj.body.length + data.length);
564 |             newData.set(obj.body);
565 |             newData.subarray(obj.body.length).set(data);
566 |             obj.body = newData;
567 |           }
568 |         },
569 |       };
570 |       return file.then(function(data) {
571 |         var parser = new MimeParser(emitter, options);
572 |         parser.deliverData(data);
573 |         parser.deliverEOF();
574 |         return tree;
575 |       });
576 |     }
577 |     it("Unicode decoding", function() {
578 |       return buildTree(read_file("shift-jis-image"), {
579 |         strformat: "unicode",
580 |         bodyformat: "decode",
581 |       }).then(function(tree) {
582 |         // text/plain should be transcoded...
583 |         assert.equal(
584 |           tree
585 |             .get("1")
586 |             .headers.get("Content-Type")
587 |             .get("charset"),
588 |           "Shift-JIS"
589 |         );
590 |         assert.equal(tree.get("1").headers.charset, "Shift-JIS");
591 |         assert.equal(
592 |           tree.get("1").headers.get("Content-Description"),
593 |           "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb"
594 |         );
595 |         assert.equal(
596 |           tree.get("1").body,
597 |           "Portable Network Graphics\uff08" +
598 |             "\u30dd\u30fc\u30bf\u30d6\u30eb\u30fb\u30cd\u30c3\u30c8\u30ef\u30fc" +
599 |             "\u30af\u30fb\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30b9\u3001PNG" +
600 |             "\uff09\u306f\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3067\u30d3\u30c3" +
601 |             "\u30c8\u30de\u30c3\u30d7\u753b\u50cf\u3092\u6271\u3046\u30d5\u30a1" +
602 |             "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" +
603 |             "\u3002\u5727\u7e2e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u3057" +
604 |             "\u3066Deflate\u3092\u63a1\u7528\u3057\u3066\u3044\u308b\u3001" +
605 |             "\u5727\u7e2e\u306b\u3088\u308b\u753b\u8cea\u306e\u52a3\u5316\u306e" +
606 |             "\u306a\u3044\u53ef\u9006\u5727\u7e2e\u306e\u753b\u50cf\u30d5\u30a1" +
607 |             "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" +
608 |             "\u3002\r\n"
609 |         );
610 |         // ... but not image/png
611 |         assert.ok(
612 |           !tree
613 |             .get("2")
614 |             .headers.get("Content-Type")
615 |             .has("charset")
616 |         );
617 |         assert.equal(tree.get("2").headers.charset, "");
618 |         assert.equal(
619 |           tree.get("2").headers.get("Content-Description"),
620 |           "\ufffdP\ufffdc\ufffd@\ufffd\ufffd\ufffdR\ufffdA\ufffdg\ufffd\ufffd"
621 |         );
622 |         assert.equal(
623 |           tree.get("2").headers.getRawHeader("Content-Description"),
624 |           "\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b"
625 |         );
626 |         var imageData =
627 |           "iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE" +
628 |           "QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA" +
629 |           "AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0" +
630 |           "klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS" +
631 |           "8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt" +
632 |           "lmUAAAAASUVORK5CYII=";
633 |         imageData = atob(imageData);
634 |         var asArray = new Uint8Array(imageData.length);
635 |         for (var i = 0; i < asArray.length; i++) {
636 |           asArray[i] = imageData.charCodeAt(i);
637 |         }
638 |         assert.deepEqual(tree.get("2").body, asArray);
639 | 
640 |         // Touching the header charset should change the interpretation.
641 |         tree.get("1").headers.charset = "Shift-JIS";
642 |         assert.equal(tree.get("1").headers.charset, "Shift-JIS");
643 |         assert.equal(
644 |           tree.get("1").headers.get("Content-Description"),
645 |           "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb"
646 |         );
647 |       });
648 |     });
649 |     it("Fallback charset decoding", function() {
650 |       return buildTree(read_file("shift-jis-image"), {
651 |         strformat: "unicode",
652 |         charset: "ISO-8859-1",
653 |         bodyformat: "decode",
654 |       }).then(function(tree) {
655 |         // text/plain should be transcoded...
656 |         assert.equal(
657 |           tree
658 |             .get("1")
659 |             .headers.get("Content-Type")
660 |             .get("charset"),
661 |           "Shift-JIS"
662 |         );
663 |         assert.equal(tree.get("1").headers.charset, "Shift-JIS");
664 |         assert.equal(
665 |           tree.get("1").headers.get("Content-Description"),
666 |           "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb"
667 |         );
668 |         assert.equal(
669 |           tree.get("1").body,
670 |           "Portable Network Graphics\uff08" +
671 |             "\u30dd\u30fc\u30bf\u30d6\u30eb\u30fb\u30cd\u30c3\u30c8\u30ef\u30fc" +
672 |             "\u30af\u30fb\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30b9\u3001PNG" +
673 |             "\uff09\u306f\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3067\u30d3\u30c3" +
674 |             "\u30c8\u30de\u30c3\u30d7\u753b\u50cf\u3092\u6271\u3046\u30d5\u30a1" +
675 |             "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" +
676 |             "\u3002\u5727\u7e2e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u3057" +
677 |             "\u3066Deflate\u3092\u63a1\u7528\u3057\u3066\u3044\u308b\u3001" +
678 |             "\u5727\u7e2e\u306b\u3088\u308b\u753b\u8cea\u306e\u52a3\u5316\u306e" +
679 |             "\u306a\u3044\u53ef\u9006\u5727\u7e2e\u306e\u753b\u50cf\u30d5\u30a1" +
680 |             "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" +
681 |             "\u3002\r\n"
682 |         );
683 |         // ... but not image/png
684 |         assert.ok(
685 |           !tree
686 |             .get("2")
687 |             .headers.get("Content-Type")
688 |             .has("charset")
689 |         );
690 |         assert.equal(tree.get("2").headers.charset, "ISO-8859-1");
691 |         assert.equal(
692 |           tree.get("2").headers.get("Content-Description"),
693 |           "\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039"
694 |         );
695 |         assert.equal(
696 |           tree.get("2").headers.getRawHeader("Content-Description"),
697 |           "\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b"
698 |         );
699 |         var imageData =
700 |           "iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE" +
701 |           "QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA" +
702 |           "AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0" +
703 |           "klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS" +
704 |           "8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt" +
705 |           "lmUAAAAASUVORK5CYII=";
706 |         imageData = atob(imageData);
707 |         var asArray = new Uint8Array(imageData.length);
708 |         for (var i = 0; i < asArray.length; i++) {
709 |           asArray[i] = imageData.charCodeAt(i);
710 |         }
711 |         assert.deepEqual(tree.get("2").body, asArray);
712 | 
713 |         // Touching the header charset should change the interpretation.
714 |         tree.get("1").headers.charset = "Shift-JIS";
715 |         assert.equal(tree.get("1").headers.charset, "Shift-JIS");
716 |         assert.equal(
717 |           tree.get("1").headers.get("Content-Description"),
718 |           "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb"
719 |         );
720 |       });
721 |     });
722 |     it("Forced charset decoding", function() {
723 |       return buildTree(read_file("shift-jis-image"), {
724 |         strformat: "unicode",
725 |         charset: "ISO-8859-1",
726 |         "force-charset": true,
727 |         bodyformat: "decode",
728 |       }).then(function(tree) {
729 |         // text/plain should be transcoded...
730 |         assert.equal(
731 |           tree
732 |             .get("1")
733 |             .headers.get("Content-Type")
734 |             .get("charset"),
735 |           "Shift-JIS"
736 |         );
737 |         assert.equal(tree.get("1").headers.charset, "ISO-8859-1");
738 |         assert.equal(
739 |           tree.get("1").headers.get("Content-Description"),
740 |           "\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039"
741 |         );
742 |         assert.equal(
743 |           tree.get("1").body,
744 |           "Portable Network Graphics\u0081i" +
745 |             "\u0192|\u0081[\u0192^\u0192u\u0192\u2039\u0081E\u0192l\u0192b" +
746 |             "\u0192g\u0192\u008f\u0081[\u0192N\u0081E\u0192O\u0192\u2030\u0192t" +
747 |             "\u0192B\u0192b\u0192N\u0192X\u0081APNG\u0081j\u201a\u00cd\u0192R" +
748 |             "\u0192\u201c\u0192s\u0192\u2026\u0081[\u0192^\u201a\u00c5\u0192r" +
749 |             "\u0192b\u0192g\u0192}\u0192b\u0192v\u2030\u00e6\u2018\u0153\u201a" +
750 |             "\u00f0\u02c6\u00b5\u201a\u00a4\u0192t\u0192@\u0192C\u0192\u2039" +
751 |             "\u0192t\u0192H\u0081[\u0192}\u0192b\u0192g\u201a\u00c5\u201a\u00a0" +
752 |             "\u201a\u00e9\u0081B\u02c6\u00b3\u008fk\u0192A\u0192\u2039\u0192S" +
753 |             "\u0192\u0160\u0192Y\u0192\u20ac\u201a\u00c6\u201a\u00b5\u201a" +
754 |             "\u00c4Deflate\u201a\u00f0\u008d\u00cc\u2014p\u201a\u00b5\u201a" +
755 |             "\u00c4\u201a\u00a2\u201a\u00e9\u0081A\u02c6\u00b3\u008fk\u201a" +
756 |             "\u00c9\u201a\u00e6\u201a\u00e9\u2030\u00e6\u017d\u00bf\u201a\u00cc" +
757 |             "\u2014\u00f2\u2030\u00bb\u201a\u00cc\u201a\u00c8\u201a\u00a2\u2030" +
758 |             "\u00c2\u2039t\u02c6\u00b3\u008fk\u201a\u00cc\u2030\u00e6\u2018" +
759 |             "\u0153\u0192t\u0192@\u0192C\u0192\u2039\u0192t\u0192H\u0081[\u0192" +
760 |             "}\u0192b\u0192g\u201a\u00c5\u201a\u00a0\u201a\u00e9\u0081B\r\n"
761 |         );
762 |         // ... but not image/png
763 |         assert.ok(
764 |           !tree
765 |             .get("2")
766 |             .headers.get("Content-Type")
767 |             .has("charset")
768 |         );
769 |         assert.equal(tree.get("2").headers.charset, "ISO-8859-1");
770 |         assert.equal(
771 |           tree.get("2").headers.get("Content-Description"),
772 |           "\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039"
773 |         );
774 |         assert.equal(
775 |           tree.get("2").headers.getRawHeader("Content-Description"),
776 |           "\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b"
777 |         );
778 |         var imageData =
779 |           "iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE" +
780 |           "QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA" +
781 |           "AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0" +
782 |           "klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS" +
783 |           "8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt" +
784 |           "lmUAAAAASUVORK5CYII=";
785 |         imageData = atob(imageData);
786 |         var asArray = new Uint8Array(imageData.length);
787 |         for (var i = 0; i < asArray.length; i++) {
788 |           asArray[i] = imageData.charCodeAt(i);
789 |         }
790 |         assert.deepEqual(tree.get("2").body, asArray);
791 | 
792 |         // Touching the header charset should change the interpretation.
793 |         tree.get("1").headers.charset = "Shift-JIS";
794 |         assert.equal(tree.get("1").headers.charset, "Shift-JIS");
795 |         assert.equal(
796 |           tree.get("1").headers.get("Content-Description"),
797 |           "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb"
798 |         );
799 |       });
800 |     });
801 |     it("Charset conversion", function() {
802 |       return buildTree(read_file("charsets"), {
803 |         strformat: "unicode",
804 |         bodyformat: "decode",
805 |       }).then(function(tree) {
806 |         var numParts = 14;
807 |         for (let i = 1; i < numParts; i += 2) {
808 |           assert.equal(tree.get("" + i).body, tree.get("" + (i + 1)).body);
809 |         }
810 |         assert.ok(!tree.has("" + (numParts + 1)));
811 |       });
812 |     });
813 |   });
814 | });
815 | 


--------------------------------------------------------------------------------
/lib/headerEmitter.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * This module implements the code for emitting structured representations of
  3 |  * MIME headers into their encoded forms. The code here is a companion to,
  4 |  * but completely independent of, jsmime.headerparser: the structured
  5 |  * representations that are used as input to the functions in this file are the
  6 |  * same forms that would be parsed.
  7 |  */
  8 | 
  9 | import { kMonthNames, uint8ArrayToString } from "./utils";
 10 | 
 11 | // Get the default structured encoders and add them to the map
 12 | import { spellings as preferredSpellings, encoders as headerEncoders} from "./structuredHeaders";
 13 | const encoders = new Map();
 14 | for (let [header, encoder] of headerEncoders) {
 15 |   addStructuredEncoder(header, encoder);
 16 | }
 17 | 
 18 | // Clamp a value in the range [min, max], defaulting to def
 19 | // if the object[property] does not contain the value.
 20 | function clamp(object, property, min, max, def) {
 21 |   if (!(property in object)) {
 22 |     return def;
 23 |   }
 24 |   let value = object[property];
 25 |   if (value < min) {
 26 |     return min;
 27 |   }
 28 |   if (value > max) {
 29 |     return max;
 30 |   }
 31 |   return value;
 32 | }
 33 | 
 34 | /**
 35 | * An object that can assemble structured header representations into their MIME
 36 | * representation.
 37 | *
 38 | * The character-counting portion of this class operates using individual JS
 39 | * characters as its representation of logical character, which is not the same
 40 | * as the number of octets used as UTF-8. If non-ASCII characters are to be
 41 | * included in headers without some form of encoding, then care should be taken
 42 | * to set the maximum line length to account for the mismatch between character
 43 | * counts and octet counts: the maximum line is 998 octets, which could be as
 44 | * few as 332 JS characters (non-BMP characters, although they take up 4 octets
 45 | * in UTF-8, count as 2 in JS strings).
 46 | *
 47 | * This code takes care to only insert line breaks at the higher-level breaking
 48 | * points in a header (as recommended by RFC 5322), but it may need to resort to
 49 | * including them more aggressively if this is not possible. If even aggressive
 50 | * line-breaking cannot allow a header to be emitted without violating line
 51 | * length restrictions, the methods will throw an exception to indicate this
 52 | * situation.
 53 | *
 54 | * In general, this code does not attempt to modify its input; for example, it
 55 | * does not attempt to change the case of any input characters, apply any
 56 | * Unicode normalization algorithms, or convert email addresses to ACE where
 57 | * applicable. The biggest exception to this rule is that most whitespace is
 58 | * collapsed to a single space, even in unstructured headers, while most leading
 59 | * and trailing whitespace is trimmed from inputs.
 60 | *
 61 | * @param {StreamHandler} handler The handler to which all output is sent.
 62 | *   @param {Function(String)} handler.deliverData Receives encoded data.
 63 | *   @param {Function()} handler.deliverEOF Sent when all text is sent.
 64 | * @param {Object} options Options for the emitter.
 65 | *   @param [options.softMargin=78] {30 <= Integer <= 900}
 66 | *     The ideal maximum number of logical characters to include in a line, not
 67 | *     including the final CRLF pair. Lines may exceed this margin if parameters
 68 | *     are excessively long.
 69 | *   @param [options.hardMargin=998] {softMargin <= Integer <= 998}
 70 | *     The maximum number of logical characters that can be included in a line,
 71 | *     not including the final CRLF pair. If this count would be exceeded, then
 72 | *     an error will be thrown and encoding will not be possible.
 73 | *   @param [options.useASCII=true] {Boolean}
 74 | *     If true, then RFC 2047 and RFC 2231 encoding of headers will be performed
 75 | *     as needed to retain headers as ASCII.
 76 | */
 77 | class HeaderEmitter {
 78 |   constructor(handler, options) {
 79 |     // The inferred value of options.useASCII
 80 |     this._useASCII = options.useASCII === undefined ? true : options.useASCII;
 81 |     this._sanitizeDate =
 82 |       options.sanitizeDate === undefined ? false : options.sanitizeDate;
 83 |     // The handler to use.
 84 |     this._handler = handler;
 85 |     /**
 86 |     * The current line being built; note that we may insert a line break in the
 87 |     * middle to keep under the maximum line length.
 88 |     *
 89 |     * @type String
 90 |     * @private
 91 |     */
 92 |     this._currentLine = "";
 93 | 
 94 |     // Our bounds for soft and margins are not completely arbitrary. The minimum
 95 |     // amount we need to encode is 20 characters, which can encode a single
 96 |     // non-BMP character with RFC 2047. The value of 30 is chosen to give some
 97 |     // breathing room for delimiters or other unbreakable characters. The maximum
 98 |     // length is 998 octets, per RFC 5322; soft margins are slightly lower to
 99 |     // allow for breathing room as well. The default of 78 for the soft margin is
100 |     // recommended by RFC 5322.
101 |     this._softMargin = clamp(options, "softMargin", 30, 900, 78);
102 |     this._hardMargin = clamp(
103 |       options,
104 |       "hardMargin",
105 |       this._softMargin,
106 |       998,
107 |       998
108 |     );
109 | 
110 |     /**
111 |     * The index of the last preferred breakable position in the current line.
112 |     *
113 |     * @type Integer
114 |     * @private
115 |     */
116 |     this._preferredBreakpoint = 0;
117 |   }
118 | 
119 | // Low-level methods
120 | // -----------------
121 | 
122 | // Explanation of the emitter internals:
123 | // RFC 5322 requires that we wrap our lines, ideally at 78 characters and at
124 | // least by 998 octets. We can't wrap in arbitrary places, but wherever CFWS is
125 | // valid... and ideally wherever clients are likely to expect it. In theory, we
126 | // can break between every token (this is how RFC 822 operates), but, in RFC
127 | // 5322, many of those breaks are relegated to obsolete productions, mostly
128 | // because it is common to not properly handle breaks in those locations.
129 | //
130 | // So how do we do line breaking? The algorithm we implement is greedy, to
131 | // simplify implementation. There are two margins: the soft margin, which we
132 | // want to keep within, and the hard margin, which we absolutely have to keep
133 | // within. There are also two kinds of break points: preferred and emergency.
134 | // As long as we keep the line within the hard margin, we will only break at
135 | // preferred breakpoints; emergency breakpoints are only used if we would
136 | // otherwise exceed the hard margin.
137 | //
138 | // For illustration, here is an example header and where these break points are
139 | // located:
140 | //
141 | //            To: John "The Rock" Smith <jsmith@a.long.domain.invalid>
142 | // Preferred:         ^          ^     ^
143 | // Emergency:         ^    ^     ^     ^^      ^ ^    ^      ^       ^
144 | //
145 | // Preferred breakpoints are indicated by setting the mayBreakAfter parameter of
146 | // addText to true, while emergency breakpoints are set after every token passed
147 | // into addText. This is handled implicitly by only adding text to _currentLine
148 | // if it ends in an emergency breakpoint.
149 | //
150 | // Internally, the code keeps track of margins by use of two variables. The
151 | // _softMargin and _hardMargin variables encode the positions at which code must
152 | // absolutely break, and are set up from the initial options parameter. Breaking
153 | // happens when _currentLine.length approaches these values, as mentioned above.
154 | 
155 | /**
156 | * Send a header line consisting of the first N characters to the handler.
157 | *
158 | * If the count parameter is missing, then we presume that the current header
159 | * value being emitted is done and therefore we should not send a continuation
160 | * space. Otherwise, we presume that we're still working, so we will send the
161 | * continuation space.
162 | *
163 | * @private
164 | * @param [count] {Integer} The number of characters in the current line to
165 | *   include before wrapping.
166 | */
167 |   _commitLine(count) {
168 |     let isContinuing = typeof count !== "undefined";
169 | 
170 |     // Split at the point, and lop off whitespace immediately before and after.
171 |     let firstN, lastN;
172 |     if (isContinuing) {
173 |       firstN = this._currentLine.slice(0, count).trimRight();
174 |       lastN = this._currentLine.slice(count).trimLeft();
175 |     } else {
176 |       firstN = this._currentLine.trimRight();
177 |       lastN = "";
178 |     }
179 | 
180 |     // Send the line plus the final CRLF.
181 |     this._handler.deliverData(firstN + "\r\n");
182 | 
183 |     // Fill the start of the line with the new data.
184 |     this._currentLine = lastN;
185 | 
186 |     // If this is a continuation, add an extra space at the beginning of the line.
187 |     // Adjust the breakpoint shift amount as well.
188 |     if (isContinuing) {
189 |       this._currentLine = " " + this._currentLine;
190 |     }
191 | 
192 |     // We will always break at a point at or after the _preferredBreakpoint, if it
193 |     // exists, so this always gets reset to 0.
194 |     this._preferredBreakpoint = 0;
195 |   }
196 | 
197 |   /**
198 |   * Reserve at least length characters in the current line. If there aren't
199 |   * enough characters, insert a line break.
200 |   *
201 |   * @private
202 |   * @param length {Integer} The number of characters to reserve space for.
203 |   * @return {Boolean} Whether or not there is enough space for length characters.
204 |   */
205 |   _reserveTokenSpace(length) {
206 |     // We are not going to do a sanity check that length is within the wrap
207 |     // margins. The rationale is that this lets code simply call this function to
208 |     // force a higher-level line break than normal preferred line breaks (see
209 |     // addAddress for an example use). The text that would be added may need to be
210 |     // itself broken up, so it might not need all the length anyways, but it
211 |     // starts the break already.
212 | 
213 |     // If we have enough space, we don't need to do anything.
214 |     if (this._currentLine.length + length <= this._softMargin) {
215 |       return true;
216 |     }
217 | 
218 |     // If we have a preferred breakpoint, commit the line at that point, and see
219 |     // if that is sufficient line-breaking.
220 |     if (this._preferredBreakpoint > 0) {
221 |       this._commitLine(this._preferredBreakpoint);
222 |       if (this._currentLine.length + length <= this._softMargin) {
223 |         return true;
224 |       }
225 |     }
226 | 
227 |     // At this point, we can no longer keep within the soft margin. Let us see if
228 |     // we can fit within the hard margin.
229 |     if (this._currentLine.length + length <= this._hardMargin) {
230 |       return true;
231 |     }
232 | 
233 |     // Adding the text to length would violate the hard margin as well. Break at
234 |     // the last emergency breakpoint.
235 |     if (this._currentLine.length > 0) {
236 |       this._commitLine(this._currentLine.length);
237 |     }
238 | 
239 |     // At this point, if there is still insufficient room in the hard margin, we
240 |     // can no longer do anything to encode this word. Bail.
241 |     return this._currentLine.length + length <= this._hardMargin;
242 |   }
243 | 
244 |   /**
245 |   * Adds a block of text to the current header, inserting a break if necessary.
246 |   * If mayBreakAfter is true and text does not end in whitespace, a single space
247 |   * character may be added to the output. If the text could not be added without
248 |   * violating line length restrictions, an error is thrown instead.
249 |   *
250 |   * @protected
251 |   * @param {String}  text          The text to add to the output.
252 |   * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
253 |   *                                breakpoint.
254 |   */
255 |   addText(text, mayBreakAfter) {
256 |     // Try to reserve space for the tokens. If we can't, give up.
257 |     if (!this._reserveTokenSpace(text.length)) {
258 |       throw new Error("Cannot encode " + text + " due to length.");
259 |     }
260 | 
261 |     this._currentLine += text;
262 |     if (mayBreakAfter) {
263 |       // Make sure that there is an extra space if text could break afterwards.
264 |       this._preferredBreakpoint = this._currentLine.length;
265 |       if (text[text.length - 1] != " ") {
266 |         this._currentLine += " ";
267 |       }
268 |     }
269 |   }
270 | 
271 |   /**
272 |   * Adds a block of text that may need quoting if it contains some character in
273 |   * qchars. If it is already quoted, no quoting will be applied. If the text
274 |   * cannot be added without violating maximum line length, an error is thrown
275 |   * instead.
276 |   *
277 |   * @protected
278 |   * @param {String}  text          The text to add to the output.
279 |   * @param {String}  qchars        The set of characters that cannot appear
280 |   *                                outside of a quoted string.
281 |   * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
282 |   *                                breakpoint.
283 |   */
284 |   addQuotable(
285 |     text,
286 |     qchars,
287 |     mayBreakAfter
288 |   ) {
289 |     // No text -> no need to be quoted (prevents strict warning errors).
290 |     if (text.length == 0) {
291 |       return;
292 |     }
293 | 
294 |     // Figure out if we need to quote the string. Don't quote a string which
295 |     // already appears to be quoted.
296 |     let needsQuote = false;
297 | 
298 |     if (!(text[0] == '"' && text[text.length - 1] == '"') && qchars != "") {
299 |       for (let i = 0; i < text.length; i++) {
300 |         if (qchars.includes(text[i])) {
301 |           needsQuote = true;
302 |           break;
303 |         }
304 |       }
305 |     }
306 | 
307 |     if (needsQuote) {
308 |       text = '"' + text.replace(/["\\]/g, "\\$&") + '"';
309 |     }
310 |     this.addText(text, mayBreakAfter);
311 |   }
312 | 
313 |   /**
314 |   * Adds a block of text that corresponds to the phrase production in RFC 5322.
315 |   * Such text is a sequence of atoms, quoted-strings, or RFC-2047 encoded-words.
316 |   * This method will preprocess input to normalize all space sequences to a
317 |   * single space. If the text cannot be added without violating maximum line
318 |   * length, an error is thrown instead.
319 |   *
320 |   * @protected
321 |   * @param {String}  text          The text to add to the output.
322 |   * @param {String}  qchars        The set of characters that cannot appear
323 |   *                                outside of a quoted string.
324 |   * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
325 |   *                                breakpoint.
326 |   */
327 |   addPhrase(text, qchars, mayBreakAfter) {
328 |     // Collapse all whitespace spans into a single whitespace node.
329 |     text = text.replace(/[ \t\r\n]+/g, " ");
330 | 
331 |     // If we have non-ASCII text, encode it using RFC 2047.
332 |     if (this._useASCII && nonAsciiRe.test(text)) {
333 |       this.encodeRFC2047Phrase(text, mayBreakAfter);
334 |       return;
335 |     }
336 | 
337 |     // If quoting the entire string at once could fit in the line length, then do
338 |     // so. The check here is very loose, but this will inform is if we are going
339 |     // to definitely overrun the soft margin.
340 |     if (this._currentLine.length + text.length < this._softMargin) {
341 |       try {
342 |         this.addQuotable(text, qchars, mayBreakAfter);
343 |         // If we don't have a breakpoint, and the text is encoded as a sequence of
344 |         // atoms (and not a quoted-string), then make the last space we added a
345 |         // breakpoint, regardless of the mayBreakAfter setting.
346 |         if (this._preferredBreakpoint == 0 && text.includes(" ")) {
347 |           if (this._currentLine[this._currentLine.length - 1] != '"') {
348 |             this._preferredBreakpoint = this._currentLine.lastIndexOf(" ");
349 |           }
350 |         }
351 |         return;
352 |       } catch (e) {
353 |         // If we get an error at this point, we failed to add the quoted string
354 |         // because the string was too long. Fall through to the case where we know
355 |         // that the input was too long to begin with.
356 |       }
357 |     }
358 | 
359 |     // If the text is too long, split the quotable string at space boundaries and
360 |     // add each word individually. If we still can't add all those words, there is
361 |     // nothing that we can do.
362 |     let words = text.split(" ");
363 |     for (let i = 0; i < words.length; i++) {
364 |       this.addQuotable(
365 |         words[i],
366 |         qchars,
367 |         i == words.length - 1 ? mayBreakAfter : true
368 |       );
369 |     }
370 |   }
371 | 
372 |   /**
373 |   * Add a block of text as a single RFC 2047 encoded word. This does not try to
374 |   * split words if they are too long.
375 |   *
376 |   * @private
377 |   * @param {Uint8Array} encodedText   The octets to encode.
378 |   * @param {Boolean}    useQP         If true, use quoted-printable; if false,
379 |   *                                   use base64.
380 |   * @param {Boolean}    mayBreakAfter If true, the end of this text is a
381 |   *                                   preferred breakpoint.
382 |   */
383 |   _addRFC2047Word(
384 |     encodedText,
385 |     useQP,
386 |     mayBreakAfter
387 |   ) {
388 |     let binaryString = uint8ArrayToString(encodedText);
389 |     let token;
390 |     if (useQP) {
391 |       token = qpPrelude;
392 |       for (let i = 0; i < encodedText.length; i++) {
393 |         if (
394 |           encodedText[i] < 0x20 ||
395 |           encodedText[i] >= 0x7f ||
396 |           qpForbidden.includes(binaryString[i])
397 |         ) {
398 |           let ch = encodedText[i];
399 |           token += "=" + hexString[(ch & 0xf0) >> 4] + hexString[ch & 0x0f];
400 |         } else if (binaryString[i] == " ") {
401 |           token += "_";
402 |         } else {
403 |           token += binaryString[i];
404 |         }
405 |       }
406 |       token += "?=";
407 |     } else {
408 |       token = b64Prelude + btoa(binaryString) + "?=";
409 |     }
410 |     this.addText(token, mayBreakAfter);
411 |   }
412 | 
413 |   /**
414 |   * Add a block of text as potentially several RFC 2047 encoded-word tokens.
415 |   *
416 |   * @protected
417 |   * @param {String}  text          The text to add to the output.
418 |   * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
419 |   *                                breakpoint.
420 |   */
421 |   encodeRFC2047Phrase(
422 |     text,
423 |     mayBreakAfter
424 |   ) {
425 |     // Start by encoding the text into UTF-8 directly.
426 |     let encodedText = new TextEncoder("UTF-8").encode(text);
427 | 
428 |     // Make sure there's enough room for a single token.
429 |     let minLineLen = b64Prelude.length + 10; // Eight base64 characters plus ?=
430 |     if (!this._reserveTokenSpace(minLineLen)) {
431 |       this._commitLine(this._currentLine.length);
432 |     }
433 | 
434 |     // Try to encode as much UTF-8 text as possible in each go.
435 |     let b64Len = 0,
436 |       qpLen = 0,
437 |       start = 0;
438 |     let maxChars =
439 |       this._softMargin - this._currentLine.length - (b64Prelude.length + 2);
440 |     for (let i = 0; i < encodedText.length; i++) {
441 |       let b64Inc = 0,
442 |         qpInc = 0;
443 |       // The length we need for base64 is ceil(length / 3) * 4...
444 |       if ((i - start) % 3 == 0) {
445 |         b64Inc += 4;
446 |       }
447 | 
448 |       // The length for quoted-printable is 3 chars only if encoded
449 |       if (
450 |         encodedText[i] < 0x20 ||
451 |         encodedText[i] >= 0x7f ||
452 |         qpForbidden.includes(String.fromCharCode(encodedText[i]))
453 |       ) {
454 |         qpInc = 3;
455 |       } else {
456 |         qpInc = 1;
457 |       }
458 | 
459 |       if (b64Len + b64Inc > maxChars && qpLen + qpInc > maxChars) {
460 |         // Oops, we have too many characters! We need to encode everything through
461 |         // the current character. However, we can't split in the middle of a
462 |         // multibyte character. In UTF-8, characters that start with 10xx xxxx are
463 |         // the middle of multibyte characters, so backtrack until the start
464 |         // character is legal.
465 |         while ((encodedText[i] & 0xc0) == 0x80) {
466 |           --i;
467 |         }
468 | 
469 |         // Add this part of the word and then make a continuation.
470 |         this._addRFC2047Word(
471 |           encodedText.subarray(start, i),
472 |           b64Len >= qpLen,
473 |           true
474 |         );
475 | 
476 |         // Reset the array for parsing.
477 |         start = i;
478 |         --i; // Reparse this character as well
479 |         b64Len = qpLen = 0;
480 |         maxChars = this._softMargin - b64Prelude.length - 3;
481 |       } else {
482 |         // Add the counts for the current variable to the count to encode.
483 |         b64Len += b64Inc;
484 |         qpLen += qpInc;
485 |       }
486 |     }
487 | 
488 |     // Add the entire array at this point.
489 |     this._addRFC2047Word(
490 |       encodedText.subarray(start),
491 |       b64Len >= qpLen,
492 |       mayBreakAfter
493 |     );
494 |   }
495 | 
496 |   // High-level methods
497 |   // ------------------
498 | 
499 |   /**
500 |   * Add the header name, with the colon and trailing space, to the output.
501 |   *
502 |   * @public
503 |   * @param {String} name The name of the header.
504 |   */
505 |   addHeaderName(name) {
506 |     this._currentLine = this._currentLine.trimRight();
507 |     if (this._currentLine.length > 0) {
508 |       this._commitLine();
509 |     }
510 |     this.addText(name + ": ", false);
511 |   }
512 | 
513 |   /**
514 |   * Add a header and its structured value to the output.
515 |   *
516 |   * The name can be any case-insensitive variant of a known structured header;
517 |   * the output will include the preferred name of the structure instead of the
518 |   * case put into the name. If no structured encoder can be found, and the input
519 |   * value is a string, then the header is assumed to be unstructured and the
520 |   * value is added as if {@link addUnstructured} were called.
521 |   *
522 |   * @public
523 |   * @param {String} name  The name of the header.
524 |   * @param          value The structured value of the header.
525 |   */
526 |   addStructuredHeader(name, value) {
527 |     let lowerName = name.toLowerCase();
528 |     if (encoders.has(lowerName)) {
529 |       this.addHeaderName(preferredSpellings.get(lowerName));
530 |       encoders.get(lowerName).call(this, value);
531 |     } else if (typeof value === "string") {
532 |       // Assume it's an unstructured header.
533 |       // All-lower-case-names are ugly, so capitalize first letters.
534 |       name = name.replace(/(^|-)[a-z]/g, function(match) {
535 |         return match.toUpperCase();
536 |       });
537 |       this.addHeaderName(name);
538 |       this.addUnstructured(value);
539 |     } else {
540 |       throw new Error("Unknown header " + name);
541 |     }
542 |   }
543 | 
544 |   /**
545 |   * Add a single address to the header. The address is an object consisting of a
546 |   * possibly-empty display name and an email address.
547 |   *
548 |   * @public
  * @param {Address} addr The address to be added.
550 |   * @param {String} addr.name  The (possibly-empty) name of the address to add.
551 |   * @param {String} addr.email The email of the address to add.
552 |   * @see headerparser.parseAddressingHeader
553 |   */
554 |   addAddress(addr) {
555 |     // If we have a display name, add that first.
556 |     if (addr.name) {
557 |       // This is a simple estimate that keeps names on one line if possible.
558 |       this._reserveTokenSpace(addr.name.length + addr.email.length + 3);
559 |       this.addPhrase(addr.name, ',()<>[]:;@."', true);
560 | 
561 |       // If we don't have an email address, don't write out the angle brackets for
562 |       // the address. It's already an abnormal situation should this appear, and
563 |       // this has better round-tripping properties.
564 |       if (!addr.email) {
565 |         return;
566 |       }
567 | 
568 |       this.addText("<", false);
569 |     }
570 | 
571 |     // Find the local-part and domain of the address, since the local-part may
572 |     // need to be quoted separately. Note that the @ goes to the domain, so that
573 |     // the local-part may be quoted if it needs to be.
574 |     let at = addr.email.lastIndexOf("@");
575 |     let localpart = "",
576 |       domain = "";
577 |     if (at == -1) {
578 |       localpart = addr.email;
579 |     } else {
580 |       localpart = addr.email.slice(0, at);
581 |       domain = addr.email.slice(at);
582 |     }
583 | 
584 |     this.addQuotable(localpart, '()<>[]:;@\\," !', false);
585 |     this.addText(domain + (addr.name ? ">" : ""), false);
586 |   }
587 | 
588 |   /**
589 |   * Add an array of addresses and groups to the output. Such an array may be
590 |   * found as the output of {@link headerparser.parseAddressingHeader}. Each
591 |   * element is either an address (an object with properties name and email), or a
592 |   * group (an object with properties name and group).
593 |   *
594 |   * @public
595 |   * @param {(Address|Group)[]} addrs A collection of addresses to add.
596 |   * @param {String}    addrs[i].name    The (possibly-empty) name of the
597 |   *                                     address or the group to add.
598 |   * @param {String}    [addrs[i].email] The email of the address to add.
599 |   * @param {Address[]} [addrs[i].group] A list of email addresses in the group.
600 |   * @see HeaderEmitter.addAddress
601 |   * @see headerparser.parseAddressingHeader
602 |   */
603 |   addAddresses(addresses) {
604 |     let needsComma = false;
605 |     for (let addr of addresses) {
606 |       // Add a comma if this is not the first element.
607 |       if (needsComma) {
608 |         this.addText(", ", true);
609 |       }
610 |       needsComma = true;
611 | 
612 |       if ("email" in addr) {
613 |         this.addAddress(addr);
614 |       } else {
615 |         // A group has format name: member, member;
616 |         // Note that we still add a comma after the group is completed.
617 |         this.addPhrase(addr.name, ',()<>[]:;@."', false);
618 |         this.addText(":", true);
619 | 
620 |         this.addAddresses(addr.group);
621 |         this.addText(";", true);
622 |       }
623 |     }
624 |   }
625 | 
626 |   /**
627 |   * Add an unstructured header value to the output. This effectively means only
  * inserting line breaks where necessary, and using RFC 2047 encoding where
629 |   * necessary.
630 |   *
631 |   * @public
632 |   * @param {String} text The text to add to the output.
633 |   */
634 |   addUnstructured(text) {
635 |     if (text.length == 0) {
636 |       return;
637 |     }
638 | 
639 |     // Unstructured text is basically a phrase that can't be quoted. So, if we
640 |     // have nothing in qchars, nothing should be quoted.
641 |     this.addPhrase(text, "", false);
642 |   }
643 | 
644 |   /**
645 |   * Add a date/time field to the output, using the JS date object as the time
646 |   * representation. The value will be output using the timezone offset of the
647 |   * date object, which is usually the timezone of the user (modulo timezone and
648 |   * DST changes).
649 |   *
650 |   * Note that if the date is an invalid date (its internal date parameter is a
651 |   * NaN value), this method throws an error instead of generating an invalid
652 |   * string.
653 |   *
654 |   * @public
655 |   * @param {Date} date The date to be added to the output string.
656 |   */
657 |   addDate(date) {
658 |     // Rather than make a header plastered with NaN values, throw an error on
659 |     // specific invalid dates.
660 |     if (isNaN(date.getTime())) {
661 |       throw new Error("Cannot encode an invalid date");
662 |     }
663 | 
664 |     let fullYear,
665 |       month,
666 |       dayOfMonth,
667 |       dayOfWeek,
668 |       hours,
669 |       minutes,
670 |       seconds,
671 |       tzOffset;
672 | 
673 |     if (this._sanitizeDate) {
674 |       fullYear = date.getUTCFullYear();
675 |       month = date.getUTCMonth();
676 |       dayOfMonth = date.getUTCDate();
677 |       dayOfWeek = date.getUTCDay();
678 |       hours = date.getUTCHours();
679 |       minutes = date.getUTCMinutes();
680 |       // To reduce the chance of fingerprinting the clock offset,
681 |       // round the time down to the nearest minute.
682 |       seconds = 0;
683 |       tzOffset = 0;
684 |     } else {
685 |       fullYear = date.getFullYear();
686 |       month = date.getMonth();
687 |       dayOfMonth = date.getDate();
688 |       dayOfWeek = date.getDay();
689 |       hours = date.getHours();
690 |       minutes = date.getMinutes();
691 |       seconds = date.getSeconds();
692 |       tzOffset = date.getTimezoneOffset();
693 |     }
694 | 
695 |     // RFC 5322 says years can't be before 1900. The after 9999 is a bit that
696 |     // derives from the specification saying that years have 4 digits.
697 |     if (fullYear < 1900 || fullYear > 9999) {
698 |       throw new Error("Date year is out of encodable range");
699 |     }
700 | 
701 |     // Start by computing the timezone offset for a day. We lack a good format, so
702 |     // the the 0-padding is done by hand. Note that the tzoffset we output is in
703 |     // the form ±hhmm, so we need to separate the offset (in minutes) into an hour
704 |     // and minute pair.
705 |     let tzOffHours = Math.abs(Math.trunc(tzOffset / 60));
706 |     let tzOffMinutes = Math.abs(tzOffset) % 60;
707 |     let tzOffsetStr =
708 |       (tzOffset > 0 ? "-" : "+") +
709 |       padTo2Digits(tzOffHours) +
710 |       padTo2Digits(tzOffMinutes);
711 | 
712 |     // Convert the day-time figure into a single value to avoid unwanted line
713 |     // breaks in the middle.
714 |     let dayTime = [
715 |       kDaysOfWeek[dayOfWeek] + ",",
716 |       dayOfMonth,
717 |       kMonthNames[month],
718 |       fullYear,
719 |       padTo2Digits(hours) +
720 |         ":" +
721 |         padTo2Digits(minutes) +
722 |         ":" +
723 |         padTo2Digits(seconds),
724 |       tzOffsetStr,
725 |     ].join(" ");
726 |     this.addText(dayTime, false);
727 |   }
728 | 
729 |   /**
730 |   * Signal that the current header has been finished encoding.
731 |   *
732 |   * @public
733 |   * @param {Boolean} deliverEOF If true, signal to the handler that no more text
734 |   *                             will be arriving.
735 |   */
736 |   finish(deliverEOF) {
737 |     this._commitLine();
738 |     if (deliverEOF) {
739 |       this._handler.deliverEOF();
740 |     }
741 |   }
742 | }
743 | 
744 | /**
745 | * Formatting helper to output numbers between 0-9 as 00-09 instead.
746 | */
function padTo2Digits(num) {
  // Values below 10 get a literal "0" prepended; anything else is returned as
  // its plain string form.
  if (num < 10) {
    return "0" + num;
  }
  return num.toString();
}
750 | 
/** RFC 822 day-of-week labels, in Sunday-first order. */
const kDaysOfWeek = [
  "Sun",
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat",
];

// Matches any character outside the printable ASCII range (space through
// tilde); such characters need to be encoded.
const nonAsciiRe = /[^\x20-\x7e]/;

// Opening sequences of an RFC 2047 encoded-word: UTF-8 charset with the
// "B" (base64) and "Q" (quoted-printable) encodings respectively.
const b64Prelude = "=?UTF-8?B?";
const qpPrelude = "=?UTF-8?Q?";

// ASCII characters forbidden in RFC 2047 encoded-words.
const qpForbidden = "\"#$%&'(),.:;<=>?@[\\]^_`{|}~";

// The sixteen upper-case hexadecimal digits, in value order.
const hexString = "0123456789ABCDEF";
765 | 
766 | /**
767 | * Make a streaming header emitter that outputs on the given handler.
768 | *
769 | * @param {StreamHandler} handler The handler to consume output
770 | * @param                 options Options to pass into the HeaderEmitter
771 | *                                constructor.
772 | * @returns {HeaderEmitter} A header emitter constructed with the given options.
773 | */
export function makeStreamingEmitter(handler, options) {
  // Thin factory: both arguments are forwarded untouched to the constructor.
  const emitter = new HeaderEmitter(handler, options);
  return emitter;
}
777 | 
// Minimal output handler that accumulates everything delivered to it in a
// single string, exposed as `value`.
class StringHandler {
  constructor() {
    // Text received so far, in arrival order.
    this.value = "";
  }

  // Append one more piece of output to the accumulated string.
  deliverData(str) {
    this.value = this.value + str;
  }

  // End-of-stream notification; there is nothing to finalize for a string.
  // eslint-disable-next-line class-methods-use-this
  deliverEOF() {}
}
790 | 
791 | /**
792 | * Given a header name and its structured value, output a string containing its
793 | * MIME-encoded value. The trailing CRLF for the header is included.
794 | *
795 | * @param {String} name    The name of the structured header.
796 | * @param          value   The value of the structured header.
797 | * @param          options Options for the HeaderEmitter constructor.
798 | * @returns {String} A MIME-encoded representation of the structured header.
799 | * @see HeaderEmitter.addStructuredHeader
800 | */
export function emitStructuredHeader(name, value, options) {
  // Collect the emitter's output in an in-memory string sink.
  const sink = new StringHandler();
  const emitter = new HeaderEmitter(sink, options);

  // Emit the single header, then flush and signal end-of-output.
  emitter.addStructuredHeader(name, value);
  emitter.finish(true);

  return sink.value;
}
808 | 
809 | /**
810 | * Given a map of header names and their structured values, output a string
811 | * containing all of their headers and their MIME-encoded values.
812 | *
813 | * This method is designed to be able to emit header values given the headerData
814 | * values produced by MIME parsing. Thus, the values of the map are arrays
815 | * corresponding to header multiplicity.
816 | *
817 | * @param {Map(String->Object[])} headerValues A map of header names to arrays
818 | *                                             of their structured values.
819 | * @param                         options      Options for the HeaderEmitter
820 | *                                             constructor.
821 | * @returns {String} A MIME-encoded representation of the structured header.
822 | * @see HeaderEmitter.addStructuredHeader
823 | */
export function emitStructuredHeaders(headerValues, options) {
  // Collect the emitter's output in an in-memory string sink.
  const sink = new StringHandler();
  const emitter = new HeaderEmitter(sink, options);

  // Each entry is a [name, values] pair; a header is emitted once for every
  // element of its values array.
  for (const entry of headerValues) {
    const headerName = entry[0];
    const occurrences = entry[1];
    occurrences.forEach((occurrence) => {
      emitter.addStructuredHeader(headerName, occurrence);
    });
  }

  // Flush the last header and signal end-of-output.
  emitter.finish(true);
  return sink.value;
}
835 | 
836 | /**
837 | * Add a custom structured MIME encoder to the set of known encoders. These
838 | * encoders are used for {@link emitStructuredHeader} and similar functions to
839 | * encode richer, more structured values instead of relying on string
840 | * representations everywhere.
841 | *
842 | * Structured encoders are functions which take in a single parameter
843 | * representing their structured value. The this parameter is set to be an
844 | * instance of {@link HeaderEmitter}, and it is intended that the several public
845 | * or protected methods on that class are useful for encoding values.
846 | *
847 | * There is a large set of structured encoders built-in to the jsmime library
848 | * already.
849 | *
850 | * @param {String}          header  The header name (in its preferred case) for
851 | *                                  which the encoder will be used.
852 | * @param {Function(Value)} encoder The structured encoder function.
853 | */
export function addStructuredEncoder(header, encoder) {
  // Encoders are registered under the lower-cased header name; registering
  // again for the same name replaces the previous encoder.
  const key = header.toLowerCase();
  encoders.set(key, encoder);

  // An already-recorded preferred spelling is kept as is; otherwise the
  // spelling passed in becomes the preferred one.
  if (preferredSpellings.has(key)) {
    return;
  }
  preferredSpellings.set(key, header);
}
861 | 


--------------------------------------------------------------------------------