├── .eslintignore ├── index.js ├── .gitignore ├── test ├── data │ ├── basic1 │ ├── charsets │ ├── shift-jis-image │ ├── multipart4 │ ├── base64-2 │ ├── multipart1 │ ├── multipart-base64-3 │ ├── multipart-base64-1 │ ├── multipart-base64-2 │ ├── multipart-binary │ ├── multipart2 │ ├── multipart3 │ ├── base64-1 │ ├── multipart-addresses │ ├── multipart-addresses-groups │ ├── message-encoded │ ├── multipart-empty-attachment │ ├── multipart-complex1 │ ├── multipart-encrypted-subject-utf8 │ ├── multipart-content-id │ ├── multipart-complex2 │ ├── multipartmalt-detach │ ├── bug505221 │ └── bugmail11 ├── test_custom_headers.js ├── test_structured_header_emitters.js ├── utils.js ├── test_mail_parser.ts ├── test_structured_headers.js ├── test_header_emitter.js └── test_mime_tree.js ├── lib ├── jsmime.js ├── textDecoders.js ├── utils.js ├── structuredHeaders.js ├── mailParser.js └── headerEmitter.js ├── .github ├── dependabot.yml └── workflows │ └── tests.yml ├── docs ├── Developing.mkd └── RelatedSpecifications.mkd ├── index.d.ts ├── LICENSE ├── package.json ├── .eslintrc.json ├── karma.conf.js └── README.md /.eslintignore: -------------------------------------------------------------------------------- 1 | index.d.ts -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | export { parseMail } from './lib/mailParser'; 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | node_modules 3 | test/.DS_Store 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /test/data/basic1: -------------------------------------------------------------------------------- 1 | Content-Type: text/plain; charset=iso-8859-1 2 | 3 | Hello, world! 
4 | -------------------------------------------------------------------------------- /test/data/charsets: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protonmail/jsmimeparser/main/test/data/charsets -------------------------------------------------------------------------------- /test/data/shift-jis-image: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protonmail/jsmimeparser/main/test/data/shift-jis-image -------------------------------------------------------------------------------- /test/data/multipart4: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary=boundary 2 | 3 | --boundary 4 | 5 | This has no headers, so should be recognized as plain text. 6 | 7 | --boundary-- 8 | -------------------------------------------------------------------------------- /lib/jsmime.js: -------------------------------------------------------------------------------- 1 | export { default as MimeParser } from './rawMimeParser'; 2 | export { default as headerparser } from './headerParser'; 3 | import * as headeremitter from './headerEmitter'; 4 | 5 | export { headeremitter }; 6 | -------------------------------------------------------------------------------- /test/data/base64-2: -------------------------------------------------------------------------------- 1 | Content-Type: text/html; encoding=iso-8859-1 2 | Content-Transfer-Encoding: base64 3 | 4 | PGh0bWw+PGJvZHk+VGhpcyBpcyBiYXNlNjQgZW5jb2RlZCBIVE1MIHRleHQsIGFuZCB0aGUgdGFncyB 5 | zaG91bGRuJ3QgYmUgc3RyaXBwZWQuDQo8Yj5Cb2xkIHRleHQgaXMgYm9sZCE8L2I+PC9ib2R5PjwvaH 6 | RtbD4NCg== 7 | -------------------------------------------------------------------------------- /test/data/multipart1: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; 2 | boundary="boundary" 3 | 
4 | This is a text message in MIME format. 5 | This part shouldn't appear in the output. 6 | 7 | --boundary 8 | Content-Type: text/plain 9 | 10 | Hello, world! (yet again...) 11 | 12 | --boundary-- 13 | -------------------------------------------------------------------------------- /test/data/multipart-base64-3: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary=boundary 2 | 3 | etc etc 4 | --boundary 5 | Content-Type: text/html 6 | Content-Transfer-Encoding: base64 7 | 8 | PGh0bWw+PGhlYWQ+VGhpcyB0aW1lLCB0aGUgdGFncw0Kc2hvdWxkIGJlIHN0cmlwcGVkIG91dC48L2hlYWQ+PC9odG1sPg== 9 | 10 | --boundary-- 11 | -------------------------------------------------------------------------------- /test/data/multipart-base64-1: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary=boundary 2 | Content-Transfer-Encoding: base64 3 | 4 | This part shouldn't appear 5 | --boundary 6 | Content-Type: text/plain 7 | Content-Transfer-Encoding: base64 8 | 9 | TXVsdGlwYXJ0IGJhc2U2NCBlbmNvZGVkIHRleHQu 10 | 11 | --boundary-- 12 | -------------------------------------------------------------------------------- /test/data/multipart-base64-2: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary=boundary 2 | 3 | yadda yadda 4 | 5 | --boundary 6 | Content-Type: text/html 7 | Content-Transfer-Encoding: base64 8 | 9 | PGh0bWw+PGhlYWQ+YmFzZTY0ZW5jb2RlZCBIVE1MIHRleHQgaW5zaWRlIGEgbXVsdGlwYXJ0IG1lc3N 10 | hZ2UuPC9oZWFkPjwvaHRtbD4= 11 | 12 | --boundary-- 13 | -------------------------------------------------------------------------------- /test/data/multipart-binary: -------------------------------------------------------------------------------- 1 | Subject: binary attachment 2 | Content-Type: multipart/mixed; boundary="vungrzvzr" 3 | 4 | --vungrzvzr 5 | Content-Type: text/plain; 6 | 
Content-Transfer-Encoding: 8bit 7 | 8 | see binary attachment 9 | 10 | --vungrzvzr 11 | Content-Type: application/octect-stream 12 | Content-Transfer-Encoding: binary 13 | 14 |  15 | --vungrzvzr-- 16 | -------------------------------------------------------------------------------- /test/data/multipart2: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary=boundary 2 | 3 | This is a text/html message. This part shouldn't appear at all! 4 | 5 | --boundary 6 | Content-Type: text/html 7 | 8 | Multipart HTML message with just a single part! 9 | 10 | 11 | --boundary-- 12 | 13 | Actually, this part shouldn't appear either. 14 | -------------------------------------------------------------------------------- /test/data/multipart3: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary=boundary 2 | 3 | --boundary 4 | Content-Type: text/html 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | Here, the HTML tags should be stripped out. 
14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | --boundary-- 30 | -------------------------------------------------------------------------------- /test/data/base64-1: -------------------------------------------------------------------------------- 1 | Content-Type: text/plain; charset=iso-8859-1 2 | Content-Transfer-Encoding: base64 3 | 4 | DQpIZWxsbywgd29ybGQhIChBZ2Fpbi4uLikNCg0KTGV0J3Mgc2VlIGhvdyB3ZWxsIGJhc2U2NCB0ZXh 5 | 0IGlzIGhhbmRsZWQuICAgICAgICAgICAgICAgICAgICAgICAgICAgIFlheSwgbG90cyBvZiBzcGFjZX 6 | MhIFRoZXJlJ3MgZXZlbiBhIENSTEYgYXQgdGhlIGVuZCBhbmQgb25lIGF0IHRoZSBiZWdpbm5pbmcsI 7 | GJ1dCB0aGUgb3V0cHV0IHNob3VsZG4ndCBoYXZlIGl0Lg0K 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "npm" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | allow: 8 | - dependency-name: "playwright" 9 | versioning-strategy: increase 10 | ignore: 11 | - dependency-name: "playwright" 12 | update-types: ["version-update:semver-patch"] # patches do not include browser version updates 13 | -------------------------------------------------------------------------------- /docs/Developing.mkd: -------------------------------------------------------------------------------- 1 | Developing 2 | ========== 3 | 4 | JSMime is intended to rely only on HTML 5 Web APIs and ES 6 as external 5 | requirements for its development. However, these are still a work in progress, 6 | and thus support for them may vary from browser to browser or among different 7 | rendering engines. Polyfilling may be necessary to get this to work, but 8 | polyfills should only be present in test framework code and not in the main source or test files themselves. 
9 | -------------------------------------------------------------------------------- /test/data/multipart-addresses: -------------------------------------------------------------------------------- 1 | From: Some One 2 | To: receiver@test.com, another_receiver@test.com 3 | Cc: copy@test.com 4 | Date: Sun, 12 Jun 2022 17:21:02 +0200 5 | MIME-Version: 1.0 6 | Content-Type: multipart/mixed; 7 | boundary="XXXXboundary text" 8 | 9 | This is a multipart message in MIME format. 10 | 11 | --XXXXboundary text 12 | Content-Type: text/plain 13 | 14 | this is the body text 15 | 16 | --XXXXboundary text 17 | Content-Type: text/plain; 18 | Content-Disposition: attachment; 19 | filename="test.txt" 20 | 21 | this is the attachment text 22 | 23 | --XXXXboundary text-- -------------------------------------------------------------------------------- /test/data/multipart-addresses-groups: -------------------------------------------------------------------------------- 1 | From: Some One 2 | To: undisclosed-recipients: ; 3 | Cc: Group A: AA , AB ;, Group B: b@b.com 4 | Date: Sun, 12 Jun 2022 17:21:02 +0200 5 | MIME-Version: 1.0 6 | Content-Type: multipart/mixed; 7 | boundary="XXXXboundary text" 8 | 9 | This is a multipart message in MIME format. 10 | 11 | --XXXXboundary text 12 | Content-Type: text/plain 13 | 14 | this is the body text 15 | 16 | --XXXXboundary text 17 | Content-Type: text/plain; 18 | Content-Disposition: attachment; 19 | filename="test.txt" 20 | 21 | this is the attachment text 22 | 23 | --XXXXboundary text-- -------------------------------------------------------------------------------- /test/data/message-encoded: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="iamaboundary" 2 | 3 | This is a text message in MIME format. 4 | This part shouldn't appear in the output. 
5 | 6 | --iamaboundary 7 | Content-Type: message/rfc822 8 | 9 | Subject: I am a subject 10 | 11 | This is a plain-text message. 12 | --iamaboundary 13 | Content-Type: message/global 14 | Content-Transfer-Encoding: base64 15 | 16 | U3ViamVjdDog56eB44Gv44CB5Lu25ZCN5Y2I5YmNDQoNCkkgYW0gYSBwbGFpbi10ZXh0IG1lc3NhZ2Uu 17 | --iamaboundary 18 | Content-Type: message/news 19 | Content-Transfer-Encoding: quoted-printable 20 | 21 | Subject: =e7=a7=81=e3=81=af=e3=80=81=e4=bb=b6=e5=90=8d=e5=8d=88=e5=89=8d 22 | 23 | I am an encoded plain-text message. 24 | --iamaboundary-- 25 | -------------------------------------------------------------------------------- /test/data/multipart-empty-attachment: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; protected-headers="v1"; boundary="===============3607788715713061906==" 2 | MIME-Version: 1.0 3 | From: sender 4 | Subject: encryption test 5 | To: receiver@test.com 6 | Date: Sun, 12 Jun 2022 17:21:02 +0200 7 | Content-Language: en-US 8 | 9 | --===============3607788715713061906== 10 | Content-Type: text/rfc822-headers; protected-headers="v1" 11 | Content-Disposition: inline 12 | Subject: encryption test 13 | To: receiver@test.com 14 | From: sender 15 | Date: Sun, 12 Jun 2022 17:21:02 +0200 16 | 17 | 18 | --===============3607788715713061906== 19 | Content-Type: text/plain; charset=utf-8 20 | Content-Transfer-Encoding: 7bit 21 | 22 | test body 23 | 24 | --===============3607788715713061906==-- -------------------------------------------------------------------------------- /test/data/multipart-complex1: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="boundary" 2 | 3 | This shouldn't appear. 
4 | --boundary 5 | Content-Type: application/octet-stream 6 | Content-Transfer-Encoding: base64 7 | 8 | VGhpcyBpc24ndCByZWFsbHkgYW4gYXBwbGljYXRpb24vb2N0ZXQtc3RyZWFtLiA7KQ==' 9 | 10 | --boundary 11 | Content-Type: image/png 12 | Content-Transfer-Encoding: base64 13 | 14 | TmVpdGhlciBpcyB0aGlzIGFuIGltYWdlL3BuZy4= 15 | 16 | --boundary 17 | Content-Type: multipart/related; boundary="boundary2" 18 | 19 | --boundary2 20 | Content-Type: text/html 21 | 22 | This part should be returned. 23 | 24 | --boundary2-- 25 | 26 | --boundary 27 | Content-Type: text/plain 28 | 29 | This part shouldn't. 30 | 31 | --boundary 32 | 33 | Neither should this part! 34 | 35 | --boundary-- 36 | -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | export type Headers = { [key: string]: string[] }; 2 | 3 | interface Attachment { 4 | content: Uint8Array; 5 | headers: Headers; 6 | size: number; 7 | fileName?: string; 8 | contentType?: string; 9 | contentDisposition?: string; 10 | contentId?: string; 11 | } 12 | 13 | type Address = { name: string, email: string }; 14 | type Group = { name: string, group: Address[] }; 15 | type AddressOrGroup = Address | Group; 16 | 17 | export interface ParsedMessage { 18 | attachments: Attachment[]; 19 | headers: Headers; 20 | body: { 21 | html: string | null; // 'text/html' body parts, joined together separated by
\n 22 | text: string | null; // 'text/plain' body parts, joined together separated by \n 23 | }, 24 | date?: Date; 25 | subject?: string, 26 | from?: Address, 27 | to?: AddressOrGroup[], 28 | cc?: AddressOrGroup[], 29 | bcc?: AddressOrGroup[], 30 | 'reply-to'?: Address 31 | } 32 | 33 | export function parseMail(message: string | Uint8Array): ParsedMessage; 34 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main] 4 | pull_request: 5 | branches: [main] 6 | 7 | jobs: 8 | e2e-tests: 9 | name: Tests 10 | strategy: 11 | fail-fast: false # if tests for one version fail, continue with the rest 12 | matrix: 13 | # run on multiple platforms to test platform-specific code, if present 14 | # (e.g. webkit's WebCrypto API implementation is different in macOS vs Linux) 15 | runner: ['ubuntu-latest', 'macos-latest', 'windows-latest'] 16 | runs-on: ${{ matrix.runner }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: actions/setup-node@v4 21 | 22 | - name: Install dependencies 23 | run: npm ci 24 | 25 | - name: Install Chrome 26 | run: npx playwright install --with-deps chromium 27 | 28 | - name: Install Firefox 29 | run: npx playwright install --with-deps firefox 30 | 31 | - name: Install Webkit 32 | run: npx playwright install --with-deps webkit 33 | 34 | - name: Run tests 35 | run: npm test -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Joshua Cranmer 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /test/data/multipart-encrypted-subject-utf8: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="------------7VgK7B2dk0pUYjHBY0Zi2Fda"; 2 | protected-headers="v1" 3 | Subject: =?UTF-8?B?c3ViamVjdCB3aXRoIGVtb2ppcyDwn5iD8J+Yhw==?= 4 | From: Sender 5 | To: receiver@example.com 6 | Message-ID: <7daafa18-8595-8065-3eba-b08c07becf36@example.com> 7 | 8 | --------------7VgK7B2dk0pUYjHBY0Zi2Fda 9 | Content-Type: multipart/mixed; boundary="------------D5jH01SvFZAwYShsjQamYW8w" 10 | 11 | --------------D5jH01SvFZAwYShsjQamYW8w 12 | Content-Type: text/plain; charset=UTF-8; format=flowed 13 | Content-Transfer-Encoding: base64 14 | 15 | dGVzdCB1dGY4IGluIGVuY3J5cHRlZCBzdWJqZWN0DQo= 16 | --------------D5jH01SvFZAwYShsjQamYW8w 17 | Content-Type: application/pgp-keys; name="OpenPGP_0xabc.asc" 18 | Content-Disposition: attachment; filename="OpenPGP_0xabc.asc" 19 | Content-Description: OpenPGP public key 20 | Content-Transfer-Encoding: quoted-printable 21 | 22 | -----BEGIN PGP PUBLIC KEY BLOCK----- 23 | 24 | ... 
25 | -----END PGP PUBLIC KEY BLOCK----- 26 | 27 | --------------D5jH01SvFZAwYShsjQamYW8w-- 28 | 29 | --------------7VgK7B2dk0pUYjHBY0Zi2Fda-- 30 | -------------------------------------------------------------------------------- /test/data/multipart-content-id: -------------------------------------------------------------------------------- 1 | Subject: attachment with ContentID 2 | Content-Type: multipart/mixed; boundary="vungrzvzr" 3 | 4 | --vungrzvzr 5 | Content-Type: text/plain; 6 | Content-Transfer-Encoding: 8bit 7 | 8 | the first attachment has a Content-ID set, the second does not. 9 | 10 | --vungrzvzr 11 | Content-Type: image/png 12 | Content-Transfer-Encoding: base64 13 | Content-ID: <001110.102211@siebel.com> 14 | 15 | iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f// 16 | b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAAC 17 | AEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3H 18 | zgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAA 19 | AIw322gDIPvtlmUAAAAASUVORK5CYII= 20 | 21 | --vungrzvzr 22 | Content-Disposition: attachment; filename="test.png" 23 | Content-Type: image/png 24 | Content-Transfer-Encoding: base64 25 | 26 | iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f// 27 | b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAAC 28 | AEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3H 29 | zgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAA 30 | AIw322gDIPvtlmUAAAAASUVORK5CYII= 31 | --vungrzvzr-- 32 | 33 | -------------------------------------------------------------------------------- /test/data/multipart-complex2: -------------------------------------------------------------------------------- 1 | From - Mon Jun 02 19:00:00 2008 2 | Content-Type: multipart/mixed; boundary="bou" 3 | Message-Id: <123456@example.com> 4 | 5 | Part 1 6 | --bou 7 | 
Content-Type: multipart/related; boundary="bound" 8 | 9 | Part 2 10 | --bound 11 | Content-Type: multipart/digest; boundary="boundar" 12 | 13 | Part 3 14 | --boundar 15 | Content-Type: multipart/alternative; boundary="boundary" 16 | 17 | Part 4 18 | --boundary 19 | Content-Type: application/octet-stream 20 | 21 | Wow, what alternatives! 22 | 23 | We're trying to confuse the parser here. 24 | 25 | --bou 26 | 27 | --bound 28 | 29 | --boundar 30 | 31 | --boundary 32 | Content-Type: application/pdf 33 | 34 | A choice between a PDF and an octet stream! How marvellous! 35 | 36 | --boundary-- 37 | 38 | --boundar 39 | Content-Type: multipart/mixed; boundary="boundary123456" 40 | 41 | --boundary123456 42 | Content-Type: text/plain 43 | 44 | This is the correct answer. 45 | 46 | --boundary123456-- 47 | 48 | --boundar-- 49 | 50 | --bound 51 | Content-Type: text/plain 52 | 53 | One last attempt at confusing the parser. 54 | 55 | --bound-- 56 | 57 | --bou 58 | Content-Type: text/html 59 | 60 | No harm in making another. 
61 | 62 | --bou-- 63 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@protontech/jsmimeparser", 3 | "version": "3.0.2", 4 | "description": "An asynchronous MIME parser written in JavaScript", 5 | "main": "index.js", 6 | "types": "index.d.ts", 7 | "files": [ 8 | "lib/", 9 | "index.d.ts", 10 | "index.js" 11 | ], 12 | "scripts": { 13 | "postversion": "git push && git push --tags", 14 | "test": "karma start karma.conf.js", 15 | "lint": "eslint lib test --quiet" 16 | }, 17 | "repository": { 18 | "type": "git", 19 | "url": "git+https://github.com/ProtonMail/jsmimeparser.git" 20 | }, 21 | "author": "ProtonMail", 22 | "license": "MIT", 23 | "bugs": { 24 | "url": "https://github.com/ProtonMail/jsmimeparser/issues" 25 | }, 26 | "devDependencies": { 27 | "@types/chai": "^4.3.20", 28 | "@types/mocha": "^10.0.10", 29 | "chai": "^4.5.0", 30 | "eslint": "^8.57.1", 31 | "eslint-config-airbnb-base": "^15.0.0", 32 | "eslint-plugin-import": "^2.32.0", 33 | "karma": "^6.4.4", 34 | "karma-chrome-launcher": "^3.2.0", 35 | "karma-firefox-launcher": "^2.1.3", 36 | "karma-mocha": "^2.0.1", 37 | "karma-mocha-reporter": "^2.2.5", 38 | "karma-webkit-launcher": "^1.3.1", 39 | "karma-webpack": "^5.0.1", 40 | "mocha": "^10.8.2", 41 | "playwright": "^1.54.2", 42 | "webpack": "^5.101.0" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /test/test_custom_headers.js: -------------------------------------------------------------------------------- 1 | 2 | import { assert } from "chai"; 3 | import { headeremitter, headerparser } from "../lib/jsmime"; 4 | 5 | describe("Custom decoder support", function() { 6 | function customDecoder(values) { 7 | let value = values.join(""); 8 | return atob(value); 9 | } 10 | function customEncoder(value) { 11 | this.addText(btoa(value), true); 12 | } 13 | 
it("addStructuredEncoder", function() { 14 | assert.equal( 15 | "X-Base64: String\r\n", 16 | headeremitter.emitStructuredHeader("X-Base64", "String", {}) 17 | ); 18 | headeremitter.addStructuredEncoder("X-Base64", customEncoder); 19 | assert.equal( 20 | "X-Base64: U3RyaW5n\r\n", 21 | headeremitter.emitStructuredHeader("X-Base64", "String", {}) 22 | ); 23 | assert.equal( 24 | "X-Base64: U3RyaW5n\r\n", 25 | headeremitter.emitStructuredHeader("x-bASe64", "String", {}) 26 | ); 27 | }); 28 | it("addStructuredDecoder", function() { 29 | assert.throws(function() { 30 | headerparser.parseStructuredHeader("X-Base64", "U3RyaW5n"); 31 | }, /Unknown structured header/); 32 | headerparser.addStructuredDecoder("X-Base64", customDecoder); 33 | assert.equal( 34 | "String", 35 | headerparser.parseStructuredHeader("X-Base64", "U3RyaW5n") 36 | ); 37 | assert.throws(function() { 38 | headerparser.addStructuredDecoder("To", customDecoder); 39 | }, /Cannot override header/); 40 | }); 41 | }); 42 | -------------------------------------------------------------------------------- /test/data/multipartmalt-detach: -------------------------------------------------------------------------------- 1 | From 2 | X-Account-Key: account1 3 | X-UIDL: 0397aedc0eee392343488772c79f110d 4 | X-Mozilla-Status: 0001 5 | X-Mozilla-Status2: 10000000 6 | X-Mozilla-Keys: 7 | Return-Path: 8 | X-Flags: 0000 9 | Date: Tue, 29 Aug 2006 16:42:08 GMT 10 | From: abc 11 | To: abc 12 | Subject: detach test 13 | Message-ID: 14 | MIME-Version: 1.0 15 | Content-Type: multipart/alternative; boundary="gmxboundary=-1156956072-29266-top" 16 | 17 | --gmxboundary=-1156956072-29266-top 18 | Content-Type: text/plain; charset="iso-8859-1" 19 | 20 | plain body 21 | --gmxboundary=-1156956072-29266-top 22 | Content-Type: multipart/related; boundary="gmxboundary=-1156956072-29266-sub" 23 | 24 | --gmxboundary=-1156956072-29266-sub 25 | Content-Type: text/html; charset="iso-8859-1" 26 | 27 | 29 | 30 | 31 | Update 32 | 33 | 34 | 35 | body 
hello 36 | 37 | 38 | --gmxboundary=-1156956072-29266-sub 39 | Content-Type: text/plain 40 | Content-Disposition: inline; filename="head_update.txt" 41 | 42 | head_update.txt 43 | --gmxboundary=-1156956072-29266-sub 44 | Content-Type: text/plain 45 | Content-Disposition: inline; filename="smurf_update_neu.txt" 46 | 47 | smurf_update_neu.txt 48 | --gmxboundary=-1156956072-29266-sub-- 49 | --gmxboundary=-1156956072-29266-top 50 | Content-Type: text/plain 51 | Content-Disposition: attachment; filename="head_update.txt" 52 | 53 | headUpdate.text 54 | --gmxboundary=-1156956072-29266-top-- 55 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | // "airbnb-base" 4 | "eslint:recommended" 5 | ], 6 | 7 | "parserOptions": { 8 | "ecmaVersion": 9, 9 | "sourceType": "module" 10 | }, 11 | 12 | "globals": { 13 | "window": "readonly", 14 | "btoa": "readonly", 15 | "atob": "readonly" 16 | }, 17 | "env": { 18 | "es6": true, 19 | "browser": true, 20 | "mocha": true 21 | }, 22 | "plugins": [ 23 | "import" 24 | ], 25 | "rules": { 26 | "no-unused-vars": ["error", {"args": "none"}], 27 | "prefer-spread": "off", 28 | "no-restricted-syntax": "off", 29 | "consistent-return": "off", 30 | "object-curly-newline": "off", 31 | "prefer-template": "off", 32 | "no-plusplus": "off", 33 | "no-continue": "off", 34 | "no-bitwise": "off", 35 | "no-await-in-loop": "off", 36 | "no-sequences": "warn", 37 | "no-param-reassign": "warn", 38 | "no-return-assign": "warn", 39 | "no-else-return": ["error", { "allowElseIf": true }], 40 | "no-shadow": "off", 41 | "no-undef": "error", 42 | "arrow-body-style": "off", 43 | "space-before-function-paren": "off", 44 | "operator-linebreak": "off", 45 | "implicit-arrow-linebreak": "off", 46 | "no-underscore-dangle": "off", 47 | "import/no-unresolved": ["error", { 48 | "ignore": ["^react$", "ttag", ".data"] 49 | }], 50 | 
"import/prefer-default-export": "off", 51 | "import/no-extraneous-dependencies": "off", 52 | "import/no-unassigned-import": "error", 53 | "import/named": "error", 54 | "import/extensions": "error", 55 | "max-len": ["error", { 56 | "ignoreComments": true, 57 | "code": 120, 58 | "ignoreStrings": true, 59 | "ignoreTemplateLiterals": true, 60 | "ignoreRegExpLiterals": true 61 | }], 62 | "no-multiple-empty-lines": ["error"], 63 | "no-trailing-spaces": ["error"], 64 | "eol-last": ["error"], 65 | "padded-blocks": "off", 66 | "max-classes-per-file": "off", 67 | "no-empty": "off" 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /test/data/bug505221: -------------------------------------------------------------------------------- 1 | From - Mon Jan 1 00:00:00 1965 2 | X-Mozilla-Status: 0001 3 | X-Mozilla-Status2: 10000000 4 | From: 5 | To: 6 | Subject: xxx 7 | Date: Tue, 9 Dec 2008 16:49:02 +0200 8 | MIME-Version: 1.0 9 | Content-Type: multipart/mixed; 10 | boundary="----=_NextPart_000_36B5_01C9DB8C.9514C300" 11 | X-Priority: 3 12 | X-MSMail-Priority: Normal 13 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.2180 14 | 15 | This is a multi-part message in MIME format. 
16 | 17 | ------=_NextPart_000_36B5_01C9DB8C.9514C300 18 | Content-Type: text/html; 19 | charset="iso-8859-1" 20 | Content-Transfer-Encoding: quoted-printable 21 | 22 | 23 | 24 | 26 | 27 | 28 | 29 | bbb 30 | 31 | ------=_NextPart_000_36B5_01C9DB8C.9514C300 32 | Content-Type: message/rfc822 33 | Content-Transfer-Encoding: 7bit 34 | Content-Disposition: attachment 35 | 36 | From: 37 | To: 38 | Subject: yyy 39 | Date: Sun, 7 Dec 2008 17:53:47 +0200 40 | MIME-Version: 1.0 41 | Content-Type: message/rfc822 42 | Content-Transfer-Encoding: 7bit 43 | Content-Disposition: attachment 44 | X-Priority: 3 45 | X-MSMail-Priority: Normal 46 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.2180 47 | 48 | From: 49 | To: 50 | Subject: ccc 51 | Date: Sat, 23 May 2009 09:55:19 +0200 52 | MIME-Version: 1.0 53 | Content-Type: text/html; 54 | charset="iso-8859-1" 55 | Content-Transfer-Encoding: quoted-printable 56 | X-Priority: 3 57 | X-MSMail-Priority: Normal 58 | X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.2180 59 | 60 | 61 | 62 | 63 | 65 | 67 | ccc 68 | 69 | 70 | 71 | 72 | 73 | 74 | ------=_NextPart_000_36B5_01C9DB8C.9514C300-- 75 | 76 | -------------------------------------------------------------------------------- /lib/textDecoders.js: -------------------------------------------------------------------------------- 1 | import { uint8ArrayToString, base64ToUint8Array } from './utils'; 2 | 3 | class UTF7TextDecoder { 4 | constructor() { 5 | this.collectInput = ''; 6 | this.decodeString = decodeUtf7; 7 | } 8 | decode(input, options = {}) { 9 | let more = options.stream; 10 | // There are cases where this is called without input, to flush the collected input 11 | if (input) { 12 | this.collectInput += uint8ArrayToString(input); 13 | } 14 | if (more) { 15 | return ""; 16 | } 17 | return this.decodeString(this.collectInput); 18 | } 19 | } 20 | 21 | class UTF7ImapTextDecoder extends UTF7TextDecoder { 22 | constructor() { 23 | super(); 24 | this.decodeString = decodeUtf7Imap; 
25 | } 26 | } 27 | 28 | export function MimeTextDecoder(charset, options) { 29 | switch (charset.toLowerCase()) { 30 | case "utf-7": 31 | return new UTF7TextDecoder(); 32 | case "utf-7-imap": 33 | return new UTF7ImapTextDecoder(); 34 | case "cp932": 35 | // https://bugzilla.mozilla.org/show_bug.cgi?id=1511950 36 | return new TextDecoder("shift_jis", options); 37 | default: 38 | return new TextDecoder(charset, options); 39 | } 40 | } 41 | 42 | // UTF7 helpers 43 | 44 | /** 45 | * Decode the base64 payload of one UTF-7 shifted sequence into a string 46 | * @see {@link https://github.com/emailjs/emailjs-utf7} for original implementation 47 | */ 48 | function decodeFromUTF7 (str) { 49 | const octets = base64ToUint8Array(str) 50 | let output = '' 51 | 52 | // Each pair of octets forms one big-endian UTF-16 code unit (high byte first). 53 | for (let i = 0, len = octets.length; i < len;) { 54 | output += String.fromCharCode(octets[i++] << 8 | octets[i++]) 55 | } 56 | return output 57 | } 58 | 59 | /** 60 | * Decodes UTF-7 string, see RFC 2152 61 | * @see {@link https://github.com/emailjs/emailjs-utf7} for original implementation 62 | * @param {String} str String to decode 63 | */ 64 | export const decodeUtf7 = str => 65 | str.replace(/\+([A-Za-z0-9/]*)-?/gi, (_, chunk) => chunk === '' ? '+' : decodeFromUTF7(chunk)) 66 | 67 | /** 68 | * Decodes modified UTF-7 (IMAP variant: '&' shift, ',' in place of '/') string, see RFC 3501 69 | * @see {@link https://github.com/emailjs/emailjs-utf7} for original implementation 70 | * @param {String} str String to decode 71 | */ 72 | export const decodeUtf7Imap = str => 73 | str.replace(/&([^-]*)-/g, (_, chunk) => (chunk === '') ? 
'&' : decodeFromUTF7(chunk.replace(/,/g, '/'))) 74 | -------------------------------------------------------------------------------- /test/data/bugmail11: -------------------------------------------------------------------------------- 1 | From - Mon Jun 02 19:00:00 2008 2 | X-Mozilla-Status: 0001 3 | X-Mozilla-Status2: 00000000 4 | X-Mozilla-Keys: 5 | Return-path: 6 | Delivered-To: bugmail@example.org 7 | Received: by 10.114.166.12 with SMTP id o12cs163262wae; 8 | Fri, 11 Apr 2008 07:17:31 -0700 (PDT) 9 | Received: by 10.115.60.1 with SMTP id n1mr214763wak.181.1207923450166; 10 | Fri, 11 Apr 2008 07:17:30 -0700 (PDT) 11 | Return-Path: 12 | Received: from webapp-out.mozilla.org (webapp01.sj.mozilla.com [63.245.208.146]) 13 | by mx.google.com with ESMTP id n38si6807242wag.2.2008.04.11.07.17.29; 14 | Fri, 11 Apr 2008 07:17:30 -0700 (PDT) 15 | Received-SPF: neutral (google.com: 63.245.208.146 is neither permitted nor denied by best guess record for domain of bugzilla-daemon@mozilla.org) client-ip=63.245.208.146; 16 | Authentication-Results: mx.google.com; spf=neutral (google.com: 63.245.208.146 is neither permitted nor denied by best guess record for domain of bugzilla-daemon@mozilla.org) smtp.mail=bugzilla-daemon@mozilla.org 17 | Received: from mrapp51.mozilla.org (mrapp51.mozilla.org [127.0.0.1]) 18 | by webapp-out.mozilla.org (8.13.8/8.13.8) with ESMTP id m3BEHTGU030132 19 | for ; Fri, 11 Apr 2008 07:17:29 -0700 20 | Received: (from root@localhost) 21 | by mrapp51.mozilla.org (8.13.8/8.13.8/Submit) id m3BEHTk4030129; 22 | Fri, 11 Apr 2008 07:17:29 -0700 23 | Date: Fri, 11 Apr 2008 07:17:29 -0700 24 | Message-Id: <200804111417.m3BEHTk4030129@mrapp51.mozilla.org> 25 | From: bugzilla-daemon@mozilla.org 26 | To: bugmail@example.org 27 | Subject: Bugzilla: confirm account creation 28 | X-Bugzilla-Type: admin 29 | Content-Type: text/plain; charset="UTF-8" 30 | MIME-Version: 1.0 31 | 32 | Bugzilla has received a request to create a user account 33 | using your email 
address (example@example.org). 34 | 35 | To confirm that you want to create an account using that email address, 36 | visit the following link: 37 | 38 | https://bugzilla.mozilla.org/token.cgi?t=xxxxxxxxxx&a=request_new_account 39 | 40 | If you are not the person who made this request, or you wish to cancel 41 | this request, visit the following link: 42 | 43 | https://bugzilla.mozilla.org/token.cgi?t=xxxxxxxxxx&a=cancel_new_account 44 | 45 | If you do nothing, the request will lapse after 3 days 46 | (on April 14th, 2008 at 07:17 PDT). 47 | 48 | -------------------------------------------------------------------------------- /karma.conf.js: -------------------------------------------------------------------------------- 1 | /* global require, Buffer, process, module */ 2 | 3 | const fs = require('fs'); 4 | const { firefox, chromium, webkit } = require('playwright'); 5 | process.env.CHROME_BIN = chromium.executablePath(); 6 | process.env.FIREFOX_BIN = firefox.executablePath(); 7 | process.env.WEBKIT_HEADLESS_BIN = webkit.executablePath(); 8 | 9 | // karma does not recognise the file as binary and automatically converts it to utf8 to apply preprocessors. 10 | // Using a middleware to load the file prevents the transformation and preserves the charset information. 11 | function charsets_middleware() { 12 | return function (request, response, next) { 13 | const match = request.url.match(/[/\w+]*charsets/); 14 | if (match && request.method === 'GET') { 15 | const path = match[0].replace(/\/base/, './'); 16 | // eslint-disable-next-line no-undef 17 | const data = Buffer.from(fs.readFileSync(path)); 18 | response.setHeader('Content-Type', 'application/octet-stream'); 19 | response.setHeader('Content-Length', data.length); 20 | response.writeHead(200); 21 | return response.end(data); 22 | } 23 | next(); 24 | } 25 | } 26 | 27 | module.exports = function(config) { 28 | config.set({ 29 | // base path that will be used to resolve all patterns (eg. 
files, exclude) 30 | basePath: '', 31 | 32 | // frameworks to use 33 | // available frameworks: https://www.npmjs.com/search?q=keywords:karma-adapter 34 | frameworks: ['mocha', 'webpack'], 35 | 36 | plugins: [ 37 | 'karma-mocha', 38 | 'karma-webpack', 39 | 'karma-mocha-reporter', 40 | 'karma-chrome-launcher', 41 | 'karma-firefox-launcher', 42 | 'karma-webkit-launcher', 43 | {'middleware:charsets': ['factory', charsets_middleware]} 44 | ], 45 | 46 | // list of files / patterns to load in the browser 47 | files: [ 48 | { pattern: 'test/test*', watched: false }, 49 | {pattern: 'test/data/**', watched: false, included: false, served: true}, 50 | ], 51 | 52 | beforeMiddleware: ['charsets'], 53 | 54 | // list of files / patterns to exclude 55 | exclude: [], 56 | 57 | // preprocess matching files before serving them to the browser 58 | // available preprocessors: https://www.npmjs.com/search?q=keywords:karma-preprocessor 59 | preprocessors: { 60 | 'test/test*': 'webpack' 61 | }, 62 | 63 | webpack: { 64 | resolve: { 65 | extensions: ['', '.js'] 66 | } 67 | }, 68 | 69 | // available reporters: https://www.npmjs.com/search?q=keywords:karma-reporter 70 | reporters: ['mocha'], 71 | 72 | // web server port 73 | port: 9876, 74 | 75 | // enable / disable colors in the output (reporters and logs) 76 | colors: true, 77 | 78 | // level of logging 79 | // possible values: config.LOG_DISABLE || config.LOG_ERROR || config.LOG_WARN || config.LOG_INFO || config.LOG_DEBUG 80 | logLevel: config.LOG_INFO, 81 | 82 | // enable / disable watching file and executing tests whenever any file changes 83 | autoWatch: false, 84 | 85 | customLaunchers: { 86 | ChromeHeadlessCI: { 87 | base: 'ChromeHeadless', 88 | flags: ['--no-sandbox'] 89 | } 90 | }, 91 | browsers: ['ChromeHeadlessCI', 'FirefoxHeadless', 'WebkitHeadless'], 92 | 93 | // Continuous Integration mode 94 | // if true, Karma captures browsers, runs the tests and exits 95 | singleRun: true, 96 | 97 | // Concurrency level 98 | // how many 
browser instances should be started simultaneously 99 | concurrency: Infinity 100 | }); 101 | }; 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a fork of [mozilla-comm/jsmime](https://github.com/mozilla-comm/jsmime) that has been updated to include the changes made in [mozilla/releases-comm-central](https://github.com/mozilla/releases-comm-central/tree/master/mailnews/mime/jsmime) (incl. UTF-7 support). 2 | Further, the library now uses ES6 modules and exposes a user-friendly `parseMail` function. 3 | 4 | ## Code Layout 5 | 6 | JSMime is a MIME parsing and composition library that is written completely in 7 | JavaScript using ES6 functionality and WebAPIs (where such APIs exist). There 8 | are a few features for which a standardized WebAPI does not exist; for these, 9 | external JavaScript libraries are used. 10 | 11 | The MIME parser consists of three logical phases of translation: 12 | 13 | 1. Build the MIME (and pseudo-MIME) tree. 14 | 2. Convert the MIME tree into a list of body parts and attachments. 15 | 3. Use the result to drive a displayed version of the message. 16 | 17 | The first stage is located in `rawMimeParser.js`, the second in `mailParser.js` (in particular, the `parseMail` function). The latter stage is left to the applications. 18 | 19 | ## Install 20 | 21 | ```js 22 | npm i @protontech/jsmimeparser 23 | ``` 24 | 25 | ## Usage 26 | 27 | The `parseMail` function is designed to be user-friendly but remains bare-bones in the sense that it does not add metadata or information that is not found in the original message (e.g. no automatic contentID or checksum generation for the attachments, unlike [Nodemailer's MailParser](https://github.com/nodemailer/mailparser)). 
28 | 29 | ```js 30 | import { parseMail } from '@protontech/jsmimeparser'; 31 | 32 | const eml = `Message-Id: <200308210240.h7L2e5A0016623@sphinx.got.net> 33 | Received: from source ([69.9.251.177]) by exprod5mx37.postini.com ... 34 | From: "Bob Example" 35 | To: "Alice Example" 36 | Date: Wed, 20 Aug 2003 16:02:43 -0500 37 | Subject: Test message 38 | MIME-Version: 1.0 39 | Content-Type: multipart/mixed; 40 | boundary="XXXXboundary text" 41 | 42 | This is a multipart message in MIME format. 43 | 44 | --XXXXboundary text 45 | Content-Type: text/plain 46 | 47 | Hello Alice. 48 | This is a test message with 5 lines in the message body 49 | and an attachment. 50 | Your friend, 51 | Bob 52 | --XXXXboundary text 53 | Content-Type: image/gif 54 | Content-Transfer-Encoding: Base64 55 | Content-Disposition: attachment; filename=smile.gif 56 | 57 | R0lGODlhyADIAMIAAP...+lmxwBLZ7FjJNkKsbcbyuGq0vKpH7bO50klqJ7YSmCYn4Yrrn4+elGsurYeoKy67e/ZqrrfogivvvONu4i6B8CJ6L77nguKigD0O7FK+mhhskoZIEhzwJwpjxLCFUy7co8ANH1xwxhY/LIpdIB/qmr6Hhvztfih+XPLKJ6c4HsYtK2ByvShb9UQCADs= 58 | 59 | --XXXXboundary text--` 60 | 61 | const { 62 | attachments, // [{ contentType: 'image/gif', fileName: 'smile.gif', content: Uint8Array[71, 73, 70..], ... }] 63 | body, // { text: 'Hello Alice.\nThis is..', html: '' } 64 | subject, // 'Test message' 65 | from, // // { name: 'Bob Example', email: 'bob@internet.com' } 66 | to, // [{ name: 'Alice Example', email: 'alice@internet.com' }] 67 | date, // Date('Wed, 20 Aug 2003 16:02:43 -0500') 68 | ...rest // headers and more 69 | } = parseMail(eml); 70 | ``` 71 | 72 | See `test/test_mail_parser.ts` for other examples with different MIME messages. Type information can be found in `index.d.ts`. 73 | 74 | Aside from `parseMail`, several lower-level functions are exported by `lib/jsmime` and `lib/mailParser` (mostly unchanged from the original jsmime & mozilla repos). 75 | 76 | ## Testing 77 | Headless Chrome (or Chromium), Firefox and Webkit are used for the tests. 
78 | To install any missing browsers automatically, you can run `npx playwright install-deps `. Alternatively, you can install them manually as you normally would on your platform. 79 | If you'd like to test on a subset of browsers, use e.g. `npm test -- --browsers ChromeHeadless,FirefoxHeadless`. 80 | -------------------------------------------------------------------------------- /test/test_structured_header_emitters.js: -------------------------------------------------------------------------------- 1 | 2 | import { assert } from 'chai'; 3 | import { headeremitter } from '../lib/jsmime'; 4 | import { MockDate } from "./utils"; 5 | 6 | function arrayTest(data, fn) { 7 | fn.toString = function() { 8 | let text = Function.prototype.toString.call(this); 9 | text = text.replace(/data\[([0-9]*)\]/g, function(m, p) { 10 | return JSON.stringify(data[p]); 11 | }); 12 | return text; 13 | }; 14 | return it(JSON.stringify(data[0]), fn); 15 | } 16 | 17 | function testHeader(header, tests) { 18 | describe(header, function() { 19 | tests.forEach(function(data) { 20 | arrayTest(data, function() { 21 | assert.deepEqual( 22 | headeremitter.emitStructuredHeader(header, data[0], { 23 | softMargin: 100, 24 | useASCII: true, 25 | }), 26 | (header + ": " + data[1]).trim() + "\r\n" 27 | ); 28 | }); 29 | }); 30 | }); 31 | } 32 | 33 | describe("Structured header emitters", function() { 34 | // Ad-hoc header tests 35 | // TODO: add structured encoder tests for Content-Type when it is added. 
36 | 37 | testHeader("Content-Transfer-Encoding", [ 38 | ["", ""], 39 | ["8bit", "8bit"], 40 | ["invalid", "invalid"], 41 | ]); 42 | 43 | // Non-ad-hoc header tests 44 | let addressing_headers = [ 45 | "From", 46 | "To", 47 | "Cc", 48 | "Bcc", 49 | "Sender", 50 | "Reply-To", 51 | "Resent-Bcc", 52 | "Resent-To", 53 | "Resent-From", 54 | "Resent-Cc", 55 | "Resent-Sender", 56 | "Approved", 57 | "Disposition-Notification-To", 58 | "Delivered-To", 59 | "Return-Receipt-To", 60 | "Resent-Reply-To", 61 | "Mail-Reply-To", 62 | "Mail-Followup-To", 63 | ]; 64 | let address_tests = [ 65 | [{ name: "", email: "" }, ""], 66 | [ 67 | { name: "John Doe", email: "john.doe@test.invalid" }, 68 | "John Doe ", 69 | ], 70 | [ 71 | [{ name: "John Doe", email: "john.doe@test.invalid" }], 72 | "John Doe ", 73 | ], 74 | [ 75 | { name: "undisclosed-recipients", group: [] }, 76 | "undisclosed-recipients: ;", 77 | ], 78 | ]; 79 | addressing_headers.forEach(function(header) { 80 | testHeader(header, address_tests); 81 | }); 82 | 83 | let date_headers = [ 84 | "Date", 85 | "Expires", 86 | "Injection-Date", 87 | "NNTP-Posting-Date", 88 | "Resent-Date", 89 | ]; 90 | let date_tests = [ 91 | [ 92 | new MockDate("2012-09-06T08:08:21-0700"), 93 | "Thu, 6 Sep 2012 08:08:21 -0700", 94 | ], 95 | ]; 96 | date_headers.forEach(function(header) { 97 | testHeader(header, date_tests); 98 | }); 99 | 100 | let unstructured_headers = [ 101 | "Comments", 102 | "Content-Description", 103 | "Keywords", 104 | "Subject", 105 | ]; 106 | let unstructured_tests = [ 107 | ["", ""], 108 | ["This is a subject", "This is a subject"], 109 | [ 110 | "\u79c1\u306f\u4ef6\u540d\u5348\u524d", 111 | "=?UTF-8?B?56eB44Gv5Lu25ZCN5Y2I5YmN?=", 112 | ], 113 | ]; 114 | unstructured_headers.forEach(function(header) { 115 | testHeader(header, unstructured_tests); 116 | }); 117 | 118 | it("emitStructuredHeaders", function() { 119 | let headers = new Map(); 120 | headers.set("From", [{ name: "", email: "bugzilla-daemon@mozilla.org" }]); 121 | 
headers.set("subject", ["[Bug 939557] browsercomps.dll failed to build"]); 122 | headers.set("x-capitalization-test", ["should capitalize"]); 123 | let str = headeremitter.emitStructuredHeaders(headers, {}); 124 | assert.equal( 125 | str, 126 | "From: bugzilla-daemon@mozilla.org\r\n" + 127 | "Subject: [Bug 939557] browsercomps.dll failed to build\r\n" + 128 | "X-Capitalization-Test: should capitalize\r\n" 129 | ); 130 | }); 131 | }); 132 | -------------------------------------------------------------------------------- /docs/RelatedSpecifications.mkd: -------------------------------------------------------------------------------- 1 | Related specifications and MIME extensions 2 | ========================================== 3 | 4 | Properly generating or parsing email and news messages requires referring to a 5 | diverse set of specifications. All of the specifications used to develop jsmime 6 | and to which frequent reference is made in the code are provided here as a list. 7 | Although many specifications have multiple versions, only the newest version of 8 | these specifications are linked to. As jsmime is still in development, some 9 | specifications are not yet consulted for implementation. These are marked with 10 | XXX markers. 
11 | 12 | Basic format of bodies: 13 | 14 | * [RFC 2045](http://tools.ietf.org/html/rfc2045): 15 | MIME Part 1, Format of Internet Message Bodies 16 | * [RFC 2046](http://tools.ietf.org/html/rfc2046): 17 | MIME Part 2, Media Types 18 | 19 | Structured header interpretation: 20 | 21 | * [RFC 2047](http://tools.ietf.org/html/rfc2047): 22 | MIME Part 3, Message Header Extensions for Non-ASCII Text 23 | * [RFC 2231](http://tools.ietf.org/html/rfc2231): 24 | MIME Parameter Value and Encoded Word Extensions 25 | * [RFC 5322](http://tools.ietf.org/html/rfc5322): 26 | Internet Message Format 27 | * [RFC 5536](http://tools.ietf.org/html/rfc5536): 28 | Netnews Article Format 29 | * [RFC 6532](http://tools.ietf.org/html/rfc6532): 30 | Internationalized Email Headers 31 | 32 | Body decoding: 33 | * XXX [Uuencode](http://pubs.opengroup.org/onlinepubs/7908799/xcu/uuencode.html) 34 | * XXX [yEnc](http://www.yenc.org/yenc-draft.1.3.txt) 35 | * XXX [TNEF](http://msdn.microsoft.com/en-us/library/cc425498%28v=exchg.80%29.aspx) 36 | * XXX [RFC 3156](http://tools.ietf.org/html/rfc3156) 37 | MIME Security with PGP 38 | * XXX [RFC 4880](http://tools.ietf.org/html/rfc4880) 39 | OpenPGP Message Format 40 | * XXX [RFC 5751](http://tools.ietf.org/html/rfc5751) 41 | S/MIME Version 3.2 Message Format Specification 42 | 43 | Other: 44 | * XXX [RFC 2387](http://tools.ietf.org/html/rfc2387) 45 | The MIME Multipart/Related Content-type 46 | * XXX [RFC 2392](http://tools.ietf.org/html/rfc2392) 47 | Content-ID and Message-ID Uniform Resource Locators 48 | * XXX [RFC 2557](http://tools.ietf.org/html/rfc2557) 49 | MIME Encapsulation of Aggregate Documents, such as HTML (MHTML) 50 | * [RFC 3501](http://tools.ietf.org/html/rfc3501) 51 | IMAP Version 4rev1 (specifically the part numbering section) 52 | * XXX [RFC 3676](http://tools.ietf.org/html/rfc3676) 53 | The Text/Plain Format and DelSp Parameters 54 | * XXX [RFC 3798](http://tools.ietf.org/html/rfc3798) 55 | Message Disposition Notification 56 | 57
| 58 | Willful violations 59 | ------------------ 60 | 61 | An unfortunate consequence of historical developments for email is that the set 62 | of specifications for handling email is often an insufficient guide to actually 63 | working with email in practice. For further aid to prospective implementers, the 64 | following list is a list of all deliberate deviations from specification 65 | requirements. 66 | 67 | * All three line conventions are treated as a CRLF (`\r`, `\n`, `\r\n`). In this 68 | parser, it is possible to use a mixture of line endings in the same file, 69 | although this is highly unlikely to come up in practice. 70 | * MIME and message headers need not be either ASCII or UTF-8, as use of other 71 | character sets without proper wrapping is very common in practice. For more 72 | precise rules on how non-ASCII headers are interpreted, read the comments in 73 | the source code. 74 | * CFWS is permitted in fewer places than the specifications require. This was 75 | done to match other parsers (including the one this replaced, among others). 76 | In particular, the Content-Type parameter needs to be a single run of text, so 77 | `multipart / mixed` would be treated as an invalid type. 78 | * If the first line of a headers block starts with the Berkeley mailbox 79 | delimiter (`From` followed by a space character), it is ignored. 80 | * A `message/rfc822`-like part may be encoded in quoted-printable or base64, 81 | while RFC 6532 only permits this for `message/global`. 82 | * Decoding the `%hh` specifiers (per RFC 2231) happens for any parameter whose 83 | name ends with a `'*'`, regardless of whether or not it is enclosed in a 84 | string or not. 85 | * RFC 2047 encoded-words may contain embedded spaces. 86 | * RFC 2047 decoding tolerates multibyte characters being split between adjacent 87 | encoded-words. 88 | * A quoted string whose contents is a full RFC 2047 encoded-word is decoded. 
/**
 * Turn quoted-printable text into a binary string.
 *
 * @param buffer {BinaryString} Quoted-printable input.
 * @returns {Array(BinaryString, BinaryString)} A pair whose first entry is the
 *          decoded text. The second entry is always "" because
 *          quoted-printable never needs input carried over to a later call.
 */
export function decode_qp(buffer) {
  const decoded = buffer.replace(
    // Either an "=XX" hex escape, or "=" followed by optional blanks and a
    // line ending (a soft line break).
    /=([0-9A-F][0-9A-F]|[ \t]*(\r\n|[\r\n]|$))/gi,
    (match, param) =>
      // Nothing but whitespace after "=" means a soft break: drop it entirely.
      param.trim().length == 0 ? "" : String.fromCharCode(parseInt(param, 16))
  );
  return [decoded, ""];
}

/**
 * Turn base64 text into a binary string. In contrast to window.atob, any
 * character outside the base64 alphabet is skipped rather than rejected.
 *
 * @param buffer {BinaryString} Base64 input.
 * @param more   {Boolean}      True when further input will follow, in which
 *                              case an incomplete trailing group is handed
 *                              back instead of being discarded.
 * @returns {Array(BinaryString, BinaryString)} A pair of the decoded text and
 *          the undecoded remainder to prepend to the next call.
 */
export function decode_base64(buffer, more) {
  let cleaned = buffer
    // Keep base64 characters only...
    .replace(/[^A-Za-z0-9+/=]/g, "")
    // ...and strip `=' runs that are followed by more data.
    .replace(/=+([A-Za-z0-9+/])/g, "$1");

  // atob works on groups of four characters; whatever does not fill a group
  // is either kept for the next call or dropped.
  const leftover = cleaned.length % 4;
  const retained = leftover != 0 && more ? cleaned.slice(-leftover) : "";

  cleaned = cleaned
    .substring(0, cleaned.length - leftover)
    // Groups consisting purely of padding carry no data.
    .replace(/(====)+$/g, "");

  return [atob(cleaned), retained];
}

/**
 * Copy a binary string into a Uint8Array, one UTF-16 code unit per byte.
 *
 * @param buffer {BinaryString} The string to convert.
 * @returns {Uint8Array} The resulting bytes.
 */
export function stringToUint8Array(buffer) {
  return Uint8Array.from({ length: buffer.length }, (_, index) =>
    buffer.charCodeAt(index)
  );
}

/**
 * Build a binary string out of a Uint8Array.
 *
 * @param buffer {Uint8Array} The bytes to convert.
 * @returns {string} A string with one character per input byte.
 */
export function uint8ArrayToString(buffer) {
  const pieces = [];
  // Convert in slices of 100 bytes rather than passing the whole buffer as
  // one argument list.
  for (let offset = 0; offset < buffer.length; offset += 100) {
    pieces.push(
      String.fromCharCode.apply(undefined, buffer.subarray(offset, offset + 100))
    );
  }
  return pieces.join("");
}

/** Decode strict base64 text (via atob) into its bytes. */
export const base64ToUint8Array = (base64) => {
  const binary = atob(base64);
  return stringToUint8Array(binary);
};

/** A list of month names for Date parsing. */
export const kMonthNames = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(" ");

/**
 * Join several Uint8Arrays into one.
 *
 * @param arrays {Array(Uint8Array)} The chunks, in output order.
 * @returns {Uint8Array} A new array holding all chunks back to back; when
 *          exactly one chunk is given, that chunk itself is returned.
 */
export function concatUint8Arrays(arrays) {
  if (arrays.length === 1) return arrays[0];

  let totalLength = 0;
  for (const chunk of arrays) {
    if (!(chunk instanceof Uint8Array)) {
      throw new Error('concatArrays: Data must be in the form of a Uint8Array');
    }
    totalLength += chunk.length;
  }

  const result = new Uint8Array(totalLength);
  let offset = 0;
  for (let index = 0; index < arrays.length; index++) {
    result.set(arrays[index], offset);
    offset += arrays[index].length;
  }
  return result;
}

/**
 * Render bytes as lowercase hexadecimal, two digits per byte.
 *
 * @param bytes {Uint8Array} The bytes to format.
 * @returns {string} The hexadecimal text.
 */
export function uint8ArrayToHex(bytes) {
  return Array.from(bytes, (byte) => byte.toString(16).padStart(2, '0')).join('');
}
10 | let parse = /\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d(Z|[+-]\d{4})/.exec( 11 | iso8601String 12 | ); 13 | let tzOffsetStr = parse[1]; 14 | if (tzOffsetStr == "Z") { 15 | this._tzOffset = 0; 16 | } else { 17 | this._tzOffset = 18 | parseInt(tzOffsetStr.substring(1, 3)) * 60 + 19 | parseInt(tzOffsetStr.substring(3)); 20 | if (tzOffsetStr[0] == "-") { 21 | this._tzOffset = -this._tzOffset; 22 | } 23 | } 24 | 25 | // To store the offset, we store both the real time in _realDate and a time 26 | // that is offset by the tzOffset in _shiftedDate. Only the getUTC* methods 27 | // should be used on these properties, to avoid problems caused by daylight 28 | // savings time or other timezone effects. This shifting is always legal 29 | // because ES6 is specified to assume that leap seconds do not exist, so there 30 | // are always 60 seconds in a minute. 31 | this._realDate = new Date(iso8601String); 32 | this._shiftedDate = new Date( 33 | this._realDate.getTime() + this._tzOffset * 60 * 1000 34 | ); 35 | } 36 | MockDate.prototype = { 37 | getTimezoneOffset() { 38 | // This property is reversed from how it's defined in ISO 8601, i.e., 39 | // UTC +0100 needs to return -60. 40 | return -this._tzOffset; 41 | }, 42 | getTime() { 43 | return this._realDate.getTime(); 44 | }, 45 | }; 46 | 47 | // Provide an implementation of Date methods that will be need in JSMime. For 48 | // the time being, we only need .get* methods. 49 | for (let name of Object.getOwnPropertyNames(Date.prototype)) { 50 | // Only copy getters, not setters or x.toString. 51 | if (!name.startsWith("get")) { 52 | continue; 53 | } 54 | // No redefining any other names on MockDate. 55 | if (MockDate.prototype.hasOwnProperty(name)) { 56 | continue; 57 | } 58 | 59 | if (name.includes("UTC")) { 60 | // 'name' is already supposed to be freshly bound per newest ES6 drafts, but 61 | // current ES6 implementations reuse the bindings. Until implementations 62 | // catch up, use a new let to bind it freshly. 
63 | let boundName = name; 64 | Object.defineProperty(MockDate.prototype, name, { 65 | value(...aArgs) { 66 | return Date.prototype[boundName].call(this._realDate, aArgs); 67 | }, 68 | }); 69 | } else { 70 | let newName = "getUTC" + name.substr(3); 71 | Object.defineProperty(MockDate.prototype, name, { 72 | value(...aArgs) { 73 | return Date.prototype[newName].call(this._shiftedDate, aArgs); 74 | }, 75 | }); 76 | } 77 | } 78 | 79 | 80 | // A file cache for read_file. 81 | const file_cache = {}; 82 | /** 83 | * Read a file into a string (all line endings become CRLF). 84 | * @param file The name of the file to read, relative to the data/ directory. 85 | * @param start The first line of the file to return, defaulting to 0 86 | * @param end The last line of the file to return, defaulting to the number of 87 | * lines in the file. 88 | * @return Promise The contents of the file as a binary string. 89 | */ 90 | export function read_file(file, start = undefined, end = undefined) { 91 | if (!(file in file_cache)) { 92 | var realFile = new Promise(function(resolve, reject) { 93 | fetch('base/test/data/' + file) 94 | .then(response => response.ok ? 
response.arrayBuffer() : reject(new Error('error fetching file'))) 95 | .then(buffer => { 96 | resolve(new Uint8Array(buffer)) 97 | }) 98 | .catch(err => reject(err)) 99 | }); 100 | var loader = realFile.then(function(contents) { 101 | var inStrForm = ""; 102 | while (contents.length > 0) { 103 | inStrForm += String.fromCharCode.apply( 104 | null, 105 | contents.subarray(0, 1024) 106 | ); 107 | contents = contents.subarray(1024); 108 | } 109 | return inStrForm.split(/\r\n|[\r\n]/); 110 | }); 111 | file_cache[file] = loader; 112 | } 113 | return file_cache[file].then(function(contents) { 114 | if (start !== undefined) { 115 | contents = contents.slice(start - 1, end - 1); 116 | } 117 | return contents.join("\r\n"); 118 | }); 119 | } 120 | 121 | export function read_file_raw(file) { 122 | return new Promise(function(resolve, reject) { 123 | fetch('base/test/data/' + file) 124 | .then(response => response.ok ? response.arrayBuffer() : reject(new Error('error fetching file'))) 125 | .then(buffer => { 126 | resolve(new Uint8Array(buffer)) 127 | }) 128 | .catch(err => reject(err)) 129 | }); 130 | } 131 | 132 | export function isFirefox() { 133 | return navigator.userAgent.toLowerCase().includes('firefox') 134 | } 135 | -------------------------------------------------------------------------------- /lib/structuredHeaders.js: -------------------------------------------------------------------------------- 1 | /** 2 | * This file implements knowledge of how to encode or decode structured headers 3 | * for several key headers. It is not meant to be used externally to jsmime. 
/**
 * Registry of structured-header codecs: for each known header, how its raw
 * value(s) are decoded into a structured value and how such a value is
 * written back out. Intended for use inside jsmime only.
 */

const structuredDecoders = new Map();
const structuredEncoders = new Map();
const preferredSpellings = new Map();

/**
 * Register decoder, encoder and canonical spelling for one header, keyed by
 * the lower-cased header name.
 */
function addHeader(name, decoder, encoder) {
  const key = name.toLowerCase();
  structuredDecoders.set(key, decoder);
  structuredEncoders.set(key, encoder);
  preferredSpellings.set(key, name);
}

// ---- Addressing headers ----------------------------------------------------
// Every addressing header is treated as a list of addresses (not strictly true
// for From, but close enough). Each raw header value is parsed by the header
// parser bound as `this`, and the results are concatenated.
function parseAddress(value) {
  let addresses = [];
  value.forEach((header) => {
    addresses = addresses.concat(this.parseAddressingHeader(header, true));
  });
  return addresses;
}
function writeAddress(value) {
  // A single entry is accepted in place of an array.
  this.addAddresses(Array.isArray(value) ? value : [value]);
}

// ---- Parameter-based headers -----------------------------------------------
// The individual parameter headers all differ slightly, hence the flags.
function parseParameterHeader(value, do2231, do2047) {
  // Redefinitions are ignored: only the first header value is parsed.
  return this.parseParameterHeader(value[0], do2231, do2047);
}

// RFC 2045
function parseContentType(value) {
  let params = parseParameterHeader.call(this, value, false, false);
  let parts = params.preSemi.split("/");
  if (parts.length != 2) {
    // Malformed. Return to text/plain. Evil, ain't it?
    params = new Map();
    parts = ["text", "plain"];
  }
  const [mediatype, subtype] = parts.map((part) => part.toLowerCase());

  // The result is a Map of lower-cased parameters that also carries the parsed
  // type as plain properties.
  const structure = new Map();
  Object.assign(structure, {
    mediatype,
    subtype,
    type: mediatype + "/" + subtype,
  });
  params.forEach((paramValue, paramName) => {
    structure.set(paramName.toLowerCase(), paramValue);
  });
  return structure;
}

// ---- Unstructured headers --------------------------------------------------
// Only the first header value is used; RFC 2047 words in it are decoded.
function parseUnstructured(values) {
  return this.decodeRFC2047Words(values[0]);
}
function writeUnstructured(value) {
  this.addUnstructured(value);
}

// ---- Message-ID headers ----------------------------------------------------
// TODO: Proper parsing support for these headers is currently unsupported;
// they are handled like unstructured text in both directions.
function parseMessageID(values) {
  return this.decodeRFC2047Words(values[0]);
}
function writeMessageID(value) {
  this.addUnstructured(value);
}

// ---- Date headers ----------------------------------------------------------
function parseDate(values) {
  return this.parseDateHeader(values[0]);
}
function writeDate(value) {
  this.addDate(value);
}

// Some clients like outlook.com send non-compliant References headers that
// separate values using commas. Also, some clients don't separate References
// with spaces, since these are optional according to RFC2822. So here we
// preprocess these headers (see bug 1154521 and bug 1197686).
function preprocessMessageIDs(values) {
  const ids = String(values).match(/<[^>]*>/g) || [];
  return ids.join(" ");
}

// ---- Registrations (the order below is the Map insertion order) ------------

[
  // Addressing headers from RFC 5322:
  "Bcc",
  "Cc",
  "From",
  "Reply-To",
  "Resent-Bcc",
  "Resent-Cc",
  "Resent-From",
  "Resent-Reply-To",
  "Resent-Sender",
  "Resent-To",
  "Sender",
  "To",
  // From RFC 5536:
  "Approved",
  // From RFC 3798:
  "Disposition-Notification-To",
  // Non-standard headers:
  "Delivered-To",
  "Return-Receipt-To",
  // http://cr.yp.to/proto/replyto.html
  "Mail-Reply-To",
  "Mail-Followup-To",
].forEach((name) => addHeader(name, parseAddress, writeAddress));

structuredDecoders.set("Content-Type", parseContentType);

[
  // RFC 5322
  "Comments",
  "Keywords",
  "Subject",
  // RFC 2045
  "MIME-Version",
  "Content-Description",
  // RFC 7231
  "User-Agent",
].forEach((name) => addHeader(name, parseUnstructured, writeUnstructured));

[
  // RFC 5322
  "Date",
  "Resent-Date",
  // RFC 5536
  "Expires",
  "Injection-Date",
  "NNTP-Posting-Date",
].forEach((name) => addHeader(name, parseDate, writeDate));

// RFC 5322
["Message-ID", "Resent-Message-ID"].forEach((name) =>
  addHeader(name, parseMessageID, writeMessageID)
);

// Miscellaneous headers (those that don't fall under the above schemes):

// RFC 2047
structuredDecoders.set("Content-Transfer-Encoding", (values) =>
  values[0].toLowerCase()
);
structuredEncoders.set("Content-Transfer-Encoding", writeUnstructured);

structuredDecoders.set("References", preprocessMessageIDs);
structuredDecoders.set("In-Reply-To", preprocessMessageIDs);

export {
  structuredDecoders as decoders,
  structuredEncoders as encoders,
  preferredSpellings as spellings,
};
boundary="------------cJMvmFk1NneB7MT4jwYHY7ap" 28 | 29 | This is a multi-part message in MIME format. 30 | --------------cJMvmFk1NneB7MT4jwYHY7ap 31 | Content-Type: text/plain; charset=UTF-8; 32 | Content-Transfer-Encoding: 8bit 33 | 34 | Import HTML cöntäct//Subjεέςτ// 35 | 36 | --------------cJMvmFk1NneB7MT4jwYHY7ap--`; 37 | const { body } = parseMail(eml); 38 | 39 | expect(body.text).to.equal('Import HTML cöntäct//Subjεέςτ//\n'); 40 | }); 41 | 42 | it('correctly parses SHIFT-JIS body with png attachment', async () => { 43 | const expectedText = 'Portable Network Graphics(ポータブル・ネットワーク・グラフィックス、PNG)はコンピュータでビットマップ画像を扱うファイルフォーマットである。圧縮アルゴリズムとしてDeflateを採用している、圧縮による画質の劣化のない可逆圧縮の画像ファイルフォーマットである。\n'; 44 | const expectedAttachmentContent = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvtlmUAAAAASUVORK5CYII='; 45 | 46 | const eml = await read_file_raw("shift-jis-image"); 47 | const { body, subject, headers, attachments: [attachment] } = parseMail(eml); 48 | 49 | expect(body.text).to.equal(expectedText); 50 | expect(subject).to.equal('Shift-JIS and PNG test'); 51 | expect(headers.subject[0]).to.equal('Shift-JIS and PNG test'); 52 | expect(toBase64(attachment.content)).to.equal(expectedAttachmentContent); 53 | expect(attachment.size).to.equal(251); 54 | expect(attachment.contentType).to.equal('image/png'); 55 | expect(attachment.fileName).to.equal(''); 56 | }); 57 | 58 | it('correctly reads binary attachments', async () => { 59 | const eml = await read_file_raw("multipart-binary"); 60 | const { attachments: [attachment] } = parseMail(eml); 61 | 62 | expect(attachment.content).to.deep.equal(new Uint8Array([1, 2, 3])); 63 | expect(attachment.contentType).to.equal('application/octect-stream'); 64 | 
expect(attachment.fileName).to.equal(''); 65 | }); 66 | 67 | it('includes the content-id and filename for each attachment', async () => { 68 | const eml = await read_file_raw("multipart-content-id"); 69 | const { attachments: [attachment1, attachment2] } = parseMail(eml); 70 | 71 | expect(attachment1.content).to.deep.equal(attachment2.content); 72 | expect(attachment1.contentId).to.equal('<001110.102211@siebel.com>'); 73 | expect(attachment1.contentType).to.equal('image/png'); 74 | expect(attachment1.fileName).to.equal(''); 75 | expect(attachment2.contentType).to.equal('image/png'); 76 | expect(attachment2.fileName).to.equal('test.png'); 77 | }); 78 | 79 | it('returns an empty array for empty attachment body', async () => { 80 | const eml = await read_file_raw("multipart-empty-attachment"); 81 | const { attachments: [attachment] } = parseMail(eml); 82 | 83 | expect(attachment.content).to.be.instanceOf(Uint8Array); 84 | expect(attachment.content).to.have.length(0); 85 | expect(attachment.contentType).to.equal('text/rfc822-headers'); 86 | expect(attachment.fileName).to.equal(''); 87 | }); 88 | 89 | it('returns null for non-existent html body', async () => { 90 | const eml = `Content-Type: multipart/mixed; boundary="------------P7E1gxp6rCvfn0to5n3PZ2h0"; 91 | protected-headers="v1" 92 | From: Sender 93 | To: receiver@test.com 94 | Message-ID: <39b3134c-0fcd-4618-b1bd-2b20481bf2af> 95 | Subject: Empty message test 96 | 97 | --------------P7E1gxp6rCvfn0to5n3PZ2h0 98 | Content-Type: text/plain; charset=UTF-8; format=flowed 99 | Content-Transfer-Encoding: 7bit 100 | 101 | 102 | --------------P7E1gxp6rCvfn0to5n3PZ2h0--` 103 | const { subject, body: { html, text } } = parseMail(eml); 104 | 105 | expect(subject).to.equal('Empty message test'); 106 | expect(text).to.equal(''); 107 | expect(html).to.be.null; 108 | }); 109 | 110 | it('decodes the subject', async () => { 111 | const eml = await read_file_raw("multipart-encrypted-subject-utf8"); 112 | const { subject, body } = 
parseMail(eml); 113 | 114 | expect(subject).to.equal('subject with emojis 😃😇'); 115 | expect(body.text).to.equal('test utf8 in encrypted subject\n'); 116 | }); 117 | 118 | it('parses addresses and date', async () => { 119 | const eml = await read_file_raw("multipart-addresses"); 120 | const { from, to, cc, bcc, date } = parseMail(eml); 121 | 122 | expect(from).to.deep.equal({ name: 'Some One', email: 'someone@test.com' }); 123 | expect(to).to.deep.equal([{ name: '', email: 'receiver@test.com' }, { name: '', email: 'another_receiver@test.com' }]); 124 | expect(cc).to.deep.equal([{ name: '', email: 'copy@test.com' }]); 125 | expect(bcc).to.be.undefined; 126 | expect(date).to.deep.equal(new Date('Sun, 12 Jun 2022 17:21:02 +0200')); 127 | }); 128 | 129 | it('parses address groups', async () => { 130 | const eml = await read_file_raw("multipart-addresses-groups"); 131 | const { from, to, cc, bcc } = parseMail(eml); 132 | 133 | expect(from).to.deep.equal({ name: 'Some One', email: 'someone@test.com' }); 134 | expect(to).to.deep.equal([{ name: 'undisclosed-recipients', group: [] }]); 135 | expect(cc).to.deep.equal([ 136 | { name: 'Group A', group: [{ name: 'AA', email: 'a@b.com' }, { name: 'AB', email: 'a@b.com' }] }, 137 | { name: 'Group B', group: [{ name: '', email: 'b@b.com' }] } 138 | ]); 139 | expect(bcc).to.be.undefined; 140 | }); 141 | }); 142 | -------------------------------------------------------------------------------- /test/test_structured_headers.js: -------------------------------------------------------------------------------- 1 | import { assert } from "chai"; 2 | import { headerparser } from "../lib/jsmime"; 3 | 4 | function smartDeepEqual(actual, expected) { 5 | assert.deepEqual(actual, expected); 6 | if (actual instanceof Map && expected instanceof Map) { 7 | assert.deepEqual( 8 | Array.from(actual.entries()), 9 | Array.from(expected.entries()) 10 | ); 11 | } 12 | } 13 | 14 | function arrayTest(data, fn) { 15 | fn.toString = function() { 16 | let 
text = Function.prototype.toString.call(this); 17 | text = text.replace(/data\[([0-9]*)\]/g, function(m, p) { 18 | return JSON.stringify(data[p]); 19 | }); 20 | return text; 21 | }; 22 | return it(data[0].toString(), fn); 23 | } 24 | 25 | function testHeader(header, tests) { 26 | describe(header, function() { 27 | tests.forEach(function(data) { 28 | arrayTest(data, function() { 29 | smartDeepEqual( 30 | headerparser.parseStructuredHeader(header, data[0]), 31 | data[1] 32 | ); 33 | }); 34 | }); 35 | }); 36 | } 37 | 38 | function makeCT(media, sub, params) { 39 | var object = new Map(); 40 | object.mediatype = media; 41 | object.subtype = sub; 42 | object.type = media + "/" + sub; 43 | for (let k in params) { 44 | object.set(k, params[k]); 45 | } 46 | return object; 47 | } 48 | describe("Structured headers", function() { 49 | // Ad-hoc header tests 50 | testHeader("Content-Type", [ 51 | ["text/plain", makeCT("text", "plain", {})], 52 | ["text/html", makeCT("text", "html", {})], 53 | [ 54 | 'text/plain; charset="UTF-8"', 55 | makeCT("text", "plain", { charset: "UTF-8" }), 56 | ], 57 | ["text/", makeCT("text", "", {})], 58 | ["text", makeCT("text", "plain", {})], 59 | ["image/", makeCT("image", "", {})], 60 | ["image", makeCT("text", "plain", {})], 61 | ["hacker/x-mailnews", makeCT("hacker", "x-mailnews", {})], 62 | ["hacker/x-mailnews;", makeCT("hacker", "x-mailnews", {})], 63 | ["HACKER/X-MAILNEWS", makeCT("hacker", "x-mailnews", {})], 64 | [ 65 | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 66 | makeCT( 67 | "application", 68 | "vnd.openxmlformats-officedocument.spreadsheetml.sheet", 69 | {} 70 | ), 71 | ], 72 | [ 73 | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;\r" + 74 | '\n name="Presentation.pptx"', 75 | makeCT( 76 | "application", 77 | "vnd.openxmlformats-officedocument.spreadsheetml.sheet", 78 | { name: "Presentation.pptx" } 79 | ), 80 | ], 81 | ["", makeCT("text", "plain", {})], 82 | [" ", makeCT("text", "plain", 
{})], 83 | ["text/plain; c", makeCT("text", "plain", {})], 84 | ["text/plain; charset=", makeCT("text", "plain", { charset: "" })], 85 | ['text/plain; charset="', makeCT("text", "plain", { charset: "" })], 86 | ["text\\/enriched", makeCT("text\\", "enriched", {})], 87 | ['multipart/mixed ";" wtf=stupid', makeCT("multipart", "mixed", {})], 88 | [ 89 | "multipart/mixed; wtf=stupid", 90 | makeCT("multipart", "mixed", { wtf: "stupid" }), 91 | ], 92 | [ 93 | "text/plain; CHARSET=Big5", 94 | makeCT("text", "plain", { charset: "Big5" }), 95 | ], 96 | [ 97 | 'text/html; CHARSET="Big5"', 98 | makeCT("text", "html", { charset: "Big5" }), 99 | ], 100 | ['text/html; CHARSET="Big5', makeCT("text", "html", { charset: "Big5" })], 101 | [["text/html", "multipart/mixed"], makeCT("text", "html", {})], 102 | ]); 103 | testHeader("Content-Transfer-Encoding", [ 104 | ["", ""], 105 | ["8bit", "8bit"], 106 | ["8BIT", "8bit"], 107 | ["QuOtEd-PrInTaBlE", "quoted-printable"], 108 | ["Base64", "base64"], 109 | ["7bit", "7bit"], 110 | [["7bit", "8bit"], "7bit"], 111 | ["x-uuencode", "x-uuencode"], 112 | ]); 113 | 114 | // Non-ad-hoc header tests 115 | let addressing_headers = [ 116 | "From", 117 | "To", 118 | "Cc", 119 | "Bcc", 120 | "Sender", 121 | "Reply-To", 122 | "Resent-Bcc", 123 | "Resent-To", 124 | "Resent-From", 125 | "Resent-Cc", 126 | "Resent-Sender", 127 | "Approved", 128 | "Disposition-Notification-To", 129 | "Delivered-To", 130 | "Return-Receipt-To", 131 | "Resent-Reply-To", 132 | "Mail-Reply-To", 133 | "Mail-Followup-To", 134 | ]; 135 | let address_tests = [ 136 | ["", []], 137 | ["a@example.invalid", [{ name: "", email: "a@example.invalid" }]], 138 | [ 139 | "John Doe ", 140 | [{ name: "John Doe", email: "a@example.invalid" }], 141 | ], 142 | [ 143 | "John Doe ", 144 | [{ name: "John Doe", email: "A@EXAMPLE.INVALID" }], 145 | ], 146 | [ 147 | "=?UTF-8?B?5bGx55Sw5aSq6YOO?= ", 148 | [{ name: "\u5c71\u7530\u592a\u90ce", email: "a@example.invalid" }], 149 | ], 150 | [ 151 | 
"undisclosed-recipients:;", 152 | [{ name: "undisclosed-recipients", group: [] }], 153 | ], 154 | [ 155 | "world: a@example.invalid, b@example.invalid;", 156 | [ 157 | { 158 | name: "world", 159 | group: [ 160 | { name: "", email: "a@example.invalid" }, 161 | { name: "", email: "b@example.invalid" }, 162 | ], 163 | }, 164 | ], 165 | ], 166 | // TODO when we support IDN: 167 | // This should be \u4f8b.invalid instead (Japanese kanji for "example") 168 | [ 169 | "\u5c71\u7530\u592a\u90ce ", 170 | [{ name: "\u5c71\u7530\u592a\u90ce", email: "a@xn--fsq.invalid" }], 171 | ], 172 | [ 173 | "\u5c71\u7530\u592a\u90ce ", 174 | [{ name: "\u5c71\u7530\u592a\u90ce", email: "a@\u4f8b.invalid" }], 175 | ], 176 | [ 177 | "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb@\u4f8b.invalid", 178 | [ 179 | { 180 | name: "", 181 | email: 182 | "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb@\u4f8b.invalid", 183 | }, 184 | ], 185 | ], 186 | [ 187 | ["a@example.invalid", "b@example.invalid"], 188 | [ 189 | { name: "", email: "a@example.invalid" }, 190 | { name: "", email: "b@example.invalid" }, 191 | ], 192 | ], 193 | ]; 194 | addressing_headers.forEach(function(header) { 195 | testHeader(header, address_tests); 196 | }); 197 | 198 | let date_headers = [ 199 | "Date", 200 | "Expires", 201 | "Injection-Date", 202 | "NNTP-Posting-Date", 203 | "Resent-Date", 204 | ]; 205 | let date_tests = [ 206 | ["Thu, 06 Sep 2012 08:08:21 -0700", new Date("2012-09-06T08:08:21-0700")], 207 | ["This is so not a date", new Date(NaN)], 208 | ]; 209 | date_headers.forEach(function(header) { 210 | testHeader(header, date_tests); 211 | }); 212 | 213 | let multiple_unstructured_headers = ["In-Reply-To", "References"]; 214 | let multiple_unstructured_tests = [ 215 | ["", ""], 216 | [" ", " "], 217 | 218 | // This test is needed for clients sending non-compliant headers, see bug 1154521 219 | [ 220 | ",,", 221 | " ", 222 | ], 223 | // Test for bug 1197686 224 | [ 225 | "", 226 | " ", 227 | ], 228 | ]; 229 | 
multiple_unstructured_headers.forEach(function(header) { 230 | testHeader(header, multiple_unstructured_tests); 231 | }); 232 | 233 | let unstructured_headers = [ 234 | "Comments", 235 | "Content-Description", 236 | "Keywords", 237 | "Subject", 238 | ]; 239 | let unstructured_tests = [ 240 | ["", ""], 241 | ["This is a subject", "This is a subject"], 242 | [["Subject 1", "Subject 2"], "Subject 1"], 243 | [ 244 | "=?UTF-8?B?56eB44Gv5Lu25ZCN5Y2I5YmN?=", 245 | "\u79c1\u306f\u4ef6\u540d\u5348\u524d", 246 | ], 247 | ]; 248 | unstructured_headers.forEach(function(header) { 249 | testHeader(header, unstructured_tests); 250 | }); 251 | }); 252 | -------------------------------------------------------------------------------- /test/test_header_emitter.js: -------------------------------------------------------------------------------- 1 | 2 | import { assert } from 'chai'; 3 | import { headeremitter } from '../lib/jsmime'; 4 | 5 | import { MockDate } from './utils'; 6 | 7 | function arrayTest(data, fn) { 8 | fn.toString = function() { 9 | let text = Function.prototype.toString.call(this); 10 | text = text.replace(/data\[([0-9]*)\]/g, function(m, p) { 11 | return JSON.stringify(data[p]); 12 | }); 13 | return text; 14 | }; 15 | return it(JSON.stringify(data[0]), fn); 16 | } 17 | 18 | describe("headeremitter", function() { 19 | describe("addAddresses", function() { 20 | let handler = { 21 | reset(expected) { 22 | this.output = ""; 23 | this.expected = expected; 24 | }, 25 | deliverData(data) { 26 | this.output += data; 27 | }, 28 | deliverEOF() { 29 | assert.equal(this.output, this.expected + "\r\n"); 30 | for (let line of this.output.split("\r\n")) { 31 | assert.ok(line.length <= 30, "Line is too long"); 32 | } 33 | }, 34 | }; 35 | let header_tests = [ 36 | [[{ name: "", email: "" }], ""], 37 | [[{ name: "", email: "a@example.com" }], "a@example.com"], 38 | [ 39 | [{ name: "John Doe", email: "a@example.com" }], 40 | "John Doe ", 41 | ], 42 | [ 43 | [ 44 | { name: "", email: 
"a@b.c" }, 45 | { name: "", email: "b@b.c" }, 46 | ], 47 | "a@b.c, b@b.c", 48 | ], 49 | [ 50 | [ 51 | { name: "JD", email: "a@a.c" }, 52 | { name: "SD", email: "b@b.c" }, 53 | ], 54 | "JD , SD ", 55 | ], 56 | [ 57 | [ 58 | { name: "John Doe", email: "a@example.com" }, 59 | { name: "Sally Doe", email: "b@example.com" }, 60 | ], 61 | "John Doe ,\r\n Sally Doe ", 62 | ], 63 | [ 64 | [ 65 | { 66 | name: "My name is really long and I split somewhere", 67 | email: "a@a.c", 68 | }, 69 | ], 70 | "My name is really long and I\r\n split somewhere ", 71 | ], 72 | // Note that the name is 29 chars here, so adding the email needs a break. 73 | [ 74 | [{ name: "My name is really really long", email: "a@a.c" }], 75 | "My name is really really long\r\n ", 76 | ], 77 | [ 78 | [ 79 | { name: "", email: "a@a.c" }, 80 | { name: "This name is long", email: "b@b.c" }, 81 | ], 82 | "a@a.c,\r\n This name is long ", 83 | ], 84 | [ 85 | [ 86 | { name: "", email: "a@a.c" }, 87 | { name: "This name is also long", email: "b@b.c" }, 88 | ], 89 | "a@a.c,\r\n This name is also long\r\n ", 90 | ], 91 | [[{ name: "", email: "hi!bad@all.com" }], '"hi!bad"@all.com'], 92 | [[{ name: "", email: '"hi!bad"@all.com' }], '"hi!bad"@all.com'], 93 | [[{ name: "Doe, John", email: "a@a.com" }], '"Doe, John" '], 94 | // This one violates the line length, so it underquotes instead. 
95 | [ 96 | [ 97 | { 98 | name: "A really, really long name to quote", 99 | email: "a@example.com", 100 | }, 101 | ], 102 | 'A "really," really long name\r\n to quote ', 103 | ], 104 | [ 105 | [ 106 | { 107 | name: "Group", 108 | group: [ 109 | { name: "", email: "a@a.c" }, 110 | { name: "", email: "b@b.c" }, 111 | ], 112 | }, 113 | ], 114 | "Group: a@a.c, b@b.c;", 115 | ], 116 | [[{ name: "No email address", email: "" }], "No email address"], 117 | [ 118 | [{ name: "]user[ domain", email: "user@d.com" }], 119 | '"]user[ domain" ', 120 | ], 121 | [ 122 | [ 123 | { 124 | name: "Group", 125 | group: [ 126 | { name: "]u[ d", email: "a@a.c" }, 127 | { name: "]u[ c", email: "b@b.c" }, 128 | ], 129 | }, 130 | ], 131 | 'Group: "]u[ d" ,\r\n "]u[ c" ;', 132 | ], 133 | [ 134 | [{ name: "user@domain", email: "user@d.com" }], 135 | '"user@domain" ', 136 | ], 137 | [ 138 | [ 139 | { 140 | name: "Group", 141 | group: [ 142 | { name: "u@d", email: "a@a.c" }, 143 | { name: "u@c", email: "b@b.c" }, 144 | ], 145 | }, 146 | ], 147 | 'Group: "u@d" ,\r\n "u@c" ;', 148 | ], 149 | ]; 150 | header_tests.forEach(function(data) { 151 | arrayTest(data, function() { 152 | let emitter = headeremitter.makeStreamingEmitter(handler, { 153 | softMargin: 30, 154 | useASCII: false, 155 | }); 156 | handler.reset(data[1]); 157 | emitter.addAddresses(data[0]); 158 | emitter.finish(true); 159 | }); 160 | }); 161 | }); 162 | describe("addAddresses (RFC 2047)", function() { 163 | let handler = { 164 | reset(expected) { 165 | this.output = ""; 166 | this.expected = expected; 167 | }, 168 | deliverData(data) { 169 | this.output += data; 170 | }, 171 | deliverEOF() { 172 | assert.equal(this.output, this.expected + "\r\n"); 173 | for (let line of this.output.split("\r\n")) { 174 | assert.ok(line.length <= 30, "Line is too long"); 175 | } 176 | }, 177 | }; 178 | let header_tests = [ 179 | [[{ name: "\u0436", email: "a@a.c" }], "=?UTF-8?B?0LY=?= "], 180 | [ 181 | [{ name: "dioxyg\u00e8ne", email: "a@a.c" }], 
182 | "=?UTF-8?Q?dioxyg=C3=A8ne?=\r\n ", 183 | ], 184 | // Prefer QP if base64 and QP are exactly the same length 185 | [ 186 | [{ name: "oxyg\u00e8ne", email: "a@a.c" }], 187 | // =?UTF-8?B?b3h5Z8OobmU=?= 188 | "=?UTF-8?Q?oxyg=C3=A8ne?=\r\n ", 189 | ], 190 | [ 191 | [ 192 | { 193 | name: "\ud83d\udca9\ud83d\udca9\ud83d\udca9\ud83d\udca9", 194 | email: "a@a.c", 195 | }, 196 | ], 197 | "=?UTF-8?B?8J+SqfCfkqnwn5Kp?=\r\n =?UTF-8?B?8J+SqQ==?= ", 198 | ], 199 | // Bug 1088975: Since the encoded-word should be recognized as an atom, 200 | // encode commas. 201 | [ 202 | [{ name: "B\u00fcg 1088975, FirstName", email: "a@b.c" }], 203 | "=?UTF-8?Q?B=C3=BCg_1088975?=\r\n" + 204 | " =?UTF-8?Q?=2C_FirstName?=\r\n ", 205 | ], 206 | ]; 207 | header_tests.forEach(function(data) { 208 | arrayTest(data, function() { 209 | let emitter = headeremitter.makeStreamingEmitter(handler, { 210 | softMargin: 30, 211 | useASCII: true, 212 | }); 213 | handler.reset(data[1]); 214 | emitter.addAddresses(data[0]); 215 | emitter.finish(true); 216 | }); 217 | }); 218 | }); 219 | describe("addUnstructured (RFC 2047)", function() { 220 | let handler = { 221 | reset(expected) { 222 | this.output = ""; 223 | this.expected = expected; 224 | }, 225 | deliverData(data) { 226 | this.output += data; 227 | }, 228 | deliverEOF() { 229 | assert.equal(this.output, this.expected + "\r\n"); 230 | for (let line of this.output.split("\r\n")) { 231 | assert.ok(line.length <= 30, "Line is too long"); 232 | } 233 | }, 234 | }; 235 | let header_tests = [ 236 | ["My house burned down!", "My house burned down!"], 237 | 238 | // Which of the 32 "special" characters need to be encoded in QP encoding? 239 | // Note: Encoding is forced by adding a \x7f at the end. 240 | // These 5 don't need encoding: 241 | [" ! * + - / \x7f", "=?UTF-8?Q?_!_*_+_-_/_=7F?="], 242 | 243 | // Bug 1438590: RFC2047 [5. (3)] requests the 244 | // encoding of these 27 "special" characters: 245 | // " # $ % & ' ( ) , . : ; < = > ? 
@ [ \ ] ^ _ ` { | } ~. 246 | // Note: If there are enough characters for padding, 247 | // QP is used and not base64. 248 | ['Test " # \x7f', "=?UTF-8?Q?Test_=22_=23_=7F?="], 249 | ["Test $ % \x7f", "=?UTF-8?Q?Test_=24_=25_=7F?="], 250 | ["Test & ' \x7f", "=?UTF-8?Q?Test_=26_=27_=7F?="], 251 | ["Test ( ) \x7f", "=?UTF-8?Q?Test_=28_=29_=7F?="], 252 | ["Test , . \x7f", "=?UTF-8?Q?Test_=2C_=2E_=7F?="], 253 | ["Test : ; \x7f", "=?UTF-8?Q?Test_=3A_=3B_=7F?="], 254 | ["Test < = \x7f", "=?UTF-8?Q?Test_=3C_=3D_=7F?="], 255 | ["Test > ? \x7f", "=?UTF-8?Q?Test_=3E_=3F_=7F?="], 256 | ["Test @ [ \x7f", "=?UTF-8?Q?Test_=40_=5B_=7F?="], 257 | ["Test \\ ] \x7f", "=?UTF-8?Q?Test_=5C_=5D_=7F?="], 258 | ["Test ^ _ \x7f", "=?UTF-8?Q?Test_=5E_=5F_=7F?="], 259 | ["Test ` { \x7f", "=?UTF-8?Q?Test_=60_=7B_=7F?="], 260 | ["Test | } \x7f", "=?UTF-8?Q?Test_=7C_=7D_=7F?="], 261 | ["Test ~ \x7f", "=?UTF-8?Q?Test_=7E_=7F?="], 262 | 263 | // But the 32 printable "special" characters don't need it in the first place! 264 | [ 265 | "! \" # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \\ ] ^ _ ` { | } ~", 266 | "! \" # $ % & ' ( ) * + , - . /\r\n" + 267 | " : ; < = > ? @ [ \\ ] ^ _ ` { |\r\n" + 268 | " } ~", 269 | ], 270 | 271 | // Test to make sure 2047-encoding chooses the right values. 
272 | ["\u001f", "=?UTF-8?Q?=1F?="], 273 | ["\u001fa", "=?UTF-8?Q?=1Fa?="], 274 | ["\u001faa", "=?UTF-8?B?H2Fh?="], 275 | ["\u001faaa", "=?UTF-8?Q?=1Faaa?="], 276 | ["\u001faaa\u001f", "=?UTF-8?B?H2FhYR8=?="], 277 | ["\u001faaa\u001fa", "=?UTF-8?B?H2FhYR9h?="], 278 | ["\u001faaa\u001faa", "=?UTF-8?Q?=1Faaa=1Faa?="], 279 | ["\u001faaa\u001faa\u001faaaa", "=?UTF-8?B?H2FhYR9hYR9hYWFh?="], 280 | 281 | // Make sure line breaking works right at the edge cases 282 | ["\u001faaa\u001faaaaaaaaa", "=?UTF-8?Q?=1Faaa=1Faaaaaaaaa?="], 283 | [ 284 | "\u001faaa\u001faaaaaaaaaa", 285 | "=?UTF-8?Q?=1Faaa=1Faaaaaaaaa?=\r\n =?UTF-8?Q?a?=", 286 | ], 287 | 288 | // Choose base64/qp independently for each word 289 | [ 290 | "\ud83d\udca9\ud83d\udca9\ud83d\udca9a", 291 | "=?UTF-8?B?8J+SqfCfkqnwn5Kp?=\r\n =?UTF-8?Q?a?=", 292 | ], 293 | 294 | // Don't split a surrogate character! 295 | [ 296 | "a\ud83d\udca9\ud83d\udca9\ud83d\udca9a", 297 | "=?UTF-8?B?YfCfkqnwn5Kp?=\r\n =?UTF-8?B?8J+SqWE=?=", 298 | ], 299 | 300 | // Spacing a UTF-8 string 301 | [ 302 | "L'oxyg\u00e8ne est un \u00e9l\u00e9ment chimique du groupe des " + 303 | "chalcog\u00e8nes", 304 | // 1 2 3 305 | // 123456789012345678901234567890 306 | "=?UTF-8?B?TCdveHlnw6huZSBl?=\r\n" + 307 | " =?UTF-8?B?c3QgdW4gw6lsw6lt?=\r\n" + 308 | " =?UTF-8?Q?ent_chimique_du_g?=\r\n" + 309 | " =?UTF-8?Q?roupe_des_chalcog?=\r\n" + 310 | " =?UTF-8?B?w6huZXM=?=", 311 | ], 312 | ]; 313 | header_tests.forEach(function(data) { 314 | arrayTest(data, function() { 315 | let emitter = headeremitter.makeStreamingEmitter(handler, { 316 | softMargin: 30, 317 | useASCII: true, 318 | }); 319 | handler.reset(data[1]); 320 | emitter.addUnstructured(data[0]); 321 | emitter.finish(true); 322 | }); 323 | }); 324 | }); 325 | describe("addDate", function() { 326 | let handler = { 327 | reset(expected) { 328 | this.output = ""; 329 | this.expected = expected; 330 | }, 331 | deliverData(data) { 332 | this.output += data; 333 | }, 334 | deliverEOF() { 335 | 
assert.equal(this.output, this.expected + "\r\n"); 336 | }, 337 | }; 338 | let header_tests = [ 339 | // Test basic day/month names 340 | ["2000-01-01T00:00:00Z", "Sat, 1 Jan 2000 00:00:00 +0000"], 341 | ["2000-02-01T00:00:00Z", "Tue, 1 Feb 2000 00:00:00 +0000"], 342 | ["2000-03-01T00:00:00Z", "Wed, 1 Mar 2000 00:00:00 +0000"], 343 | ["2000-04-01T00:00:00Z", "Sat, 1 Apr 2000 00:00:00 +0000"], 344 | ["2000-05-01T00:00:00Z", "Mon, 1 May 2000 00:00:00 +0000"], 345 | ["2000-06-01T00:00:00Z", "Thu, 1 Jun 2000 00:00:00 +0000"], 346 | ["2000-07-01T00:00:00Z", "Sat, 1 Jul 2000 00:00:00 +0000"], 347 | ["2000-08-01T00:00:00Z", "Tue, 1 Aug 2000 00:00:00 +0000"], 348 | ["2000-09-01T00:00:00Z", "Fri, 1 Sep 2000 00:00:00 +0000"], 349 | ["2000-10-01T00:00:00Z", "Sun, 1 Oct 2000 00:00:00 +0000"], 350 | ["2000-11-01T00:00:00Z", "Wed, 1 Nov 2000 00:00:00 +0000"], 351 | ["2000-12-01T00:00:00Z", "Fri, 1 Dec 2000 00:00:00 +0000"], 352 | 353 | // Test timezone offsets 354 | ["2000-06-01T12:00:00Z", "Thu, 1 Jun 2000 12:00:00 +0000"], 355 | ["2000-06-01T12:00:00+0100", "Thu, 1 Jun 2000 12:00:00 +0100"], 356 | ["2000-06-01T12:00:00+0130", "Thu, 1 Jun 2000 12:00:00 +0130"], 357 | ["2000-06-01T12:00:00-0100", "Thu, 1 Jun 2000 12:00:00 -0100"], 358 | ["2000-06-01T12:00:00-0130", "Thu, 1 Jun 2000 12:00:00 -0130"], 359 | ["2000-06-01T12:00:00+1345", "Thu, 1 Jun 2000 12:00:00 +1345"], 360 | ["2000-06-01T12:00:00-1200", "Thu, 1 Jun 2000 12:00:00 -1200"], 361 | ["2000-06-01T12:00:00+1337", "Thu, 1 Jun 2000 12:00:00 +1337"], 362 | ["2000-06-01T12:00:00+0101", "Thu, 1 Jun 2000 12:00:00 +0101"], 363 | ["2000-06-01T12:00:00-1337", "Thu, 1 Jun 2000 12:00:00 -1337"], 364 | 365 | // Try some varying hour, minute, and second amounts, to double-check 366 | // padding and time dates. 
367 | ["2000-06-01T01:02:03Z", "Thu, 1 Jun 2000 01:02:03 +0000"], 368 | ["2000-06-01T23:13:17Z", "Thu, 1 Jun 2000 23:13:17 +0000"], 369 | ["2000-06-01T00:05:04Z", "Thu, 1 Jun 2000 00:05:04 +0000"], 370 | ["2000-06-01T23:59:59Z", "Thu, 1 Jun 2000 23:59:59 +0000"], 371 | ["2000-06-01T13:17:40Z", "Thu, 1 Jun 2000 13:17:40 +0000"], 372 | ["2000-06-01T11:15:34Z", "Thu, 1 Jun 2000 11:15:34 +0000"], 373 | ["2000-06-01T04:09:09Z", "Thu, 1 Jun 2000 04:09:09 +0000"], 374 | ["2000-06-01T04:10:10Z", "Thu, 1 Jun 2000 04:10:10 +0000"], 375 | ["2000-06-01T09:13:17Z", "Thu, 1 Jun 2000 09:13:17 +0000"], 376 | ["2000-06-01T13:12:14Z", "Thu, 1 Jun 2000 13:12:14 +0000"], 377 | ["2000-06-01T14:16:48Z", "Thu, 1 Jun 2000 14:16:48 +0000"], 378 | 379 | // Try varying month, date, and year values. 380 | ["2000-01-31T00:00:00Z", "Mon, 31 Jan 2000 00:00:00 +0000"], 381 | ["2000-02-28T00:00:00Z", "Mon, 28 Feb 2000 00:00:00 +0000"], 382 | ["2000-02-29T00:00:00Z", "Tue, 29 Feb 2000 00:00:00 +0000"], 383 | ["2001-02-28T00:00:00Z", "Wed, 28 Feb 2001 00:00:00 +0000"], 384 | ["2000-03-31T00:00:00Z", "Fri, 31 Mar 2000 00:00:00 +0000"], 385 | ["2000-04-30T00:00:00Z", "Sun, 30 Apr 2000 00:00:00 +0000"], 386 | ["2000-05-31T00:00:00Z", "Wed, 31 May 2000 00:00:00 +0000"], 387 | ["2000-06-30T00:00:00Z", "Fri, 30 Jun 2000 00:00:00 +0000"], 388 | ["2000-07-31T00:00:00Z", "Mon, 31 Jul 2000 00:00:00 +0000"], 389 | ["2000-08-31T00:00:00Z", "Thu, 31 Aug 2000 00:00:00 +0000"], 390 | ["2000-09-30T00:00:00Z", "Sat, 30 Sep 2000 00:00:00 +0000"], 391 | ["2000-10-31T00:00:00Z", "Tue, 31 Oct 2000 00:00:00 +0000"], 392 | ["2000-11-30T00:00:00Z", "Thu, 30 Nov 2000 00:00:00 +0000"], 393 | ["2000-12-31T00:00:00Z", "Sun, 31 Dec 2000 00:00:00 +0000"], 394 | ["1900-01-01T00:00:00Z", "Mon, 1 Jan 1900 00:00:00 +0000"], 395 | ["9999-12-31T23:59:59Z", "Fri, 31 Dec 9999 23:59:59 +0000"], 396 | 397 | // Tests that are not actually missing: 398 | // We don't actually need to test daylight savings time issues, so long as 399 | // 
getTimezoneOffset is correct. We've confirmed black-box that the value 400 | // is being directly queried on every instance, since we have tests that 401 | // make MockDate.getTimezoneOffset return different values. 402 | // In addition, ES6 Date objects don't support leap seconds. Invalid dates 403 | // per RFC 5322 are handled in a later run of code. 404 | ]; 405 | header_tests.forEach(function(data) { 406 | arrayTest(data, function() { 407 | let emitter = headeremitter.makeStreamingEmitter(handler, {}); 408 | handler.reset(data[1]); 409 | emitter.addDate(new MockDate(data[0])); 410 | emitter.finish(true); 411 | }); 412 | }); 413 | 414 | // An invalid date should throw an error instead of make a malformed header. 415 | it("Invalid dates", function() { 416 | let emitter = headeremitter.makeStreamingEmitter(handler, {}); 417 | assert.throws(function() { 418 | emitter.addDate(new Date(NaN)); 419 | }, /Cannot encode an invalid date/); 420 | assert.throws(function() { 421 | emitter.addDate(new Date("1850-01-01")); 422 | }, /Date year is out of encodable range/); 423 | assert.throws(function() { 424 | emitter.addDate(new Date("10000-01-01")); 425 | }); // Firefox considers the date invalid, Chrome does not, resulting in different error messages 426 | }); 427 | 428 | // Test preferred breaking for the date header. 
429 | it("Break spot", function() { 430 | let emitter = headeremitter.makeStreamingEmitter(handler, { 431 | softMargin: 30, 432 | }); 433 | handler.reset("Overly-Long-Date:\r\n Sat, 1 Jan 2000 00:00:00 +0000"); 434 | emitter.addHeaderName("Overly-Long-Date"); 435 | emitter.addDate(new MockDate("2000-01-01T00:00:00Z")); 436 | emitter.finish(); 437 | }); 438 | 439 | it("Correctness of date", function() { 440 | let emitter = headeremitter.makeStreamingEmitter(handler, {}); 441 | handler.reset(); 442 | let now = new Date(); 443 | emitter.addDate(now); 444 | emitter.finish(); 445 | // All engines can parse the date strings we produce 446 | let reparsed = new Date(handler.output); 447 | 448 | // Now and reparsed should be correct to second-level precision. 449 | assert.equal(reparsed.getMilliseconds(), 0); 450 | assert.equal(now.getTime() - now.getMilliseconds(), reparsed.getTime()); 451 | }); 452 | }); 453 | 454 | describe("Header lengths", function() { 455 | let handler = { 456 | reset(expected) { 457 | this.output = ""; 458 | this.expected = expected; 459 | }, 460 | deliverData(data) { 461 | this.output += data; 462 | }, 463 | deliverEOF() { 464 | assert.equal(this.output, this.expected + "\r\n"); 465 | }, 466 | }; 467 | let header_tests = [ 468 | [ 469 | [{ name: "Supercalifragilisticexpialidocious", email: "a@b.c" }], 470 | "Supercalifragilisticexpialidocious\r\n ", 471 | ], 472 | [ 473 | [ 474 | { 475 | email: 476 | "supercalifragilisticexpialidocious@" + 477 | "the.longest.domain.name.in.the.world.invalid", 478 | }, 479 | ], 480 | "supercalifragilisticexpialidocious\r\n" + 481 | " @the.longest.domain.name.in.the.world.invalid", 482 | ], 483 | [ 484 | [ 485 | { 486 | name: 487 | "Lopadotemachoselachogaleokranioleipsanodrimhypotrimmatosilphi" + 488 | "paraomelitokatakechymenokichlepikossyphophattoperisteralektryonoptek" + 489 | "ephalliokigklopeleiolagoiosiraiobaphetraganopterygon", 490 | email: "a@b.c", 491 | }, 492 | ], 493 | new Error(), 494 | ], 495 | ]; 496 | 
header_tests.forEach(function(data) { 497 | arrayTest(data, function() { 498 | let emitter = headeremitter.makeStreamingEmitter(handler, { 499 | softMargin: 30, 500 | hardMargin: 50, 501 | useASCII: false, 502 | }); 503 | handler.reset(data[1]); 504 | if (data[1] instanceof Error) { 505 | assert.throws(function() { 506 | emitter.addAddresses(data[0]); 507 | }, /Cannot encode/); 508 | } else { 509 | assert.doesNotThrow(function() { 510 | emitter.addAddresses(data[0]); 511 | }); 512 | emitter.finish(true); 513 | } 514 | }); 515 | }); 516 | }); 517 | }); 518 | -------------------------------------------------------------------------------- /lib/mailParser.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 | 5 | import headerParser from './headerParser'; 6 | import RawMimeParser from './rawMimeParser'; 7 | import { concatUint8Arrays, stringToUint8Array, uint8ArrayToString } from './utils'; 8 | 9 | // Emitter helpers, for internal functions later on. 10 | var ExtractMimeMsgEmitter = { 11 | getAttachmentName(part) { 12 | if (!part || !part["headers"]) { 13 | return ""; 14 | } 15 | 16 | if (part.headers["content-disposition"]) { 17 | let filename = MimeParser.getParameter( 18 | part.headers["content-disposition"][0], 19 | "filename" 20 | ); 21 | if (filename) { 22 | return filename; 23 | } 24 | } 25 | 26 | if (part.headers["content-type"]) { 27 | let name = MimeParser.getParameter( 28 | part.headers["content-type"][0], 29 | "name" 30 | ); 31 | if (name) { 32 | return name; 33 | } 34 | } 35 | 36 | return ""; 37 | }, 38 | 39 | // All parts of content-disposition = "attachment" are returned as attachments. 
40 | // For content-disposition = "inline", all parts except those with content-type 41 | // text/plain, text/html and text/enriched are returned as attachments. 42 | isAttachment(part) { 43 | if (!part) { 44 | return false; 45 | } 46 | 47 | let contentType = part.contentType || "text/plain"; 48 | if (contentType.search(/^multipart\//i) === 0) { 49 | return false; 50 | } 51 | 52 | let contentDisposition = ""; 53 | if ( 54 | Array.isArray(part.headers["content-disposition"]) && 55 | part.headers["content-disposition"].length > 0 56 | ) { 57 | contentDisposition = part.headers["content-disposition"][0]; 58 | } 59 | 60 | if ( 61 | contentDisposition.search(/^attachment/i) === 0 || 62 | contentType.search(/^text\/plain|^text\/html|^text\/enriched/i) === -1 63 | ) { 64 | return true; 65 | } 66 | 67 | return false; 68 | }, 69 | 70 | /** JSMime API **/ 71 | startMessage() { 72 | this.mimeTree = { 73 | partName: "", 74 | contentType: "message/rfc822", 75 | parts: [], 76 | size: 0, 77 | headers: {}, 78 | rawHeaderText: "", 79 | allAttachments: [], 80 | // keep track of encountered body parts, based on content-type 81 | bodyParts: { text: [], html: [] }, 82 | // No support for encryption. 83 | isEncrypted: false, 84 | }; 85 | // partsPath is a hierarchical stack of parts from the root to the 86 | // current part. 87 | this.partsPath = [this.mimeTree]; 88 | this.options = this.options || {}; 89 | }, 90 | 91 | endMessage() { 92 | // Prepare the mimeMsg object, which is the final output of the emitter. 93 | this.mimeMsg = null; 94 | if (this.mimeTree.parts.length == 0) { 95 | return; 96 | } 97 | 98 | // Check if only a specific mime part has been requested. 
99 | if (this.options.getMimePart) { 100 | if (this.mimeTree.parts[0].partName == this.options.getMimePart) { 101 | this.mimeMsg = this.mimeTree.parts[0]; 102 | this.mimeMsg.bodyAsTypedArray = stringToUint8Array( 103 | this.mimeMsg.body 104 | ); 105 | } 106 | return; 107 | } 108 | 109 | this.mimeMsg = this.mimeTree; 110 | }, 111 | 112 | startPart(partNum, headerMap) { 113 | let utf8Encoder = new TextEncoder(); 114 | 115 | let contentType = headerMap.contentType && headerMap.contentType.type 116 | ? headerMap.contentType.type 117 | : "text/plain"; 118 | 119 | let rawHeaderText = headerMap.rawHeaderText; 120 | 121 | let headers = {}; 122 | for (let [headerName, headerValue] of headerMap._rawHeaders) { 123 | // MsgHdrToMimeMessage always returns an array, even for single values. 124 | let valueArray = Array.isArray(headerValue) ? headerValue : [headerValue]; 125 | // Return a binary string, to mimic MsgHdrToMimeMessage. 126 | headers[headerName] = valueArray.map(value => { 127 | let utf8ByteArray = utf8Encoder.encode(value); 128 | return uint8ArrayToString(utf8ByteArray); 129 | }); 130 | } 131 | 132 | // Get the most recent part from the hierarchical parts stack, which is the 133 | // parent of the new part to by added. 134 | let currentPart = this.partsPath[this.partsPath.length - 1]; 135 | 136 | // Add a leading 1 to the partNum. 137 | let partName = "1" + (partNum !== "" ? "." : "") + partNum; 138 | if (partName == "1") { 139 | // MsgHdrToMimeMessage differentiates between the message headers and the 140 | // headers of the first part. jsmime.js however returns all headers of 141 | // the message in the first part. 142 | 143 | // Move rawHeaderText and add the content-* headers back to the new/first 144 | // part. 
145 | currentPart.rawHeaderText = rawHeaderText; 146 | rawHeaderText = rawHeaderText 147 | .split(/\n(?![ \t])/) 148 | .filter(h => h.toLowerCase().startsWith("content-")) 149 | .join("\n") 150 | .trim(); 151 | 152 | // Move all headers and add the content-* headers back to the new/first 153 | // part. 154 | currentPart.headers = headers; 155 | headers = Object.fromEntries( 156 | Object.entries(headers).filter(h => h[0].startsWith("content-")) 157 | ); 158 | } 159 | 160 | // Add default content-type header. 161 | if (!headers["content-type"]) { 162 | headers["content-type"] = ["text/plain"]; 163 | } 164 | 165 | let newPart = { 166 | partName, 167 | rawBody: null, // Uint8Array 168 | body: '', // string, coerced based on options 169 | headers, 170 | rawHeaderText, 171 | contentType, 172 | size: 0, 173 | parts: [], 174 | // No support for encryption. 175 | isEncrypted: false, 176 | }; 177 | 178 | // Add nested new part. 179 | currentPart.parts.push(newPart); 180 | // Update the newly added part to be current part. 181 | this.partsPath.push(newPart); 182 | }, 183 | 184 | endPart(partNum) { 185 | let deleteBody = false; 186 | // Get the most recent part from the hierarchical parts stack. 187 | let currentPart = this.partsPath[this.partsPath.length - 1]; 188 | 189 | // Add size. 190 | let size = currentPart.body.length; 191 | currentPart.size += size; 192 | 193 | if (this.isAttachment(currentPart)) { 194 | currentPart.fileName = this.getAttachmentName(currentPart); 195 | const contentDispositionHeader = currentPart.headers["content-disposition"] && currentPart.headers["content-disposition"][0]; 196 | const contentIdHeader = currentPart.headers["content-id"] && currentPart.headers["content-id"][0]; 197 | 198 | // the content-disposition header, as parsed by jsmime, also contains the filename 199 | currentPart.contentDisposition = contentDispositionHeader ? 
contentDispositionHeader.split(';').shift() : undefined; 200 | currentPart.contentId = contentIdHeader || undefined; 201 | 202 | if (this.options.includeAttachments) { 203 | this.mimeTree.allAttachments.push(currentPart); 204 | } else { 205 | deleteBody = true; 206 | } 207 | } else if (currentPart.rawBody) { 208 | delete currentPart.rawBody; // drop Uint8Array data outside of attachments, to free up memory 209 | 210 | const bodyType = currentPart.contentType || 'text/plain'; 211 | switch(bodyType) { 212 | case 'text/html': 213 | this.mimeTree.bodyParts.html.push(currentPart.body); 214 | break; 215 | case 'text/plain': 216 | this.mimeTree.bodyParts.text.push(currentPart.body); 217 | break; 218 | // no support for rich text 219 | } 220 | } 221 | 222 | if (deleteBody) { 223 | delete currentPart.body; 224 | delete currentPart.rawBody; 225 | } 226 | 227 | // Remove content-disposition and content-transfer-encoding headers. 228 | currentPart.headers = Object.fromEntries( 229 | Object.entries(currentPart.headers).filter( 230 | h => 231 | !["content-disposition", "content-transfer-encoding"].includes(h[0]) 232 | ) 233 | ); 234 | 235 | // Set the parent of this part to be the new current part. 236 | this.partsPath.pop(); 237 | 238 | // Add the size of this part to its parent as well. 239 | currentPart = this.partsPath[this.partsPath.length - 1]; 240 | currentPart.size += size; 241 | }, 242 | 243 | /** 244 | * The data parameter is either a string or a Uint8Array. 245 | */ 246 | deliverPartData(partNum, data, rawData) { 247 | // Get the most recent part from the hierarchical parts stack. 
248 | let currentPart = this.partsPath[this.partsPath.length - 1]; 249 | 250 | if (typeof data === "string") { 251 | currentPart.body += data; 252 | } else { 253 | currentPart.body += uint8ArrayToString(data); 254 | } 255 | 256 | // we keep both raw and string data as at this point we do not know whether the part is an attachment 257 | if (currentPart.rawBody === null) { 258 | currentPart.rawBody = rawData; 259 | } else { 260 | currentPart.rawBody = concatUint8Arrays([currentPart.rawBody, rawData]) 261 | } 262 | }, 263 | }; 264 | 265 | var ExtractHeadersEmitter = { 266 | startPart(partNum, headers) { 267 | if (partNum == "") { 268 | this.headers = headers; 269 | } 270 | }, 271 | }; 272 | 273 | var ExtractHeadersAndBodyEmitter = { 274 | body: "", 275 | startPart: ExtractHeadersEmitter.startPart, 276 | deliverPartData(partNum, data) { 277 | if (partNum == "") { 278 | this.body += data; 279 | } 280 | }, 281 | }; 282 | 283 | export const MimeParser = { 284 | /*** 285 | * Determine an arbitrary "parameter" part of a mail header. 286 | * 287 | * @param {string} headerStr - The string containing all parts of the header. 288 | * @param {string} parameter - The parameter we are looking for. 289 | * 290 | * 291 | * 'multipart/signed; protocol="xyz"', 'protocol' --> returns "xyz" 292 | * 293 | * @return {string} String containing the value of the parameter; or "". 294 | */ 295 | 296 | getParameter(headerStr, parameter) { 297 | parameter = parameter.toLowerCase(); 298 | headerStr = headerStr.replace(/[\r\n]+[ \t]+/g, ""); 299 | 300 | let hdrMap = headerParser.parseParameterHeader( 301 | ";" + headerStr, 302 | true, 303 | true 304 | ); 305 | 306 | for (let [key, value] of hdrMap.entries()) { 307 | if (parameter == key.toLowerCase()) { 308 | return value; 309 | } 310 | } 311 | 312 | return ""; 313 | }, 314 | 315 | /** 316 | * Triggers an synchronous parse of the given input. 
317 | * 318 | * The input is a string that is immediately parsed, calling all functions on 319 | * the emitter before this function returns. 320 | * 321 | * @param {BinaryString} input A string or input stream of text to parse. 322 | * @param emitter The emitter to receive callbacks on. 323 | * @param opts A set of options for the parser. 324 | */ 325 | parseSync(input, emitter, opts) { 326 | // We only support string parsing if we are trying to do this parse 327 | // synchronously. 328 | if (typeof input != "string") { 329 | throw new Error("input is not a recognizable type!"); 330 | } 331 | var parser = new RawMimeParser(emitter, opts); 332 | parser.deliverData(input); 333 | parser.deliverEOF(); 334 | }, 335 | 336 | /** 337 | * Returns a stream listener that feeds data into a parser. 338 | * 339 | * In addition to the functions on the emitter that the parser may use, the 340 | * generated stream listener will also make calls to onStartRequest and 341 | * onStopRequest on the emitter (if they exist). 342 | * 343 | * @param emitter The emitter to receive callbacks on. 344 | * @param opts A set of options for the parser. 345 | */ 346 | // makeStreamListenerParser(emitter, opts) { 347 | // var StreamListener = { 348 | // onStartRequest(aRequest) { 349 | // try { 350 | // if ("onStartRequest" in emitter) { 351 | // emitter.onStartRequest(aRequest); 352 | // } 353 | // } finally { 354 | // this._parser.resetParser(); 355 | // } 356 | // }, 357 | // onStopRequest(aRequest, aStatus) { 358 | // this._parser.deliverEOF(); 359 | // if ("onStopRequest" in emitter) { 360 | // emitter.onStopRequest(aRequest, aStatus); 361 | // } 362 | // }, 363 | // onDataAvailable(aRequest, aStream, aOffset, aCount) { 364 | // var scriptIn = Cc[ 365 | // "@mozilla.org/scriptableinputstream;1" 366 | // ].createInstance(Ci.nsIScriptableInputStream); 367 | // scriptIn.init(aStream); 368 | // // Use readBytes instead of read to handle embedded NULs properly. 
369 | // this._parser.deliverData(scriptIn.readBytes(aCount)); 370 | // }, 371 | // QueryInterface: ChromeUtils.generateQI([ 372 | // "nsIStreamListener", 373 | // "nsIRequestObserver", 374 | // ]), 375 | // }; 376 | // setDefaultParserOptions(opts); 377 | // StreamListener._parser = new RawMimeParser(emitter, opts); 378 | // return StreamListener; 379 | // }, 380 | 381 | /** 382 | * Returns a new raw MIME parser. 383 | * 384 | * Prefer one of the other methods where possible, since the input here must 385 | * be driven manually. 386 | * 387 | * @param emitter The emitter to receive callbacks on. 388 | * @param opts A set of options for the parser. 389 | */ 390 | makeParser(emitter, opts) { 391 | return new RawMimeParser(emitter, opts); 392 | }, 393 | 394 | /** 395 | * Returns a mimeMsg object for the given input. The returned object tries to 396 | * be compatible with the return value of MsgHdrToMimeMessage. Differences: 397 | * - no support for encryption 398 | * - calculated sizes differ slightly 399 | * - allAttachments includes the content and not a URL 400 | * - does not eat TABs in headers, if they follow a CRLF 401 | * 402 | * The input is any type of input that would be accepted by parseSync. 403 | * 404 | * @param {BinaryString} input A string of text to parse. 405 | */ 406 | extractMimeMsg(input, options = {}) { 407 | var emitter = Object.create(ExtractMimeMsgEmitter); 408 | // Set default options. 409 | emitter.options = { 410 | includeAttachments: true, 411 | getMimePart: "", 412 | }; 413 | // Override default options. 414 | for (let option of Object.keys(options)) { 415 | emitter.options[option] = options[option]; 416 | } 417 | 418 | MimeParser.parseSync(input, emitter, { 419 | // jsmime does not use the "1." prefix for the partName. 
420 | pruneat: emitter.options.getMimePart 421 | .split(".") 422 | .slice(1) 423 | .join("."), 424 | bodyformat: "decode", 425 | stripcontinuations: true, 426 | strformat: "unicode", 427 | }); 428 | return emitter.mimeMsg; 429 | }, 430 | 431 | /** 432 | * Returns a dictionary of headers for the given input. 433 | * 434 | * The input is any type of input that would be accepted by parseSync. What 435 | * is returned is a JS object that represents the headers of the entire 436 | * envelope as would be received by startPart when partNum is the empty 437 | * string. 438 | * 439 | * @param input A string of text to parse. 440 | */ 441 | extractHeaders(input) { 442 | var emitter = Object.create(ExtractHeadersEmitter); 443 | MimeParser.parseSync(input, emitter, { pruneat: "", bodyformat: "none" }); 444 | return emitter.headers; 445 | }, 446 | 447 | /** 448 | * Returns the headers and body for the given input message. 449 | * 450 | * The return value is an array whose first element is the dictionary of 451 | * headers (as would be returned by extractHeaders) and whose second element 452 | * is a binary string of the entire body of the message. 453 | * 454 | * @param input A string of text to parse. 455 | */ 456 | extractHeadersAndBody(input) { 457 | var emitter = Object.create(ExtractHeadersAndBodyEmitter); 458 | MimeParser.parseSync(input, emitter, { pruneat: "", bodyformat: "raw" }); 459 | return [emitter.headers, emitter.body]; 460 | }, 461 | 462 | // Parameters for parseHeaderField 463 | 464 | /** 465 | * Parse the header as if it were unstructured. 466 | * 467 | * This results in the same string if no other options are specified. If other 468 | * options are specified, this causes the string to be modified appropriately. 469 | */ 470 | HEADER_UNSTRUCTURED: 0x00, 471 | /** 472 | * Parse the header as if it were in the form text; attr=val; attr=val. 
473 | * 474 | * Such headers include Content-Type, Content-Disposition, and most other 475 | * headers used by MIME as opposed to messages. 476 | */ 477 | HEADER_PARAMETER: 0x02, 478 | /** 479 | * Parse the header as if it were a sequence of mailboxes. 480 | */ 481 | HEADER_ADDRESS: 0x03, 482 | 483 | /** 484 | * This decodes parameter values according to RFC 2231. 485 | * 486 | * This flag means nothing if HEADER_PARAMETER is not specified. 487 | */ 488 | HEADER_OPTION_DECODE_2231: 0x10, 489 | /** 490 | * This decodes the inline encoded-words that are in RFC 2047. 491 | */ 492 | HEADER_OPTION_DECODE_2047: 0x20, 493 | /** 494 | * This converts the header from a raw string to proper Unicode. 495 | */ 496 | HEADER_OPTION_ALLOW_RAW: 0x40, 497 | 498 | // Convenience for all three of the above. 499 | HEADER_OPTION_ALL_I18N: 0x70, 500 | 501 | /** 502 | * Parse a header field according to the specification given by flags. 503 | * 504 | * Permissible flags begin with one of the HEADER_* flags, which may be or'd 505 | * with any of the HEADER_OPTION_* flags to modify the result appropriately. 506 | * 507 | * If the option HEADER_OPTION_ALLOW_RAW is passed, the charset parameter, if 508 | * present, is the charset to fallback to if the header is not decodable as 509 | * UTF-8 text. If HEADER_OPTION_ALLOW_RAW is passed but the charset parameter 510 | * is not provided, then no fallback decoding will be done. If 511 | * HEADER_OPTION_ALLOW_RAW is not passed, then no attempt will be made to 512 | * convert charsets. 513 | * 514 | * @param text The value of a MIME or message header to parse. 515 | * @param flags A set of flags that controls interpretation of the header. 516 | * @param charset A default charset to assume if no information may be found. 
517 | */ 518 | parseHeaderField(text, flags, charset) { 519 | // If we have a raw string, convert it to Unicode first 520 | if (flags & MimeParser.HEADER_OPTION_ALLOW_RAW) { 521 | text = headerParser.convert8BitHeader(text, charset); 522 | } 523 | 524 | // The low 4 bits indicate the type of the header we are parsing. All of the 525 | // higher-order bits are flags. 526 | switch (flags & 0x0f) { 527 | case MimeParser.HEADER_UNSTRUCTURED: 528 | if (flags & MimeParser.HEADER_OPTION_DECODE_2047) { 529 | text = headerParser.decodeRFC2047Words(text); 530 | } 531 | return text; 532 | case MimeParser.HEADER_PARAMETER: 533 | return headerParser.parseParameterHeader( 534 | text, 535 | (flags & MimeParser.HEADER_OPTION_DECODE_2047) != 0, 536 | (flags & MimeParser.HEADER_OPTION_DECODE_2231) != 0 537 | ); 538 | case MimeParser.HEADER_ADDRESS: 539 | return headerParser.parseAddressingHeader( 540 | text, 541 | (flags & MimeParser.HEADER_OPTION_DECODE_2047) != 0 542 | ); 543 | default: 544 | throw new Error("Illegal type of header field"); 545 | } 546 | }, 547 | }; 548 | 549 | /** 550 | * Parse MIME message 551 | * @param {String|Uint8Array} data - MIME message to parse 552 | * @returns {Object} parsed content (see TS definitions for more details) 553 | */ 554 | export function parseMail(data) { 555 | const encoded = (typeof data === 'string') ? 
new TextEncoder().encode(data) : data; 556 | 557 | const { headers, allAttachments, bodyParts } = MimeParser.extractMimeMsg(uint8ArrayToString(encoded)); 558 | // these fields can only contain a single value 559 | const singleKeys = new Set([ 560 | 'message-id', 561 | 'content-id', 562 | 'from', 563 | 'sender', 564 | 'in-reply-to', 565 | 'reply-to', 566 | 'subject', 567 | 'date', 568 | 'content-disposition', 569 | 'content-type', 570 | 'content-transfer-encoding', 571 | 'priority', 572 | 'mime-version', 573 | 'content-description', 574 | 'precedence', 575 | 'errors-to' 576 | ]); 577 | 578 | const mail = { 579 | headers, 580 | // drop some fields for each attachment. 581 | // also, and convert a `null` rawBody to an empty array (edge-case when passing only the attachment headers as `data`) 582 | attachments: allAttachments.map( 583 | ({ parts, partName, body, isEncrypted, rawBody, ...rest }) => ({ ...rest, content: rawBody || new Uint8Array() }) 584 | ), 585 | // join all body parts and normalise EOL to \n 586 | body: { 587 | html: bodyParts.html.length ? bodyParts.html.join('
\n').replace(/\r?\n/g, '\n') : null, 588 | text: bodyParts.text.length ? bodyParts.text.join('\n').replace(/\r?\n/g, '\n') : null 589 | }, 590 | }; 591 | 592 | // copy some headers into top-level object 593 | ['subject', 'date', 'to', 'from', 'to', 'cc', 'bcc', 'message-id', 'in-reply-to', 'reply-to'].forEach(key => { 594 | if (!headers[key]) return; 595 | const maybeArrayValue = headers[key] && headerParser.parseStructuredHeader(key, headers[key]); 596 | mail[key] = singleKeys.has(key) && Array.isArray(maybeArrayValue) 597 | ? maybeArrayValue[maybeArrayValue.length - 1] 598 | : maybeArrayValue; 599 | }); 600 | 601 | return mail; 602 | } 603 | -------------------------------------------------------------------------------- /test/test_mime_tree.js: -------------------------------------------------------------------------------- 1 | import { assert } from "chai"; 2 | import { read_file } from "./utils"; 3 | import { MimeParser, headerparser } from "../lib/jsmime"; 4 | 5 | function arrayTest(data, fn) { 6 | fn.toString = function() { 7 | let text = Function.prototype.toString.call(this); 8 | text = text.replace(/data\[([0-9]*)\]/g, function(m, p) { 9 | return JSON.stringify(data[p]); 10 | }); 11 | return text; 12 | }; 13 | return it(data[0], fn); 14 | } 15 | 16 | // Returns and deletes object[field] if present, or undefined if not. 17 | function extract_field(object, field) { 18 | if (field in object) { 19 | var result = object[field]; 20 | delete object[field]; 21 | return result; 22 | } 23 | return undefined; 24 | } 25 | 26 | /** 27 | * Helper for body tests. 28 | * 29 | * Some extra options are listed too: 30 | * _split: The contents of the file will be passed in packets split by this 31 | * regex. Be sure to include the split delimiter in a group so that they 32 | * are included in the output packets! 33 | * _eol: The CRLFs in the input file will be replaced with the given line 34 | * ending instead. 
35 | * @param test The name of test 36 | * @param file The name of the file to read (relative to mailnews/data) 37 | * @param opts Options for the mime parser, as well as a few extras detailed 38 | * above. 39 | * @param partspec An array of [partnum, line start, line end] detailing the 40 | * expected parts in the body. It will be expected that the 41 | * accumulated body part data for partnum would be the contents 42 | * of the file from [line start, line end) [1-based lines] 43 | */ 44 | function make_body_test(test, file, opts, partspec) { 45 | var results = Promise.all( 46 | partspec.map(p => Promise.all([p[0], read_file(file, p[1], p[2])])) 47 | ); 48 | var eol = extract_field(opts, "_eol"); 49 | var msgtext = read_file(file).then(function(msgcontents) { 50 | var packetize = extract_field(opts, "_split"); 51 | if (packetize !== undefined) { 52 | msgcontents = msgcontents.split(packetize); 53 | } 54 | if (eol !== undefined) { 55 | msgcontents = msgcontents.replace(/\r\n/g, eol); 56 | } 57 | return msgcontents; 58 | }); 59 | if (eol !== undefined) { 60 | results = results.then(function(results_) { 61 | for (let part of results_) { 62 | part[1] = part[1].replace(/\r\n/g, eol); 63 | } 64 | return results_; 65 | }); 66 | } 67 | return [test, msgtext, opts, results]; 68 | } 69 | 70 | /** 71 | * Execute a single MIME tree test. 72 | * 73 | * @param message Either the text of the message, an array of textual message 74 | * part data (imagine coming on different TCP packets), or a 75 | * promise that resolves to any of the above. 76 | * @param opts A set of options for the parser and for the test. 77 | * @param results The expected results of the call. This may either be a 78 | * dictionary of part number -> header -> values (to check 79 | * headers), or an array of [partnum, partdata] for expected 80 | * results to deliverPartData, or a promise for the above. 81 | * @return A promise containing the results of the test. 
82 | */ 83 | function testParser(message, opts, results) { 84 | var uncheckedValues; 85 | var checkingHeaders; 86 | var calls = 0; 87 | var fusingParts = extract_field(opts, "_nofuseparts") === undefined; 88 | var emitter = { 89 | stack: [], 90 | startMessage: function emitter_startMsg() { 91 | assert.equal(this.stack.length, 0); 92 | calls++; 93 | this.partData = ""; 94 | }, 95 | endMessage: function emitter_endMsg() { 96 | assert.equal(this.stack.length, 0); 97 | calls++; 98 | }, 99 | startPart: function emitter_startPart(partNum, headers) { 100 | this.stack.push(partNum); 101 | if (checkingHeaders) { 102 | assert.ok(partNum in uncheckedValues); 103 | // Headers is a map, convert it to an object. 104 | var objmap = {}; 105 | for (let pair of headers) { 106 | objmap[pair[0]] = pair[1]; 107 | } 108 | var expected = uncheckedValues[partNum]; 109 | var convresults = {}; 110 | for (let key in expected) { 111 | try { 112 | convresults[key] = headerparser.parseStructuredHeader( 113 | key, 114 | expected[key] 115 | ); 116 | } catch (e) { 117 | convresults[key] = expected[key]; 118 | } 119 | } 120 | assert.deepEqual(objmap, convresults); 121 | if (fusingParts) { 122 | assert.equal(this.partData, ""); 123 | } 124 | delete uncheckedValues[partNum]; 125 | } 126 | }, 127 | deliverPartData: function emitter_partData(partNum, data) { 128 | assert.equal(this.stack[this.stack.length - 1], partNum); 129 | if (!checkingHeaders) { 130 | if (fusingParts) { 131 | this.partData += data; 132 | } else { 133 | let check = uncheckedValues.shift(); 134 | assert.equal(partNum, check[0]); 135 | assert.equal(data, check[1]); 136 | } 137 | } 138 | }, 139 | endPart: function emitter_endPart(partNum) { 140 | if (this.partData != "") { 141 | let check = uncheckedValues.shift(); 142 | assert.equal(partNum, check[0]); 143 | assert.equal(this.partData, check[1]); 144 | this.partData = ""; 145 | } 146 | assert.equal(this.stack.pop(), partNum); 147 | }, 148 | }; 149 | opts.onerror = function(e) { 150 | 
throw e; 151 | }; 152 | 153 | return Promise.all([message, results]).then(function(vals) { 154 | let [message_, results_] = vals; 155 | // Clone the results array into uncheckedValues 156 | if (Array.isArray(results_)) { 157 | uncheckedValues = Array.from(results_); 158 | checkingHeaders = false; 159 | } else { 160 | uncheckedValues = {}; 161 | for (let key in results_) { 162 | uncheckedValues[key] = results_[key]; 163 | } 164 | checkingHeaders = true; 165 | } 166 | if (!Array.isArray(message_)) { 167 | message_ = [message_]; 168 | } 169 | var parser = new MimeParser(emitter, opts); 170 | message_.forEach(function(packet) { 171 | parser.deliverData(packet); 172 | }); 173 | parser.deliverEOF(); 174 | assert.equal(calls, 2); 175 | if (!checkingHeaders) { 176 | assert.equal(0, uncheckedValues.length); 177 | } else { 178 | assert.deepEqual({}, uncheckedValues); 179 | } 180 | }); 181 | } 182 | 183 | describe("MimeParser", function() { 184 | // This is the expected part specifier for the multipart-complex1 test file, 185 | // specified here because it is used in several cases. 
186 | let mpart_complex1 = [ 187 | ["1", 8, 10], 188 | ["2", 14, 16], 189 | ["3.1", 22, 24], 190 | ["4", 29, 31], 191 | ["5", 33, 35], 192 | ]; 193 | 194 | describe("Simple tests", function() { 195 | let parser_tests = [ 196 | // The following tests are either degenerate or error cases that should 197 | // work 198 | ["Empty string", "", {}, { "": {} }], 199 | ["No value for header", "Header", {}, { "": { Header: [""] } }], 200 | [ 201 | "No trailing newline", 202 | "To: eof@example.net", 203 | {}, 204 | { "": { To: ["eof@example.net"] } }, 205 | ], 206 | [ 207 | "Header no val", 208 | "To: eof@example.net\r\n", 209 | {}, 210 | { "": { To: ["eof@example.net"] } }, 211 | ], 212 | ["No body no headers", "\r\n\r\n", {}, { "": {} }], 213 | ["Body no headers", "\r\n\r\nA", {}, { "": {} }], 214 | // Basic cases for headers 215 | [ 216 | "Multiparts get headers", 217 | read_file("multipart-complex1"), 218 | {}, 219 | { 220 | "": { "Content-Type": ['multipart/mixed; boundary="boundary"'] }, 221 | "1": { 222 | "Content-Type": ["application/octet-stream"], 223 | "Content-Transfer-Encoding": ["base64"], 224 | }, 225 | "2": { 226 | "Content-Type": ["image/png"], 227 | "Content-Transfer-Encoding": ["base64"], 228 | }, 229 | "3": { 230 | "Content-Type": ['multipart/related; boundary="boundary2"'], 231 | }, 232 | "3.1": { "Content-Type": ["text/html"] }, 233 | "4": { "Content-Type": ["text/plain"] }, 234 | "5": {}, 235 | }, 236 | ], 237 | ]; 238 | parser_tests.forEach(function(data) { 239 | arrayTest(data, function() { 240 | return testParser(data[1], data[2], data[3]); 241 | }); 242 | }); 243 | }); 244 | 245 | describe("Body tests", function() { 246 | let parser_tests = [ 247 | // Body tests from data 248 | // (Note: line numbers are 1-based. Also, to capture trailing EOF, add 2 249 | // to the last line number of the file). 
250 | make_body_test("Basic body", "basic1", {}, [["", 3, 5]]), 251 | make_body_test("Basic multipart", "multipart1", {}, [["1", 10, 12]]), 252 | make_body_test("Basic multipart", "multipart2", {}, [["1", 8, 11]]), 253 | make_body_test( 254 | "Complex multipart", 255 | "multipart-complex1", 256 | {}, 257 | mpart_complex1 258 | ), 259 | make_body_test("Truncated multipart", "multipart-complex2", {}, [ 260 | ["1.1.1.1", 21, 25], 261 | ["2", 27, 57], 262 | ["3", 60, 62], 263 | ]), 264 | make_body_test("No LF multipart", "multipartmalt-detach", {}, [ 265 | ["1", 20, 21], 266 | ["2.1", 27, 38], 267 | ["2.2", 42, 43], 268 | ["2.3", 47, 48], 269 | ["3", 53, 54], 270 | ]), 271 | make_body_test("Raw body", "multipart1", { bodyformat: "raw" }, [ 272 | ["", 4, 14], 273 | ]), 274 | [ 275 | "Base64 decode 1", 276 | read_file("base64-1"), 277 | { bodyformat: "decode" }, 278 | [ 279 | [ 280 | "", 281 | "\r\nHello, world! (Again...)\r\n\r\nLet's see how well base64 " + 282 | "text is handled. Yay, lots of space" + 283 | "s! There's even a CRLF at the end and one at the beginning, bu" + 284 | "t the output shouldn't have it.\r\n", 285 | ], 286 | ], 287 | ], 288 | [ 289 | "Base64 decode 2", 290 | read_file("base64-2"), 291 | { bodyformat: "decode" }, 292 | [ 293 | [ 294 | "", 295 | "This is base64 encoded HTML text, and the tags sho" + 296 | "uldn't be stripped.\r\nBold text is bold!" + 297 | "\r\n", 298 | ], 299 | ], 300 | ], 301 | [ 302 | "Base64 decode line issues", 303 | read_file("base64-2").then(function(s) { 304 | return s.split(/(\r\n)/); 305 | }), 306 | { bodyformat: "decode" }, 307 | [ 308 | [ 309 | "", 310 | "This is base64 encoded HTML text, and the tags sho" + 311 | "uldn't be stripped.\r\nBold text is bold!" 
+ 312 | "\r\n", 313 | ], 314 | ], 315 | ], 316 | make_body_test("Base64 nodecode", "base64-1", {}, [["", 4, 9]]), 317 | [ 318 | "QP decode", 319 | read_file("bug505221"), 320 | { pruneat: "1", bodyformat: "decode" }, 321 | [ 322 | [ 323 | "1", 324 | '\r\n\r\n\r\n\r\n\r\n\r\n bbb\r\n<' + 328 | "/BODY>", 329 | ], 330 | ], 331 | ], 332 | [ 333 | "Nested messages", 334 | read_file("message-encoded"), 335 | { bodyformat: "decode" }, 336 | [ 337 | ["1$", "This is a plain-text message."], 338 | ["2$", "I am a plain-text message."], 339 | ["3$", "I am an encoded plain-text message."], 340 | ], 341 | ], 342 | [ 343 | "Nested message headers", 344 | read_file("message-encoded"), 345 | {}, 346 | { 347 | "": { 348 | "Content-Type": ['multipart/mixed; boundary="iamaboundary"'], 349 | }, 350 | "1": { "Content-Type": ["message/rfc822"] }, 351 | "1$": { Subject: ["I am a subject"] }, 352 | "2": { 353 | "Content-Type": ["message/global"], 354 | "Content-Transfer-Encoding": ["base64"], 355 | }, 356 | "2$": { Subject: ["\u79c1\u306f\u3001\u4ef6\u540d\u5348\u524d"] }, 357 | "3": { 358 | "Content-Type": ["message/news"], 359 | "Content-Transfer-Encoding": ["quoted-printable"], 360 | }, 361 | "3$": { Subject: ["\u79c1\u306f\u3001\u4ef6\u540d\u5348\u524d"] }, 362 | }, 363 | ], 364 | ]; 365 | parser_tests.forEach(function(data) { 366 | arrayTest(data, function() { 367 | return testParser(data[1], data[2], data[3]); 368 | }); 369 | }); 370 | }); 371 | 372 | describe("Torture tests", function() { 373 | // Generate a very long message for tests 374 | let teststr = "a"; 375 | for (let i = 0; i < 16; i++) { 376 | teststr += teststr; 377 | } 378 | let parser_tests = [ 379 | [ 380 | "Base64 very long decode", 381 | "Content-Transfer-Encoding: base64\r\n\r\n" + btoa(teststr) + "\r\n", 382 | { bodyformat: "decode" }, 383 | [["", teststr]], 384 | ], 385 | make_body_test("Torture regular body", "mime-torture", {}, [ 386 | ["1", 17, 21], 387 | ["2$.1", 58, 75], 388 | ["2$.2.1", 83, 97], 389 | 
["2$.3", 102, 130], 390 | ["3$", 155, 7742], 391 | ["4", 7747, 8213], 392 | ["5", 8218, 8242], 393 | ["6$.1.1", 8284, 8301], 394 | ["6$.1.2", 8306, 8733], 395 | ["6$.2.1", 8742, 9095], 396 | ["6$.2.2", 9100, 9354], 397 | ["6$.2.3", 9357, 11794], 398 | ["6$.2.4", 11797, 12155], 399 | ["6$.3", 12161, 12809], 400 | ["7$.1", 12844, 12845], 401 | ["7$.2", 12852, 13286], 402 | ["7$.3", 13288, 13297], 403 | ["8$.1", 13331, 13358], 404 | ["8$.2", 13364, 13734], 405 | ["9$", 13757, 20179], 406 | ["10", 20184, 21200], 407 | ["11$.1", 21223, 22031], 408 | ["11$.2", 22036, 22586], 409 | ["12$.1", 22607, 23469], 410 | ["12$.2", 23474, 23774], 411 | ["12$.3$.1", 23787, 23795], 412 | ["12$.3$.2.1", 23803, 23820], 413 | ["12$.3$.2.2", 23825, 24633], 414 | ["12$.3$.3", 24640, 24836], 415 | ["12$.3$.4$", 24848, 25872], 416 | ]), 417 | make_body_test("Torture pruneat", "mime-torture", { pruneat: "4" }, [ 418 | ["4", 7747, 8213], 419 | ]), 420 | 421 | // Test packetization problems 422 | make_body_test( 423 | "Large packets", 424 | "multipart-complex1", 425 | { _split: /(.{30})/ }, 426 | mpart_complex1 427 | ), 428 | make_body_test( 429 | "Split on newline", 430 | "multipart-complex1", 431 | { _split: /(\r\n)/ }, 432 | mpart_complex1 433 | ), 434 | make_body_test( 435 | "Pathological splitting", 436 | "multipart-complex1", 437 | { _split: "" }, 438 | mpart_complex1 439 | ), 440 | 441 | // Non-CLRF line endings? 
442 | make_body_test( 443 | "LF-based messages", 444 | "multipart-complex1", 445 | { _eol: "\n" }, 446 | mpart_complex1 447 | ), 448 | make_body_test( 449 | "CR-based messages", 450 | "multipart-complex1", 451 | { _eol: "\r" }, 452 | mpart_complex1 453 | ), 454 | ]; 455 | parser_tests.forEach(function(data) { 456 | arrayTest(data, function() { 457 | return testParser(data[1], data[2], data[3]); 458 | }); 459 | }); 460 | }); 461 | 462 | describe("Header tests", function() { 463 | let parser_tests = [ 464 | // Basic cases for headers 465 | [ 466 | "Multiparts get headers", 467 | read_file("multipart-complex1"), 468 | {}, 469 | { 470 | "": { "Content-Type": ['multipart/mixed; boundary="boundary"'] }, 471 | "1": { 472 | "Content-Type": ["application/octet-stream"], 473 | "Content-Transfer-Encoding": ["base64"], 474 | }, 475 | "2": { 476 | "Content-Type": ["image/png"], 477 | "Content-Transfer-Encoding": ["base64"], 478 | }, 479 | "3": { 480 | "Content-Type": ['multipart/related; boundary="boundary2"'], 481 | }, 482 | "3.1": { "Content-Type": ["text/html"] }, 483 | "4": { "Content-Type": ["text/plain"] }, 484 | "5": {}, 485 | }, 486 | ], 487 | // 'From ' is not an [iterable] header 488 | [ 489 | "Exclude mbox delimiter", 490 | read_file("bugmail11"), 491 | {}, 492 | { 493 | "": { 494 | "X-Mozilla-Status": ["0001"], 495 | "X-Mozilla-Status2": ["00000000"], 496 | "X-Mozilla-Keys": [""], 497 | "Return-Path": [ 498 | "", 499 | "", 500 | ], 501 | "Delivered-To": ["bugmail@example.org"], 502 | Received: [ 503 | "by 10.114.166.12 with SMTP id o12cs163262wae;" + 504 | " Fri, 11 Apr 2008 07:17:31 -0700 (PDT)", 505 | "by 10.115.60.1 with SMTP id n1mr214763wak.181.1207923450166;" + 506 | " Fri, 11 Apr 2008 07:17:30 -0700 (PDT)", 507 | "from webapp-out.mozilla.org (webapp01.sj.mozilla.com [63.245.208.1" + 508 | "46]) by mx.google.com with ESMTP id n38si6807242wag.2.2008." 
+ 509 | "04.11.07.17.29; Fri, 11 Apr 2008 07:17:30 -0700 (PDT)", 510 | "from mrapp51.mozilla.org (mrapp51.mozilla.org [127.0.0.1])" + 511 | "\tby webapp-out.mozilla.org (8.13.8/8.13.8) with ESMTP id m3BEHTGU" + 512 | "030132\tfor ; Fri, 11 Apr 2008 07:17:29 -0700", 513 | "(from root@localhost)" + 514 | "\tby mrapp51.mozilla.org (8.13.8/8.13.8/Submit) id m3BEHTk4030129;" + 515 | "\tFri, 11 Apr 2008 07:17:29 -0700", 516 | ], 517 | "Received-Spf": [ 518 | "neutral (google.com: 63.245.208.146 is neither perm" + 519 | "itted nor denied by best guess record for domain of bugzilla-daemo" + 520 | "n@mozilla.org) client-ip=63.245.208.146;", 521 | ], 522 | "Authentication-Results": [ 523 | "mx.google.com; spf=neutral (google.com: 6" + 524 | "3.245.208.146 is neither permitted nor denied by best guess record" + 525 | " for domain of bugzilla-daemon@mozilla.org) smtp.mail=bugzilla-dae" + 526 | "mon@mozilla.org", 527 | ], 528 | Date: ["Fri, 11 Apr 2008 07:17:29 -0700"], 529 | "Message-ID": [ 530 | "<200804111417.m3BEHTk4030129@mrapp51.mozilla.org>", 531 | ], 532 | From: ["bugzilla-daemon@mozilla.org"], 533 | To: ["bugmail@example.org"], 534 | Subject: ["Bugzilla: confirm account creation"], 535 | "X-Bugzilla-Type": ["admin"], 536 | "Content-Type": ['text/plain; charset="UTF-8"'], 537 | "MIME-Version": ["1.0"], 538 | }, 539 | }, 540 | ], 541 | ]; 542 | parser_tests.forEach(function(data) { 543 | arrayTest(data, function() { 544 | return testParser(data[1], data[2], data[3]); 545 | }); 546 | }); 547 | }); 548 | 549 | describe("Charset tests", function() { 550 | function buildTree(file, options) { 551 | var tree = new Map(); 552 | var emitter = { 553 | startPart(part, headers) { 554 | tree.set(part, { headers, body: null }); 555 | }, 556 | deliverPartData(part, data) { 557 | var obj = tree.get(part); 558 | if (obj.body === null) { 559 | obj.body = data; 560 | } else if (typeof obj.body === "string") { 561 | obj.body += data; 562 | } else { 563 | var newData = new 
Uint8Array(obj.body.length + data.length); 564 | newData.set(obj.body); 565 | newData.subarray(obj.body.length).set(data); 566 | obj.body = newData; 567 | } 568 | }, 569 | }; 570 | return file.then(function(data) { 571 | var parser = new MimeParser(emitter, options); 572 | parser.deliverData(data); 573 | parser.deliverEOF(); 574 | return tree; 575 | }); 576 | } 577 | it("Unicode decoding", function() { 578 | return buildTree(read_file("shift-jis-image"), { 579 | strformat: "unicode", 580 | bodyformat: "decode", 581 | }).then(function(tree) { 582 | // text/plain should be transcoded... 583 | assert.equal( 584 | tree 585 | .get("1") 586 | .headers.get("Content-Type") 587 | .get("charset"), 588 | "Shift-JIS" 589 | ); 590 | assert.equal(tree.get("1").headers.charset, "Shift-JIS"); 591 | assert.equal( 592 | tree.get("1").headers.get("Content-Description"), 593 | "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb" 594 | ); 595 | assert.equal( 596 | tree.get("1").body, 597 | "Portable Network Graphics\uff08" + 598 | "\u30dd\u30fc\u30bf\u30d6\u30eb\u30fb\u30cd\u30c3\u30c8\u30ef\u30fc" + 599 | "\u30af\u30fb\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30b9\u3001PNG" + 600 | "\uff09\u306f\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3067\u30d3\u30c3" + 601 | "\u30c8\u30de\u30c3\u30d7\u753b\u50cf\u3092\u6271\u3046\u30d5\u30a1" + 602 | "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" + 603 | "\u3002\u5727\u7e2e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u3057" + 604 | "\u3066Deflate\u3092\u63a1\u7528\u3057\u3066\u3044\u308b\u3001" + 605 | "\u5727\u7e2e\u306b\u3088\u308b\u753b\u8cea\u306e\u52a3\u5316\u306e" + 606 | "\u306a\u3044\u53ef\u9006\u5727\u7e2e\u306e\u753b\u50cf\u30d5\u30a1" + 607 | "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" + 608 | "\u3002\r\n" 609 | ); 610 | // ... 
but not image/png 611 | assert.ok( 612 | !tree 613 | .get("2") 614 | .headers.get("Content-Type") 615 | .has("charset") 616 | ); 617 | assert.equal(tree.get("2").headers.charset, ""); 618 | assert.equal( 619 | tree.get("2").headers.get("Content-Description"), 620 | "\ufffdP\ufffdc\ufffd@\ufffd\ufffd\ufffdR\ufffdA\ufffdg\ufffd\ufffd" 621 | ); 622 | assert.equal( 623 | tree.get("2").headers.getRawHeader("Content-Description"), 624 | "\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b" 625 | ); 626 | var imageData = 627 | "iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE" + 628 | "QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA" + 629 | "AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0" + 630 | "klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS" + 631 | "8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt" + 632 | "lmUAAAAASUVORK5CYII="; 633 | imageData = atob(imageData); 634 | var asArray = new Uint8Array(imageData.length); 635 | for (var i = 0; i < asArray.length; i++) { 636 | asArray[i] = imageData.charCodeAt(i); 637 | } 638 | assert.deepEqual(tree.get("2").body, asArray); 639 | 640 | // Touching the header charset should change the interpretation. 641 | tree.get("1").headers.charset = "Shift-JIS"; 642 | assert.equal(tree.get("1").headers.charset, "Shift-JIS"); 643 | assert.equal( 644 | tree.get("1").headers.get("Content-Description"), 645 | "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb" 646 | ); 647 | }); 648 | }); 649 | it("Fallback charset decoding", function() { 650 | return buildTree(read_file("shift-jis-image"), { 651 | strformat: "unicode", 652 | charset: "ISO-8859-1", 653 | bodyformat: "decode", 654 | }).then(function(tree) { 655 | // text/plain should be transcoded... 
656 | assert.equal( 657 | tree 658 | .get("1") 659 | .headers.get("Content-Type") 660 | .get("charset"), 661 | "Shift-JIS" 662 | ); 663 | assert.equal(tree.get("1").headers.charset, "Shift-JIS"); 664 | assert.equal( 665 | tree.get("1").headers.get("Content-Description"), 666 | "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb" 667 | ); 668 | assert.equal( 669 | tree.get("1").body, 670 | "Portable Network Graphics\uff08" + 671 | "\u30dd\u30fc\u30bf\u30d6\u30eb\u30fb\u30cd\u30c3\u30c8\u30ef\u30fc" + 672 | "\u30af\u30fb\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30b9\u3001PNG" + 673 | "\uff09\u306f\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3067\u30d3\u30c3" + 674 | "\u30c8\u30de\u30c3\u30d7\u753b\u50cf\u3092\u6271\u3046\u30d5\u30a1" + 675 | "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" + 676 | "\u3002\u5727\u7e2e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u3057" + 677 | "\u3066Deflate\u3092\u63a1\u7528\u3057\u3066\u3044\u308b\u3001" + 678 | "\u5727\u7e2e\u306b\u3088\u308b\u753b\u8cea\u306e\u52a3\u5316\u306e" + 679 | "\u306a\u3044\u53ef\u9006\u5727\u7e2e\u306e\u753b\u50cf\u30d5\u30a1" + 680 | "\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b" + 681 | "\u3002\r\n" 682 | ); 683 | // ... 
but not image/png 684 | assert.ok( 685 | !tree 686 | .get("2") 687 | .headers.get("Content-Type") 688 | .has("charset") 689 | ); 690 | assert.equal(tree.get("2").headers.charset, "ISO-8859-1"); 691 | assert.equal( 692 | tree.get("2").headers.get("Content-Description"), 693 | "\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039" 694 | ); 695 | assert.equal( 696 | tree.get("2").headers.getRawHeader("Content-Description"), 697 | "\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b" 698 | ); 699 | var imageData = 700 | "iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE" + 701 | "QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA" + 702 | "AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0" + 703 | "klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS" + 704 | "8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt" + 705 | "lmUAAAAASUVORK5CYII="; 706 | imageData = atob(imageData); 707 | var asArray = new Uint8Array(imageData.length); 708 | for (var i = 0; i < asArray.length; i++) { 709 | asArray[i] = imageData.charCodeAt(i); 710 | } 711 | assert.deepEqual(tree.get("2").body, asArray); 712 | 713 | // Touching the header charset should change the interpretation. 714 | tree.get("1").headers.charset = "Shift-JIS"; 715 | assert.equal(tree.get("1").headers.charset, "Shift-JIS"); 716 | assert.equal( 717 | tree.get("1").headers.get("Content-Description"), 718 | "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb" 719 | ); 720 | }); 721 | }); 722 | it("Forced charset decoding", function() { 723 | return buildTree(read_file("shift-jis-image"), { 724 | strformat: "unicode", 725 | charset: "ISO-8859-1", 726 | "force-charset": true, 727 | bodyformat: "decode", 728 | }).then(function(tree) { 729 | // text/plain should be transcoded... 
730 | assert.equal( 731 | tree 732 | .get("1") 733 | .headers.get("Content-Type") 734 | .get("charset"), 735 | "Shift-JIS" 736 | ); 737 | assert.equal(tree.get("1").headers.charset, "ISO-8859-1"); 738 | assert.equal( 739 | tree.get("1").headers.get("Content-Description"), 740 | "\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039" 741 | ); 742 | assert.equal( 743 | tree.get("1").body, 744 | "Portable Network Graphics\u0081i" + 745 | "\u0192|\u0081[\u0192^\u0192u\u0192\u2039\u0081E\u0192l\u0192b" + 746 | "\u0192g\u0192\u008f\u0081[\u0192N\u0081E\u0192O\u0192\u2030\u0192t" + 747 | "\u0192B\u0192b\u0192N\u0192X\u0081APNG\u0081j\u201a\u00cd\u0192R" + 748 | "\u0192\u201c\u0192s\u0192\u2026\u0081[\u0192^\u201a\u00c5\u0192r" + 749 | "\u0192b\u0192g\u0192}\u0192b\u0192v\u2030\u00e6\u2018\u0153\u201a" + 750 | "\u00f0\u02c6\u00b5\u201a\u00a4\u0192t\u0192@\u0192C\u0192\u2039" + 751 | "\u0192t\u0192H\u0081[\u0192}\u0192b\u0192g\u201a\u00c5\u201a\u00a0" + 752 | "\u201a\u00e9\u0081B\u02c6\u00b3\u008fk\u0192A\u0192\u2039\u0192S" + 753 | "\u0192\u0160\u0192Y\u0192\u20ac\u201a\u00c6\u201a\u00b5\u201a" + 754 | "\u00c4Deflate\u201a\u00f0\u008d\u00cc\u2014p\u201a\u00b5\u201a" + 755 | "\u00c4\u201a\u00a2\u201a\u00e9\u0081A\u02c6\u00b3\u008fk\u201a" + 756 | "\u00c9\u201a\u00e6\u201a\u00e9\u2030\u00e6\u017d\u00bf\u201a\u00cc" + 757 | "\u2014\u00f2\u2030\u00bb\u201a\u00cc\u201a\u00c8\u201a\u00a2\u2030" + 758 | "\u00c2\u2039t\u02c6\u00b3\u008fk\u201a\u00cc\u2030\u00e6\u2018" + 759 | "\u0153\u0192t\u0192@\u0192C\u0192\u2039\u0192t\u0192H\u0081[\u0192" + 760 | "}\u0192b\u0192g\u201a\u00c5\u201a\u00a0\u201a\u00e9\u0081B\r\n" 761 | ); 762 | // ... 
but not image/png 763 | assert.ok( 764 | !tree 765 | .get("2") 766 | .headers.get("Content-Type") 767 | .has("charset") 768 | ); 769 | assert.equal(tree.get("2").headers.charset, "ISO-8859-1"); 770 | assert.equal( 771 | tree.get("2").headers.get("Content-Description"), 772 | "\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039" 773 | ); 774 | assert.equal( 775 | tree.get("2").headers.getRawHeader("Content-Description"), 776 | "\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b" 777 | ); 778 | var imageData = 779 | "iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE" + 780 | "QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA" + 781 | "AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0" + 782 | "klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS" + 783 | "8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt" + 784 | "lmUAAAAASUVORK5CYII="; 785 | imageData = atob(imageData); 786 | var asArray = new Uint8Array(imageData.length); 787 | for (var i = 0; i < asArray.length; i++) { 788 | asArray[i] = imageData.charCodeAt(i); 789 | } 790 | assert.deepEqual(tree.get("2").body, asArray); 791 | 792 | // Touching the header charset should change the interpretation. 
793 | tree.get("1").headers.charset = "Shift-JIS"; 794 | assert.equal(tree.get("1").headers.charset, "Shift-JIS"); 795 | assert.equal( 796 | tree.get("1").headers.get("Content-Description"), 797 | "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb" 798 | ); 799 | }); 800 | }); 801 | it("Charset conversion", function() { 802 | return buildTree(read_file("charsets"), { 803 | strformat: "unicode", 804 | bodyformat: "decode", 805 | }).then(function(tree) { 806 | var numParts = 14; 807 | for (let i = 1; i < numParts; i += 2) { 808 | assert.equal(tree.get("" + i).body, tree.get("" + (i + 1)).body); 809 | } 810 | assert.ok(!tree.has("" + (numParts + 1))); 811 | }); 812 | }); 813 | }); 814 | }); 815 | -------------------------------------------------------------------------------- /lib/headerEmitter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * This module implements the code for emitting structured representations of 3 | * MIME headers into their encoded forms. The code here is a companion to, 4 | * but completely independent of, jsmime.headerparser: the structured 5 | * representations that are used as input to the functions in this file are the 6 | * same forms that would be parsed. 7 | */ 8 | 9 | import { kMonthNames, uint8ArrayToString } from "./utils"; 10 | 11 | // Get the default structured encoders and add them to the map 12 | import { spellings as preferredSpellings, encoders as headerEncoders} from "./structuredHeaders"; 13 | const encoders = new Map(); 14 | for (let [header, encoder] of headerEncoders) { 15 | addStructuredEncoder(header, encoder); 16 | } 17 | 18 | // Clamp a value in the range [min, max], defaulting to def 19 | // if the object[property] does not contain the value. 
20 | function clamp(object, property, min, max, def) { 21 | if (!(property in object)) { 22 | return def; 23 | } 24 | let value = object[property]; 25 | if (value < min) { 26 | return min; 27 | } 28 | if (value > max) { 29 | return max; 30 | } 31 | return value; 32 | } 33 | 34 | /** 35 | * An object that can assemble structured header representations into their MIME 36 | * representation. 37 | * 38 | * The character-counting portion of this class operates using individual JS 39 | * characters as its representation of logical character, which is not the same 40 | * as the number of octets used as UTF-8. If non-ASCII characters are to be 41 | * included in headers without some form of encoding, then care should be taken 42 | * to set the maximum line length to account for the mismatch between character 43 | * counts and octet counts: the maximum line is 998 octets, which could be as 44 | * few as 332 JS characters (non-BMP characters, although they take up 4 octets 45 | * in UTF-8, count as 2 in JS strings). 46 | * 47 | * This code takes care to only insert line breaks at the higher-level breaking 48 | * points in a header (as recommended by RFC 5322), but it may need to resort to 49 | * including them more aggressively if this is not possible. If even aggressive 50 | * line-breaking cannot allow a header to be emitted without violating line 51 | * length restrictions, the methods will throw an exception to indicate this 52 | * situation. 53 | * 54 | * In general, this code does not attempt to modify its input; for example, it 55 | * does not attempt to change the case of any input characters, apply any 56 | * Unicode normalization algorithms, or convert email addresses to ACE where 57 | * applicable. The biggest exception to this rule is that most whitespace is 58 | * collapsed to a single space, even in unstructured headers, while most leading 59 | * and trailing whitespace is trimmed from inputs. 
60 | * 61 | * @param {StreamHandler} handler The handler to which all output is sent. 62 | * @param {Function(String)} handler.deliverData Receives encoded data. 63 | * @param {Function()} handler.deliverEOF Sent when all text is sent. 64 | * @param {Object} options Options for the emitter. 65 | * @param [options.softMargin=78] {30 <= Integer <= 900} 66 | * The ideal maximum number of logical characters to include in a line, not 67 | * including the final CRLF pair. Lines may exceed this margin if parameters 68 | * are excessively long. 69 | * @param [options.hardMargin=332] {softMargin <= Integer <= 998} 70 | * The maximum number of logical characters that can be included in a line, 71 | * not including the final CRLF pair. If this count would be exceeded, then 72 | * an error will be thrown and encoding will not be possible. 73 | * @param [options.useASCII=true] {Boolean} 74 | * If true, then RFC 2047 and RFC 2231 encoding of headers will be performed 75 | * as needed to retain headers as ASCII. 76 | */ 77 | class HeaderEmitter { 78 | constructor(handler, options) { 79 | // The inferred value of options.useASCII 80 | this._useASCII = options.useASCII === undefined ? true : options.useASCII; 81 | this._sanitizeDate = 82 | options.sanitizeDate === undefined ? false : options.sanitizeDate; 83 | // The handler to use. 84 | this._handler = handler; 85 | /** 86 | * The current line being built; note that we may insert a line break in the 87 | * middle to keep under the maximum line length. 88 | * 89 | * @type String 90 | * @private 91 | */ 92 | this._currentLine = ""; 93 | 94 | // Our bounds for soft and margins are not completely arbitrary. The minimum 95 | // amount we need to encode is 20 characters, which can encode a single 96 | // non-BMP character with RFC 2047. The value of 30 is chosen to give some 97 | // breathing room for delimiters or other unbreakable characters. 
The maximum 98 | // length is 998 octets, per RFC 5322; soft margins are slightly lower to 99 | // allow for breathing room as well. The default of 78 for the soft margin is 100 | // recommended by RFC 5322. 101 | this._softMargin = clamp(options, "softMargin", 30, 900, 78); 102 | this._hardMargin = clamp( 103 | options, 104 | "hardMargin", 105 | this._softMargin, 106 | 998, 107 | 998 108 | ); 109 | 110 | /** 111 | * The index of the last preferred breakable position in the current line. 112 | * 113 | * @type Integer 114 | * @private 115 | */ 116 | this._preferredBreakpoint = 0; 117 | } 118 | 119 | // Low-level methods 120 | // ----------------- 121 | 122 | // Explanation of the emitter internals: 123 | // RFC 5322 requires that we wrap our lines, ideally at 78 characters and at 124 | // least by 998 octets. We can't wrap in arbitrary places, but wherever CFWS is 125 | // valid... and ideally wherever clients are likely to expect it. In theory, we 126 | // can break between every token (this is how RFC 822 operates), but, in RFC 127 | // 5322, many of those breaks are relegated to obsolete productions, mostly 128 | // because it is common to not properly handle breaks in those locations. 129 | // 130 | // So how do we do line breaking? The algorithm we implement is greedy, to 131 | // simplify implementation. There are two margins: the soft margin, which we 132 | // want to keep within, and the hard margin, which we absolutely have to keep 133 | // within. There are also two kinds of break points: preferred and emergency. 134 | // As long as we keep the line within the hard margin, we will only break at 135 | // preferred breakpoints; emergency breakpoints are only used if we would 136 | // otherwise exceed the hard margin. 
137 | // 138 | // For illustration, here is an example header and where these break points are 139 | // located: 140 | // 141 | // To: John "The Rock" Smith 142 | // Preferred: ^ ^ ^ 143 | // Emergency: ^ ^ ^ ^^ ^ ^ ^ ^ ^ 144 | // 145 | // Preferred breakpoints are indicated by setting the mayBreakAfter parameter of 146 | // addText to true, while emergency breakpoints are set after every token passed 147 | // into addText. This is handled implicitly by only adding text to _currentLine 148 | // if it ends in an emergency breakpoint. 149 | // 150 | // Internally, the code keeps track of margins by use of two variables. The 151 | // _softMargin and _hardMargin variables encode the positions at which code must 152 | // absolutely break, and are set up from the initial options parameter. Breaking 153 | // happens when _currentLine.length approaches these values, as mentioned above. 154 | 155 | /** 156 | * Send a header line consisting of the first N characters to the handler. 157 | * 158 | * If the count parameter is missing, then we presume that the current header 159 | * value being emitted is done and therefore we should not send a continuation 160 | * space. Otherwise, we presume that we're still working, so we will send the 161 | * continuation space. 162 | * 163 | * @private 164 | * @param [count] {Integer} The number of characters in the current line to 165 | * include before wrapping. 166 | */ 167 | _commitLine(count) { 168 | let isContinuing = typeof count !== "undefined"; 169 | 170 | // Split at the point, and lop off whitespace immediately before and after. 171 | let firstN, lastN; 172 | if (isContinuing) { 173 | firstN = this._currentLine.slice(0, count).trimRight(); 174 | lastN = this._currentLine.slice(count).trimLeft(); 175 | } else { 176 | firstN = this._currentLine.trimRight(); 177 | lastN = ""; 178 | } 179 | 180 | // Send the line plus the final CRLF. 
181 | this._handler.deliverData(firstN + "\r\n"); 182 | 183 | // Fill the start of the line with the new data. 184 | this._currentLine = lastN; 185 | 186 | // If this is a continuation, add an extra space at the beginning of the line. 187 | // Adjust the breakpoint shift amount as well. 188 | if (isContinuing) { 189 | this._currentLine = " " + this._currentLine; 190 | } 191 | 192 | // We will always break at a point at or after the _preferredBreakpoint, if it 193 | // exists, so this always gets reset to 0. 194 | this._preferredBreakpoint = 0; 195 | } 196 | 197 | /** 198 | * Reserve at least length characters in the current line. If there aren't 199 | * enough characters, insert a line break. 200 | * 201 | * @private 202 | * @param length {Integer} The number of characters to reserve space for. 203 | * @return {Boolean} Whether or not there is enough space for length characters. 204 | */ 205 | _reserveTokenSpace(length) { 206 | // We are not going to do a sanity check that length is within the wrap 207 | // margins. The rationale is that this lets code simply call this function to 208 | // force a higher-level line break than normal preferred line breaks (see 209 | // addAddress for an example use). The text that would be added may need to be 210 | // itself broken up, so it might not need all the length anyways, but it 211 | // starts the break already. 212 | 213 | // If we have enough space, we don't need to do anything. 214 | if (this._currentLine.length + length <= this._softMargin) { 215 | return true; 216 | } 217 | 218 | // If we have a preferred breakpoint, commit the line at that point, and see 219 | // if that is sufficient line-breaking. 220 | if (this._preferredBreakpoint > 0) { 221 | this._commitLine(this._preferredBreakpoint); 222 | if (this._currentLine.length + length <= this._softMargin) { 223 | return true; 224 | } 225 | } 226 | 227 | // At this point, we can no longer keep within the soft margin. 
Let us see if 228 | // we can fit within the hard margin. 229 | if (this._currentLine.length + length <= this._hardMargin) { 230 | return true; 231 | } 232 | 233 | // Adding the text to length would violate the hard margin as well. Break at 234 | // the last emergency breakpoint. 235 | if (this._currentLine.length > 0) { 236 | this._commitLine(this._currentLine.length); 237 | } 238 | 239 | // At this point, if there is still insufficient room in the hard margin, we 240 | // can no longer do anything to encode this word. Bail. 241 | return this._currentLine.length + length <= this._hardMargin; 242 | } 243 | 244 | /** 245 | * Adds a block of text to the current header, inserting a break if necessary. 246 | * If mayBreakAfter is true and text does not end in whitespace, a single space 247 | * character may be added to the output. If the text could not be added without 248 | * violating line length restrictions, an error is thrown instead. 249 | * 250 | * @protected 251 | * @param {String} text The text to add to the output. 252 | * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred 253 | * breakpoint. 254 | */ 255 | addText(text, mayBreakAfter) { 256 | // Try to reserve space for the tokens. If we can't, give up. 257 | if (!this._reserveTokenSpace(text.length)) { 258 | throw new Error("Cannot encode " + text + " due to length."); 259 | } 260 | 261 | this._currentLine += text; 262 | if (mayBreakAfter) { 263 | // Make sure that there is an extra space if text could break afterwards. 264 | this._preferredBreakpoint = this._currentLine.length; 265 | if (text[text.length - 1] != " ") { 266 | this._currentLine += " "; 267 | } 268 | } 269 | } 270 | 271 | /** 272 | * Adds a block of text that may need quoting if it contains some character in 273 | * qchars. If it is already quoted, no quoting will be applied. If the text 274 | * cannot be added without violating maximum line length, an error is thrown 275 | * instead. 
276 | * 277 | * @protected 278 | * @param {String} text The text to add to the output. 279 | * @param {String} qchars The set of characters that cannot appear 280 | * outside of a quoted string. 281 | * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred 282 | * breakpoint. 283 | */ 284 | addQuotable( 285 | text, 286 | qchars, 287 | mayBreakAfter 288 | ) { 289 | // No text -> no need to be quoted (prevents strict warning errors). 290 | if (text.length == 0) { 291 | return; 292 | } 293 | 294 | // Figure out if we need to quote the string. Don't quote a string which 295 | // already appears to be quoted. 296 | let needsQuote = false; 297 | 298 | if (!(text[0] == '"' && text[text.length - 1] == '"') && qchars != "") { 299 | for (let i = 0; i < text.length; i++) { 300 | if (qchars.includes(text[i])) { 301 | needsQuote = true; 302 | break; 303 | } 304 | } 305 | } 306 | 307 | if (needsQuote) { 308 | text = '"' + text.replace(/["\\]/g, "\\$&") + '"'; 309 | } 310 | this.addText(text, mayBreakAfter); 311 | } 312 | 313 | /** 314 | * Adds a block of text that corresponds to the phrase production in RFC 5322. 315 | * Such text is a sequence of atoms, quoted-strings, or RFC-2047 encoded-words. 316 | * This method will preprocess input to normalize all space sequences to a 317 | * single space. If the text cannot be added without violating maximum line 318 | * length, an error is thrown instead. 319 | * 320 | * @protected 321 | * @param {String} text The text to add to the output. 322 | * @param {String} qchars The set of characters that cannot appear 323 | * outside of a quoted string. 324 | * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred 325 | * breakpoint. 326 | */ 327 | addPhrase(text, qchars, mayBreakAfter) { 328 | // Collapse all whitespace spans into a single whitespace node. 329 | text = text.replace(/[ \t\r\n]+/g, " "); 330 | 331 | // If we have non-ASCII text, encode it using RFC 2047. 
332 | if (this._useASCII && nonAsciiRe.test(text)) { 333 | this.encodeRFC2047Phrase(text, mayBreakAfter); 334 | return; 335 | } 336 | 337 | // If quoting the entire string at once could fit in the line length, then do 338 | // so. The check here is very loose, but this will inform is if we are going 339 | // to definitely overrun the soft margin. 340 | if (this._currentLine.length + text.length < this._softMargin) { 341 | try { 342 | this.addQuotable(text, qchars, mayBreakAfter); 343 | // If we don't have a breakpoint, and the text is encoded as a sequence of 344 | // atoms (and not a quoted-string), then make the last space we added a 345 | // breakpoint, regardless of the mayBreakAfter setting. 346 | if (this._preferredBreakpoint == 0 && text.includes(" ")) { 347 | if (this._currentLine[this._currentLine.length - 1] != '"') { 348 | this._preferredBreakpoint = this._currentLine.lastIndexOf(" "); 349 | } 350 | } 351 | return; 352 | } catch (e) { 353 | // If we get an error at this point, we failed to add the quoted string 354 | // because the string was too long. Fall through to the case where we know 355 | // that the input was too long to begin with. 356 | } 357 | } 358 | 359 | // If the text is too long, split the quotable string at space boundaries and 360 | // add each word individually. If we still can't add all those words, there is 361 | // nothing that we can do. 362 | let words = text.split(" "); 363 | for (let i = 0; i < words.length; i++) { 364 | this.addQuotable( 365 | words[i], 366 | qchars, 367 | i == words.length - 1 ? mayBreakAfter : true 368 | ); 369 | } 370 | } 371 | 372 | /** 373 | * Add a block of text as a single RFC 2047 encoded word. This does not try to 374 | * split words if they are too long. 375 | * 376 | * @private 377 | * @param {Uint8Array} encodedText The octets to encode. 378 | * @param {Boolean} useQP If true, use quoted-printable; if false, 379 | * use base64. 
380 | * @param {Boolean} mayBreakAfter If true, the end of this text is a 381 | * preferred breakpoint. 382 | */ 383 | _addRFC2047Word( 384 | encodedText, 385 | useQP, 386 | mayBreakAfter 387 | ) { 388 | let binaryString = uint8ArrayToString(encodedText); 389 | let token; 390 | if (useQP) { 391 | token = qpPrelude; 392 | for (let i = 0; i < encodedText.length; i++) { 393 | if ( 394 | encodedText[i] < 0x20 || 395 | encodedText[i] >= 0x7f || 396 | qpForbidden.includes(binaryString[i]) 397 | ) { 398 | let ch = encodedText[i]; 399 | token += "=" + hexString[(ch & 0xf0) >> 4] + hexString[ch & 0x0f]; 400 | } else if (binaryString[i] == " ") { 401 | token += "_"; 402 | } else { 403 | token += binaryString[i]; 404 | } 405 | } 406 | token += "?="; 407 | } else { 408 | token = b64Prelude + btoa(binaryString) + "?="; 409 | } 410 | this.addText(token, mayBreakAfter); 411 | } 412 | 413 | /** 414 | * Add a block of text as potentially several RFC 2047 encoded-word tokens. 415 | * 416 | * @protected 417 | * @param {String} text The text to add to the output. 418 | * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred 419 | * breakpoint. 420 | */ 421 | encodeRFC2047Phrase( 422 | text, 423 | mayBreakAfter 424 | ) { 425 | // Start by encoding the text into UTF-8 directly. 426 | let encodedText = new TextEncoder("UTF-8").encode(text); 427 | 428 | // Make sure there's enough room for a single token. 429 | let minLineLen = b64Prelude.length + 10; // Eight base64 characters plus ?= 430 | if (!this._reserveTokenSpace(minLineLen)) { 431 | this._commitLine(this._currentLine.length); 432 | } 433 | 434 | // Try to encode as much UTF-8 text as possible in each go. 
435 | let b64Len = 0, 436 | qpLen = 0, 437 | start = 0; 438 | let maxChars = 439 | this._softMargin - this._currentLine.length - (b64Prelude.length + 2); 440 | for (let i = 0; i < encodedText.length; i++) { 441 | let b64Inc = 0, 442 | qpInc = 0; 443 | // The length we need for base64 is ceil(length / 3) * 4... 444 | if ((i - start) % 3 == 0) { 445 | b64Inc += 4; 446 | } 447 | 448 | // The length for quoted-printable is 3 chars only if encoded 449 | if ( 450 | encodedText[i] < 0x20 || 451 | encodedText[i] >= 0x7f || 452 | qpForbidden.includes(String.fromCharCode(encodedText[i])) 453 | ) { 454 | qpInc = 3; 455 | } else { 456 | qpInc = 1; 457 | } 458 | 459 | if (b64Len + b64Inc > maxChars && qpLen + qpInc > maxChars) { 460 | // Oops, we have too many characters! We need to encode everything through 461 | // the current character. However, we can't split in the middle of a 462 | // multibyte character. In UTF-8, characters that start with 10xx xxxx are 463 | // the middle of multibyte characters, so backtrack until the start 464 | // character is legal. 465 | while ((encodedText[i] & 0xc0) == 0x80) { 466 | --i; 467 | } 468 | 469 | // Add this part of the word and then make a continuation. 470 | this._addRFC2047Word( 471 | encodedText.subarray(start, i), 472 | b64Len >= qpLen, 473 | true 474 | ); 475 | 476 | // Reset the array for parsing. 477 | start = i; 478 | --i; // Reparse this character as well 479 | b64Len = qpLen = 0; 480 | maxChars = this._softMargin - b64Prelude.length - 3; 481 | } else { 482 | // Add the counts for the current variable to the count to encode. 483 | b64Len += b64Inc; 484 | qpLen += qpInc; 485 | } 486 | } 487 | 488 | // Add the entire array at this point. 489 | this._addRFC2047Word( 490 | encodedText.subarray(start), 491 | b64Len >= qpLen, 492 | mayBreakAfter 493 | ); 494 | } 495 | 496 | // High-level methods 497 | // ------------------ 498 | 499 | /** 500 | * Add the header name, with the colon and trailing space, to the output. 
501 | * 502 | * @public 503 | * @param {String} name The name of the header. 504 | */ 505 | addHeaderName(name) { 506 | this._currentLine = this._currentLine.trimRight(); 507 | if (this._currentLine.length > 0) { 508 | this._commitLine(); 509 | } 510 | this.addText(name + ": ", false); 511 | } 512 | 513 | /** 514 | * Add a header and its structured value to the output. 515 | * 516 | * The name can be any case-insensitive variant of a known structured header; 517 | * the output will include the preferred name of the structure instead of the 518 | * case put into the name. If no structured encoder can be found, and the input 519 | * value is a string, then the header is assumed to be unstructured and the 520 | * value is added as if {@link addUnstructured} were called. 521 | * 522 | * @public 523 | * @param {String} name The name of the header. 524 | * @param value The structured value of the header. 525 | */ 526 | addStructuredHeader(name, value) { 527 | let lowerName = name.toLowerCase(); 528 | if (encoders.has(lowerName)) { 529 | this.addHeaderName(preferredSpellings.get(lowerName)); 530 | encoders.get(lowerName).call(this, value); 531 | } else if (typeof value === "string") { 532 | // Assume it's an unstructured header. 533 | // All-lower-case-names are ugly, so capitalize first letters. 534 | name = name.replace(/(^|-)[a-z]/g, function(match) { 535 | return match.toUpperCase(); 536 | }); 537 | this.addHeaderName(name); 538 | this.addUnstructured(value); 539 | } else { 540 | throw new Error("Unknown header " + name); 541 | } 542 | } 543 | 544 | /** 545 | * Add a single address to the header. The address is an object consisting of a 546 | * possibly-empty display name and an email address. 547 | * 548 | * @public 549 | * @param Address addr The address to be added. 550 | * @param {String} addr.name The (possibly-empty) name of the address to add. 551 | * @param {String} addr.email The email of the address to add. 
552 | * @see headerparser.parseAddressingHeader 553 | */ 554 | addAddress(addr) { 555 | // If we have a display name, add that first. 556 | if (addr.name) { 557 | // This is a simple estimate that keeps names on one line if possible. 558 | this._reserveTokenSpace(addr.name.length + addr.email.length + 3); 559 | this.addPhrase(addr.name, ',()<>[]:;@."', true); 560 | 561 | // If we don't have an email address, don't write out the angle brackets for 562 | // the address. It's already an abnormal situation should this appear, and 563 | // this has better round-tripping properties. 564 | if (!addr.email) { 565 | return; 566 | } 567 | 568 | this.addText("<", false); 569 | } 570 | 571 | // Find the local-part and domain of the address, since the local-part may 572 | // need to be quoted separately. Note that the @ goes to the domain, so that 573 | // the local-part may be quoted if it needs to be. 574 | let at = addr.email.lastIndexOf("@"); 575 | let localpart = "", 576 | domain = ""; 577 | if (at == -1) { 578 | localpart = addr.email; 579 | } else { 580 | localpart = addr.email.slice(0, at); 581 | domain = addr.email.slice(at); 582 | } 583 | 584 | this.addQuotable(localpart, '()<>[]:;@\\," !', false); 585 | this.addText(domain + (addr.name ? ">" : ""), false); 586 | } 587 | 588 | /** 589 | * Add an array of addresses and groups to the output. Such an array may be 590 | * found as the output of {@link headerparser.parseAddressingHeader}. Each 591 | * element is either an address (an object with properties name and email), or a 592 | * group (an object with properties name and group). 593 | * 594 | * @public 595 | * @param {(Address|Group)[]} addrs A collection of addresses to add. 596 | * @param {String} addrs[i].name The (possibly-empty) name of the 597 | * address or the group to add. 598 | * @param {String} [addrs[i].email] The email of the address to add. 599 | * @param {Address[]} [addrs[i].group] A list of email addresses in the group. 
600 | * @see HeaderEmitter.addAddress 601 | * @see headerparser.parseAddressingHeader 602 | */ 603 | addAddresses(addresses) { 604 | let needsComma = false; 605 | for (let addr of addresses) { 606 | // Add a comma if this is not the first element. 607 | if (needsComma) { 608 | this.addText(", ", true); 609 | } 610 | needsComma = true; 611 | 612 | if ("email" in addr) { 613 | this.addAddress(addr); 614 | } else { 615 | // A group has format name: member, member; 616 | // Note that we still add a comma after the group is completed. 617 | this.addPhrase(addr.name, ',()<>[]:;@."', false); 618 | this.addText(":", true); 619 | 620 | this.addAddresses(addr.group); 621 | this.addText(";", true); 622 | } 623 | } 624 | } 625 | 626 | /** 627 | * Add an unstructured header value to the output. This effectively means only 628 | * inserting line breaks were necessary, and using RFC 2047 encoding where 629 | * necessary. 630 | * 631 | * @public 632 | * @param {String} text The text to add to the output. 633 | */ 634 | addUnstructured(text) { 635 | if (text.length == 0) { 636 | return; 637 | } 638 | 639 | // Unstructured text is basically a phrase that can't be quoted. So, if we 640 | // have nothing in qchars, nothing should be quoted. 641 | this.addPhrase(text, "", false); 642 | } 643 | 644 | /** 645 | * Add a date/time field to the output, using the JS date object as the time 646 | * representation. The value will be output using the timezone offset of the 647 | * date object, which is usually the timezone of the user (modulo timezone and 648 | * DST changes). 649 | * 650 | * Note that if the date is an invalid date (its internal date parameter is a 651 | * NaN value), this method throws an error instead of generating an invalid 652 | * string. 653 | * 654 | * @public 655 | * @param {Date} date The date to be added to the output string. 656 | */ 657 | addDate(date) { 658 | // Rather than make a header plastered with NaN values, throw an error on 659 | // specific invalid dates. 
660 | if (isNaN(date.getTime())) { 661 | throw new Error("Cannot encode an invalid date"); 662 | } 663 | 664 | let fullYear, 665 | month, 666 | dayOfMonth, 667 | dayOfWeek, 668 | hours, 669 | minutes, 670 | seconds, 671 | tzOffset; 672 | 673 | if (this._sanitizeDate) { 674 | fullYear = date.getUTCFullYear(); 675 | month = date.getUTCMonth(); 676 | dayOfMonth = date.getUTCDate(); 677 | dayOfWeek = date.getUTCDay(); 678 | hours = date.getUTCHours(); 679 | minutes = date.getUTCMinutes(); 680 | // To reduce the chance of fingerprinting the clock offset, 681 | // round the time down to the nearest minute. 682 | seconds = 0; 683 | tzOffset = 0; 684 | } else { 685 | fullYear = date.getFullYear(); 686 | month = date.getMonth(); 687 | dayOfMonth = date.getDate(); 688 | dayOfWeek = date.getDay(); 689 | hours = date.getHours(); 690 | minutes = date.getMinutes(); 691 | seconds = date.getSeconds(); 692 | tzOffset = date.getTimezoneOffset(); 693 | } 694 | 695 | // RFC 5322 says years can't be before 1900. The after 9999 is a bit that 696 | // derives from the specification saying that years have 4 digits. 697 | if (fullYear < 1900 || fullYear > 9999) { 698 | throw new Error("Date year is out of encodable range"); 699 | } 700 | 701 | // Start by computing the timezone offset for a day. We lack a good format, so 702 | // the the 0-padding is done by hand. Note that the tzoffset we output is in 703 | // the form ±hhmm, so we need to separate the offset (in minutes) into an hour 704 | // and minute pair. 705 | let tzOffHours = Math.abs(Math.trunc(tzOffset / 60)); 706 | let tzOffMinutes = Math.abs(tzOffset) % 60; 707 | let tzOffsetStr = 708 | (tzOffset > 0 ? "-" : "+") + 709 | padTo2Digits(tzOffHours) + 710 | padTo2Digits(tzOffMinutes); 711 | 712 | // Convert the day-time figure into a single value to avoid unwanted line 713 | // breaks in the middle. 
714 | let dayTime = [ 715 | kDaysOfWeek[dayOfWeek] + ",", 716 | dayOfMonth, 717 | kMonthNames[month], 718 | fullYear, 719 | padTo2Digits(hours) + 720 | ":" + 721 | padTo2Digits(minutes) + 722 | ":" + 723 | padTo2Digits(seconds), 724 | tzOffsetStr, 725 | ].join(" "); 726 | this.addText(dayTime, false); 727 | } 728 | 729 | /** 730 | * Signal that the current header has been finished encoding. 731 | * 732 | * @public 733 | * @param {Boolean} deliverEOF If true, signal to the handler that no more text 734 | * will be arriving. 735 | */ 736 | finish(deliverEOF) { 737 | this._commitLine(); 738 | if (deliverEOF) { 739 | this._handler.deliverEOF(); 740 | } 741 | } 742 | } 743 | 744 | /** 745 | * Formatting helper to output numbers between 0-9 as 00-09 instead. 746 | */ 747 | function padTo2Digits(num) { 748 | return num < 10 ? "0" + num : num.toString(); 749 | } 750 | 751 | /** RFC 822 labels for days of the week. */ 752 | const kDaysOfWeek = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]; 753 | 754 | // A regular expression for characters that need to be encoded. 755 | const nonAsciiRe = /[^\x20-\x7e]/; 756 | 757 | // The beginnings of RFC 2047 encoded-word 758 | const b64Prelude = "=?UTF-8?B?"; 759 | const qpPrelude = "=?UTF-8?Q?"; 760 | 761 | // A list of ASCII characters forbidden in RFC 2047 encoded-words 762 | const qpForbidden = "\"#$%&'(),.:;<=>?@[\\]^_`{|}~"; 763 | 764 | const hexString = "0123456789ABCDEF"; 765 | 766 | /** 767 | * Make a streaming header emitter that outputs on the given handler. 768 | * 769 | * @param {StreamHandler} handler The handler to consume output 770 | * @param options Options to pass into the HeaderEmitter 771 | * constructor. 772 | * @returns {HeaderEmitter} A header emitter constructed with the given options. 
/**
 * Make a streaming header emitter that outputs on the given handler.
 *
 * @param {StreamHandler} handler The handler to consume output.
 * @param options                 Options to pass into the HeaderEmitter
 *                                constructor.
 * @returns {HeaderEmitter} A header emitter constructed with the given options.
 */
export function makeStreamingEmitter(handler, options) {
  const emitter = new HeaderEmitter(handler, options);
  return emitter;
}

/**
 * Minimal output handler that accumulates everything delivered to it in its
 * `value` string.
 */
class StringHandler {
  constructor() {
    // Everything delivered so far, in order.
    this.value = "";
  }

  /** Append a piece of emitted text to the accumulated value. */
  deliverData(str) {
    this.value = this.value + str;
  }

  /** End-of-stream notification; there is nothing to do for a plain string. */
  // eslint-disable-next-line class-methods-use-this
  deliverEOF() {}
}

/**
 * Given a header name and its structured value, output a string containing its
 * MIME-encoded value. The trailing CRLF for the header is included.
 *
 * @param {String} name The name of the structured header.
 * @param value         The value of the structured header.
 * @param options       Options for the HeaderEmitter constructor.
 * @returns {String} A MIME-encoded representation of the structured header.
 * @see HeaderEmitter.addStructuredHeader
 */
export function emitStructuredHeader(name, value, options) {
  const sink = new StringHandler();
  const emitter = new HeaderEmitter(sink, options);

  emitter.addStructuredHeader(name, value);
  emitter.finish(true);

  return sink.value;
}
/**
 * Given a map of header names and their structured values, output a string
 * containing all of their headers and their MIME-encoded values.
 *
 * This method is designed to be able to emit header values given the
 * headerData values produced by MIME parsing. Thus, the values of the map are
 * arrays corresponding to header multiplicity: one header is emitted per array
 * element, in iteration order.
 *
 * @param {Map(String->Object[])} headerValues A map of header names to arrays
 *                                             of their structured values.
 * @param options                              Options for the HeaderEmitter
 *                                             constructor.
 * @returns {String} A MIME-encoded representation of the structured headers.
 * @see HeaderEmitter.addStructuredHeader
 */
export function emitStructuredHeaders(headerValues, options) {
  const sink = new StringHandler();
  const emitter = new HeaderEmitter(sink, options);

  for (const entry of headerValues) {
    const headerName = entry[0];
    const instances = entry[1];
    instances.forEach(value => {
      emitter.addStructuredHeader(headerName, value);
    });
  }

  emitter.finish(true);
  return sink.value;
}

/**
 * Add a custom structured MIME encoder to the set of known encoders. These
 * encoders are used for {@link emitStructuredHeader} and similar functions to
 * encode richer, more structured values instead of relying on string
 * representations everywhere.
 *
 * Structured encoders are functions which take in a single parameter
 * representing their structured value. The this parameter is set to be an
 * instance of {@link HeaderEmitter}, and it is intended that the several
 * public or protected methods on that class are useful for encoding values.
 *
 * The encoder is registered under the lower-cased header name and replaces
 * any encoder already registered for it. The given spelling becomes the
 * preferred one only if no preferred spelling is recorded yet.
 *
 * @param {String}          header  The header name (in its preferred case) for
 *                                  which the encoder will be used.
 * @param {Function(Value)} encoder The structured encoder function.
 */
export function addStructuredEncoder(header, encoder) {
  const key = header.toLowerCase();
  encoders.set(key, encoder);

  if (preferredSpellings.has(key)) {
    return;
  }
  preferredSpellings.set(key, header);
}