├── .prettierignore ├── typings ├── regx │ └── index.d.ts └── planer │ └── index.d.ts ├── .eslintignore ├── .npmignore ├── .gitignore ├── jest.config.js ├── src ├── tests │ ├── prepareMessage │ │ ├── broken-links.input.html │ │ ├── email_11.input.html │ │ ├── empty-divs.input.html │ │ ├── tempo-signature.input.html │ │ ├── quote-string-before-blockquote.output-message.html │ │ ├── email_19.output-message.html │ │ ├── email_11.output-message.html │ │ ├── empty-divs.output-message.html │ │ ├── empty-divs.output-complete.html │ │ ├── email_11.output-complete.html │ │ ├── prepareMessageTestOptions.ts │ │ ├── quote-string-before-blockquote.input.html │ │ ├── broken-links.output-complete.html │ │ ├── broken-links.output-message.html │ │ ├── tempo-signature.output-complete.html │ │ ├── tempo-signature.output-message.html │ │ ├── attached-image.input.html │ │ ├── trimmed-reply-bug.output-message.html │ │ ├── no-empty-message.input.html │ │ ├── attached-image.output-complete.html │ │ ├── attached-image.output-message.html │ │ ├── github-emoji.input.html │ │ ├── prepareMessage.test.ts │ │ ├── github-emoji.output-complete.html │ │ ├── github-emoji.output-message.html │ │ ├── generateFixtureOutputs.ts │ │ ├── no-empty-message.output-complete.html │ │ ├── no-empty-message.output-message.html │ │ ├── fixtures.ts │ │ ├── all-in-one.output-message.html │ │ ├── all-in-one.input.html │ │ ├── all-in-one.output-complete.html │ │ ├── email_19.input.html │ │ ├── trimmed-reply-bug.input.html │ │ └── trimmed-reply-bug.output-complete.html │ ├── utils.ts │ ├── appendStyle.test.ts │ ├── walkBackwards.test.ts │ ├── linkify.test.ts │ ├── enforceViewport.test.ts │ ├── removeTrailingWhitespaces.test.ts │ ├── remoteContent │ │ ├── email-privacy-tester.html │ │ ├── blockRemoteContent.test.ts │ │ └── email-privacy-tester.eml │ └── removeQuotations.test.ts ├── index.ts ├── appendStyle.ts ├── walkBackwards.ts ├── enforceViewport.ts ├── enhanceLinks.ts ├── linkify.ts ├── fixBrokenHtml.ts ├── 
removeTrailingWhitespaces.ts ├── blockRemoteContent │ ├── index.ts │ ├── blockRemoteContentInStyle.ts │ └── blockRemoteContentInAttributes.ts ├── cheerio-utils.ts ├── findQuoteString.ts ├── removeQuotations.ts └── prepareMessage.ts ├── .prettierrc ├── tsconfig.json ├── scripts └── decode-quoted-printable.sh ├── benchmarks ├── emails │ ├── index.ts │ ├── basic-lorem-gmail.html │ ├── basic-lorem-gmail.eml │ ├── basic-lorem-gmail-replied-x1.html │ ├── basic-lorem-gmail-replied-x2.html │ ├── basic-lorem-gmail-replied-x1.eml │ └── basic-lorem-gmail-replied-x2.eml ├── message-splitter-output.txt ├── README.md ├── message-splitter.ts ├── libraries-output.txt ├── libraries.ts └── utils.ts ├── LICENSE.md ├── .circleci └── config.yml ├── .eslintrc.js ├── package.json ├── README.md └── CHANGELOG.md /.prettierignore: -------------------------------------------------------------------------------- 1 | src/tests/prepareMessage/ -------------------------------------------------------------------------------- /typings/regx/index.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'regx'; 2 | -------------------------------------------------------------------------------- /typings/planer/index.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'planer'; 2 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | lib/ 2 | 3 | # Node 4 | node_modules 5 | 6 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | tsconfig.json 2 | tsconfig.tsbuildinfo 3 | src/ 4 | benchmarks/ 5 | old/ 6 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | lib/ 2 | tsconfig.tsbuildinfo 3 | 4 | # Node 5 | node_modules 6 | yarn-error.log 7 | 8 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: 'ts-jest', 3 | modulePathIgnorePatterns: ['old/'], 4 | testEnvironment: 'node', 5 | }; 6 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/broken-links.input.html: -------------------------------------------------------------------------------- 1 | GitHub 2 | Some email 3 | Invalid link -------------------------------------------------------------------------------- /src/tests/prepareMessage/email_11.input.html: -------------------------------------------------------------------------------- 1 |
Hello
this is a quote
On December 3, 2019 at 05:01, Onno Schwanen wrote:
-------------------------------------------------------------------------------- /src/tests/prepareMessage/empty-divs.input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | Hello. Empty divs should not become auto-closing div tags. It's not XML 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import linkify from './linkify'; 2 | import prepareMessage from './prepareMessage'; 3 | import { blockRemoteContent } from './blockRemoteContent'; 4 | 5 | export default prepareMessage; 6 | export { linkify, blockRemoteContent }; 7 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/tempo-signature.input.html: -------------------------------------------------------------------------------- 1 |

okokd sa d

2 |

dsa

3 | 4 | 5 | 6 |

--
I use Tempo to improve my focus

7 | 8 | 9 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "useTabs": true, 3 | "printWidth": 80, 4 | "tabWidth": 4, 5 | "singleQuote": true, 6 | "trailingComma": "es5", 7 | "jsxBracketSameLine": false, 8 | "semi": true, 9 | "overrides": [ 10 | { 11 | "files": ["*.html"], 12 | "options": { 13 | "printWidth": 120 14 | } 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/quote-string-before-blockquote.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |

Hello

12 | 13 | 14 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/email_19.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | and again 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/email_11.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |
12 |
Hello
13 |
this is a quote
14 |
15 | 16 | 17 | -------------------------------------------------------------------------------- /src/appendStyle.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Add CSS style to the page 3 | */ 4 | function appendStyle( 5 | $: CheerioStatic, 6 | // Example: `.title { color: red; }` 7 | css: string 8 | ) { 9 | const styleElement = $(``); 10 | const head = $('head'); // Cheerio already makes sure head is present 11 | head.append(styleElement); 12 | } 13 | 14 | export default appendStyle; 15 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/empty-divs.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |
12 | Hello. Empty divs should not become auto-closing div tags. It's not XML 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/empty-divs.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |
12 | Hello. Empty divs should not become auto-closing div tags. It's not XML 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/email_11.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |
12 |
Hello
13 |
this is a quote
14 |
On December 3, 2019 at 05:01, Onno Schwanen wrote:
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/prepareMessageTestOptions.ts: -------------------------------------------------------------------------------- 1 | import { PrepareMessageOptions } from "../../prepareMessage"; 2 | 3 | const testOptions: PrepareMessageOptions = { 4 | noQuotations: true, 5 | autolink: true, 6 | enhanceLinks: true, 7 | noRemoteContent: true, 8 | forceViewport: '', 9 | includeStyle: '.customStyle { background: red; }', 10 | }; 11 | 12 | export default testOptions; 13 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/quote-string-before-blockquote.input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Hello

4 |
5 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
7 |
8 |
9 |
10 |

11 | This is the replied message 12 |

13 |
14 |
15 | 16 | 17 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/broken-links.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | GitHub 12 | Some email 13 | Invalid link 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/broken-links.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | GitHub 12 | Some email 13 | Invalid link 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/tempo-signature.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |

okokd sa d

12 |

dsa

13 | 14 |

15 | --
I use Tempo to improve my focus 16 |

17 | 18 | 19 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/tempo-signature.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |

okokd sa d

12 |

dsa

13 | 14 |

15 | --
I use Tempo to improve my focus 16 |

17 | 18 | 19 | -------------------------------------------------------------------------------- /src/walkBackwards.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Walk a CheerioElement hierarchy, depth-first and in reverse order. 3 | * Uses generators, so that it can be used in a for loop 4 | */ 5 | function* walkBackwards(el: CheerioElement): Generator { 6 | if (!el) { 7 | return; 8 | } 9 | if (el.children && el.children.length > 0) { 10 | for (let i = el.children.length - 1; i >= 0; i--) { 11 | yield* walkBackwards(el.children[i]); 12 | } 13 | } 14 | yield el; 15 | return; 16 | } 17 | 18 | export default walkBackwards; 19 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/attached-image.input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
7 |

8 | Hello 9 |

10 |
11 | attached-image.jpg

Cheers!
Jonathan 17 |
18 |
19 | 20 | 21 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/trimmed-reply-bug.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |

Hi Mette,

12 |

13 | I have attached our contracts for the USD 500.000 investment regarding BoostVC. Please let me know if thats what 14 | is needed. 15 |

16 |

Kind Regards

17 |

Henrique

18 |

--
I use Tempo to improve my focus

19 | 20 | 21 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/no-empty-message.input.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 |
6 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
8 |
9 |
13 |
14 |
15 |

16 | Replied message 17 |

18 |
19 |
20 |
21 |
22 |
23 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/attached-image.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 |
13 |

14 | Hello 15 |

16 |
17 | attached-image.jpg

Cheers!
Jonathan 23 |
24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/attached-image.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 |
13 |

14 | Hello 15 |

16 |
17 | attached-image.jpg

Cheers!
Jonathan 23 |
24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["src/**/*"], 3 | "exclude": [ 4 | "lib/", 5 | "old/", 6 | "benchmarks/", 7 | "node_modules", 8 | "src/tests/", 9 | "**/*.test.ts" 10 | ], 11 | "compilerOptions": { 12 | "outDir": "lib/", 13 | "rootDir": "src/", 14 | "incremental": true, 15 | "target": "es5", 16 | "module": "commonjs", 17 | "lib": [], 18 | "allowJs": true, 19 | "skipLibCheck": true, 20 | "isolatedModules": true, 21 | "strict": true, 22 | /* Allow use of generators */ 23 | "downlevelIteration": true, 24 | "moduleResolution": "node", 25 | "typeRoots": ["./typings", "node_modules/@types"], 26 | "allowSyntheticDefaultImports": true, 27 | "esModuleInterop": true, 28 | "forceConsistentCasingInFileNames": true 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/github-emoji.input.html: -------------------------------------------------------------------------------- 1 |

2 | 3 |

@alexandersandberg approved this pull request.

4 | 5 |

Nice job! 😍 Really nice to have this on mobile as well!

6 | 7 |


You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub, or unsubscribe.

-------------------------------------------------------------------------------- /scripts/decode-quoted-printable.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #################################################### 4 | # Decode and rewrite a quoted-printable encoded file 5 | #################################################### 6 | 7 | set -o errexit 8 | set -o pipefail 9 | # set -o xtrace 10 | 11 | if ! type qprint > /dev/null; then 12 | echo 'This script requires qprint in PATH.'; 13 | echo 'https://brewinstall.org/Install-qprint-on-Mac-with-Brew/'; 14 | exit 1; 15 | fi 16 | 17 | INPUT_FILE_PATH=$1 18 | 19 | if [ -z "$1" ]; then 20 | echo 'Missing input file'; 21 | echo 'Usage: ./decode-quoted-printable.sh ./path/to/encoded/file'; 22 | exit 1; 23 | fi 24 | 25 | 26 | 27 | DECODED=$(qprint -d "$INPUT_FILE_PATH") 28 | 29 | cat <<< "$DECODED" > "$INPUT_FILE_PATH" 30 | 31 | echo 'Done'; 32 | exit 0; -------------------------------------------------------------------------------- /benchmarks/emails/index.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | 4 | function readFile(relativePath: string): string { 5 | return fs.readFileSync(path.join(__dirname, relativePath)).toString(); 6 | } 7 | 8 | const EMAILS = { 9 | /* 10 | BASICS are basic emails with inline formatting such as bold. `x1` and `x2` are self-replied versions of the first. They increase in size approximately by a linear factor of 1, 2 and 3. 11 | */ 12 | BASIC: readFile('basic-lorem-gmail.html'), // 1.7K 13 | BASIC_REPLIED_X1: readFile('basic-lorem-gmail-replied-x1.html'), // 3.7K 14 | BASIC_REPLIED_X2: readFile('basic-lorem-gmail-replied-x2.html'), // 6.2K 15 | 16 | /* 17 | This is a real-world marketing email. It has a large size, and contains heavy HTML and CSS. 
18 | */ 19 | 20 | MARKETING: readFile('marketing-gmail.html'), // 102K 21 | }; 22 | 23 | export default EMAILS; 24 | -------------------------------------------------------------------------------- /benchmarks/message-splitter-output.txt: -------------------------------------------------------------------------------- 1 | removeQuotations # Marketing email 2 | 11.22 ms 3 | 89.11 ops/sec 4 | (69 runs sampled) 5 | Relative Margin of Error: ±2.67% 6 | 7 | linkify # Marketing email 8 | 9.07 ms 9 | 110.3 ops/sec 10 | (80 runs sampled) 11 | Relative Margin of Error: ±0.70% 12 | 13 | prepareMessage # Marketing email 14 | 15.53 ms 15 | 64.41 ops/sec 16 | (66 runs sampled) 17 | Relative Margin of Error: ±1.56% 18 | 19 | prepareMessage # Size 1 20 | 0.5944 ms 21 | 1,682 ops/sec 22 | (83 runs sampled) 23 | Relative Margin of Error: ±1.79% 24 | 25 | prepareMessage # Size 2 26 | 0.8725 ms 27 | 1,146 ops/sec 28 | (88 runs sampled) 29 | Relative Margin of Error: ±1.24% 30 | 31 | prepareMessage # Size 3 32 | 1.344 ms 33 | 743.9 ops/sec 34 | (85 runs sampled) 35 | Relative Margin of Error: ±2.57% 36 | 37 | ✨ Done in 35.54s. -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | ## How to run benchmarks 2 | 3 | To run benchmarks for `message-splitter` implementation 4 | 5 | ``` 6 | yarn run benchmarks 7 | ``` 8 | 9 | To run general benchmarks for external libraries 10 | 11 | ``` 12 | yarn run benchmarks:libraries 13 | ``` 14 | 15 | ## Understanding the results 16 | 17 | Each benchmark prints its results, showing: 18 | 19 | - The number of **operations per second**. This is the relevant value, which must be compared with values from other implementations. 20 | - The **number of samples** run. BenchmarkJS has a special heuristic to choose how many samples must be made. The results are more accurate with a high number of samples. 
A low sample count is often tied to a high relative margin of error. 21 | - The **relative margin of error** (rme) for the measure. The lower the value, the more accurate the results are. When compared with previous results, we display the average relative margin of error. 22 | -------------------------------------------------------------------------------- /src/enforceViewport.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Removes ALL existing viewport-tags from the email and 3 | * appends the following viewport-tag to the most top-level element 4 | * 5 | * 6 | * If the email does not contain a element then it will be created 7 | * just before the viewport-tag gets appended. 8 | */ 9 | function enforceViewport( 10 | $: CheerioStatic, 11 | desiredViewport = '' 12 | ) { 13 | const viewports = $('meta[name="viewport"]'); 14 | const hasViewport = viewports.length > 0; 15 | 16 | const viewportElement = $(desiredViewport); 17 | 18 | if (hasViewport) { 19 | // remove current viewports 20 | viewports.each((_, el) => { 21 | $(el).remove(); 22 | }); 23 | } 24 | 25 | // Insert a viewport 26 | const head = $('head'); // Cheerio already makes sure head is present 27 | head.append(viewportElement); 28 | } 29 | 30 | export default enforceViewport; 31 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/prepareMessage.test.ts: -------------------------------------------------------------------------------- 1 | import { expectHtml } from '../utils'; 2 | import prepareMessage from '../../prepareMessage'; 3 | import { listFixtures, Fixture } from './fixtures'; 4 | import testOptions from './prepareMessageTestOptions'; 5 | 6 | /** 7 | * Run tests for a fixture 8 | */ 9 | function checkFixture(fixture: Fixture) { 10 | describe(fixture.name, () => { 11 | const result = prepareMessage(fixture.input, testOptions); 12 | 13 | if (fixture.hasOutputComplete()) { 14 | it('completeHtml', 
() => { 15 | expectHtml(result.completeHtml, fixture.outputComplete); 16 | }); 17 | } 18 | 19 | if (fixture.hasOutputMessage()) { 20 | // console.log(result.messageHtml); 21 | it('messageHtml', () => { 22 | expectHtml(result.messageHtml, fixture.outputMessage); 23 | }); 24 | } 25 | }); 26 | } 27 | 28 | describe('prepareMessage', () => { 29 | const fixtures = listFixtures().filter( 30 | fixture => fixture.hasOutputComplete() || fixture.hasOutputMessage() 31 | ); 32 | 33 | fixtures.forEach(checkFixture); 34 | }); 35 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/github-emoji.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |

12 | 13 |

@alexandersandberg approved this pull request.

14 | 15 |

16 | Nice job! 17 | 😍 23 | Really nice to have this on mobile as well! 24 |

25 | 26 |

27 | —
You are receiving this because you authored the thread.
Reply to this email directly, 28 | view it on GitHub, or 29 | unsubscribe. 30 |

31 | 32 | 33 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/github-emoji.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |

12 | 13 |

@alexandersandberg approved this pull request.

14 | 15 |

16 | Nice job! 17 | 😍 23 | Really nice to have this on mobile as well! 24 |

25 | 26 |

27 | —
You are receiving this because you authored the thread.
Reply to this email directly, 28 | view it on GitHub, or 29 | unsubscribe. 30 |

31 | 32 | 33 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/generateFixtureOutputs.ts: -------------------------------------------------------------------------------- 1 | import Fs from 'fs'; 2 | import { listFixtures } from './fixtures'; 3 | import prepareMessage from '../../prepareMessage'; 4 | import { formatHtml } from '../utils'; 5 | import testOptions from './prepareMessageTestOptions'; 6 | 7 | /************************************* 8 | 9 | Find all *.input.html that have no *.output-*.html, and write the output 10 | of `prepareMessage` for it. 11 | 12 | *************************************/ 13 | 14 | listFixtures() 15 | .filter( 16 | fixture => !fixture.hasOutputComplete() && !fixture.hasOutputMessage() 17 | ) 18 | .forEach(fixture => { 19 | console.log('Found lonely input fixture: ' + fixture.name); 20 | 21 | const result = prepareMessage(fixture.input, testOptions); 22 | 23 | Fs.writeFileSync( 24 | fixture.outputMessagePath, 25 | formatHtml(result.messageHtml) 26 | ); 27 | console.log('Wrote: ' + fixture.outputMessagePath); 28 | 29 | Fs.writeFileSync( 30 | fixture.outputCompletePath, 31 | formatHtml(result.completeHtml) 32 | ); 33 | console.log('Wrote: ' + fixture.outputCompletePath); 34 | }); 35 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Tempo GmbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The 
above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/no-empty-message.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |
12 |
13 |
14 |
15 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
20 |
21 |
25 |
26 |
27 |

28 | Replied message 29 |

30 |
31 |
32 |
33 |
34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/no-empty-message.output-message.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 |
12 |
13 |
14 |
15 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
20 |
21 |
25 |
26 |
27 |

28 | Replied message 29 |

30 |
31 |
32 |
33 |
34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /benchmarks/message-splitter.ts: -------------------------------------------------------------------------------- 1 | /* 2 | These benchmarks measure the performance of the message-splitter module 3 | */ 4 | 5 | import cheerio from 'cheerio'; 6 | import { createSuite } from './utils'; 7 | import prepareMessage, { linkify } from '../src'; 8 | import EMAILS from './emails'; 9 | import removeQuotations from '../src/removeQuotations'; 10 | 11 | const suite = createSuite(); 12 | 13 | // Measure against a real-world, HTML-heavy, marketing email 14 | suite 15 | .add('removeQuotations # Marketing email', () => { 16 | removeQuotations(cheerio.load(EMAILS.MARKETING)); 17 | }) 18 | .add('linkify # Marketing email', () => { 19 | linkify(EMAILS.MARKETING); 20 | }) 21 | .add('prepareMessage # Marketing email', () => { 22 | prepareMessage(EMAILS.MARKETING); 23 | }); 24 | 25 | // Using a linear scale of input complexity, we can see if the time complexity is linear. 
26 | suite 27 | .add('prepareMessage # Size 1', () => { 28 | prepareMessage(EMAILS.BASIC); 29 | }) 30 | .add('prepareMessage # Size 2', () => { 31 | prepareMessage(EMAILS.BASIC_REPLIED_X1); 32 | }) 33 | .add('prepareMessage # Size 3', () => { 34 | prepareMessage(EMAILS.BASIC_REPLIED_X2); 35 | }); 36 | 37 | suite.run(); 38 | -------------------------------------------------------------------------------- /src/tests/utils.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | import prettier from 'prettier'; 4 | import expect from 'expect'; 5 | 6 | function formatHtml(html: string): string { 7 | return prettier.format(html, { 8 | parser: 'html', 9 | endOfLine: 'lf', 10 | printWidth: 120, 11 | }); 12 | } 13 | /** 14 | * Expect two HTMLs to be identical, disregarding formatting differences 15 | */ 16 | function expectHtml(actual: string, expected: string) { 17 | // Use prettier to avoid formatting discrepencies 18 | actual = formatHtml(actual); 19 | expected = formatHtml(expected); 20 | 21 | // console.log(actual); 22 | // console.log(expected); 23 | expect(actual).toBe(expected); 24 | } 25 | 26 | function readFile(...paths: string[]): string { 27 | return fs.readFileSync(path.join(...paths)).toString(); 28 | } 29 | 30 | function fileExists(...paths: string[]): boolean { 31 | return fs.existsSync(path.join(...paths)); 32 | } 33 | 34 | function readFileIfExists(...paths: string[]): string | null { 35 | if (!fileExists(...paths)) { 36 | return null; 37 | } else { 38 | return readFile(...paths); 39 | } 40 | } 41 | 42 | export { expectHtml, formatHtml, readFile, fileExists, readFileIfExists }; 43 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | node: circleci/node@1.1.6 4 | commands: 5 | yarn_install: 6 | steps: 7 | - 
restore_cache: 8 | name: Restore Yarn Package Cache 9 | keys: 10 | - yarn-packages-{{ checksum "yarn.lock" }} 11 | - run: 12 | name: Install Dependencies 13 | command: yarn install --frozen-lockfile 14 | - save_cache: 15 | name: Save Yarn Package Cache 16 | key: yarn-packages-{{ checksum "yarn.lock" }} 17 | paths: 18 | - ~/.cache/yarn 19 | 20 | jobs: 21 | build: 22 | executor: 23 | name: node/default 24 | steps: 25 | - checkout 26 | - yarn_install 27 | - run: 28 | name: Type check and lint 29 | command: yarn lint 30 | - run: 31 | name: Test 32 | command: yarn test 33 | - run: 34 | name: Build 35 | command: yarn build 36 | workflows: 37 | test-and-build: 38 | jobs: 39 | - build 40 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | env: { 3 | browser: true, 4 | commonjs: true, 5 | es6: true, 6 | node: true, 7 | }, 8 | extends: [ 9 | 'plugin:@typescript-eslint/recommended', // Base rules configuration from Eslint, adapted to Typescript 10 | 'prettier/@typescript-eslint', // disable ESLint rules from @typescript-eslint/eslint-plugin that would conflict with prettier 11 | 'plugin:prettier/recommended', // Enables eslint-plugin-prettier and eslint-config-prettier. This will display prettier errors as ESLint errors,, so that they can be fixed automatically with `eslint --fix`. Make sure this is always the last configuration in the extends array. 
12 | ], 13 | parser: '@typescript-eslint/parser', 14 | parserOptions: { 15 | ecmaVersion: 2018, 16 | sourceType: 'module', 17 | }, 18 | plugins: ['@typescript-eslint', 'prettier'], 19 | rules: { 20 | indent: ['off', 'tab', { SwitchCase: 1 }], 21 | 'linebreak-style': ['error', 'unix'], 22 | 'no-useless-escape': [0, 'unix'], 23 | semi: ['error', 'always'], 24 | '@typescript-eslint/no-use-before-define': 'off', 25 | '@typescript-eslint/explicit-function-return-type': 'off', 26 | '@typescript-eslint/no-empty-function': 'off', 27 | '@typescript-eslint/no-unused-vars': 'error', 28 | }, 29 | ignorePatterns: ['./old/', './node_modules/'], 30 | }; 31 | -------------------------------------------------------------------------------- /src/enhanceLinks.ts: -------------------------------------------------------------------------------- 1 | import { validate as isValidEmail } from 'email-validator'; 2 | import isValidDomain from 'is-valid-domain'; 3 | 4 | export const hasProtocol = (link: string) => { 5 | const lowerCaseLink = link.toLowerCase(); 6 | return Boolean(lowerCaseLink.match(/^[a-z][a-z0-9+-.]*:/i)); 7 | }; 8 | 9 | /** 10 | * Scans the document for links that don't start 11 | * with a protocol and adds one automatically. 
/**
 * Scans the document for anchors whose `href` does not start with a
 * protocol and adds one:
 * - `https://` when the part before the first `/` is a valid domain;
 * - `mailto:` when the whole value is a valid email address.
 * Any other value is left untouched.
 */
const addHttpsToRelativeLinks = ($: CheerioStatic) => {
  $('a').each((_, link) => {
    const href = link.attribs.href;

    if (!href || hasProtocol(href)) {
      return;
    }

    const [host] = href.split('/');
    if (isValidDomain(host)) {
      // Valid domains are prepended with https://
      link.attribs.href = `https://${href}`;
    } else if (isValidEmail(href)) {
      // Valid email addresses get an extra mailto:
      link.attribs.href = `mailto:${href}`;
    }
  });
};

/**
 * Puts the target of each anchor in its `title` attribute. An existing
 * title is kept and the target is appended to it between parentheses.
 */
const addTitlesToLinks = ($: CheerioStatic) => {
  $('a').each((_, link) => {
    const { href, title } = link.attribs;

    if (!href) {
      // Anchors without a target have nothing to expose. Writing the
      // missing href would store `undefined` in the title attribute.
      return;
    }

    link.attribs.title = title ? `${title} (${href})` : href;
  });
};

/**
 * Normalizes the anchors of the document: adds missing protocols first,
 * then exposes the resulting target in the `title` of each anchor.
 */
export const enhanceLinks = ($: CheerioStatic) => {
  addHttpsToRelativeLinks($);
  addTitlesToLinks($);
};
/**
 * Wrap text links in anchor tags.
 *
 * URLs, email addresses and phone numbers found in the HTML are linked.
 * URL matches are skipped when they look like a file name in a path, when
 * they point to GitHub's emoji images, or when they are located in the
 * document head.
 *
 * @param inputHtml The HTML to process.
 * @returns The HTML with text links wrapped in anchors.
 */
function linkify(inputHtml: string): string {
  // Offset of the end of the document head. It is -1 when the document has
  // no head, in which case no match is considered to be inside the head.
  const headOffset = inputHtml.indexOf('</head>');

  return Autolinker.link(inputHtml, {
    urls: {
      schemeMatches: true,
      wwwMatches: true,
      tldMatches: true,
    },
    email: true,
    phone: true,
    mention: false,
    hashtag: false,

    stripPrefix: false,
    stripTrailingSlash: false,
    newWindow: true,

    className: '',

    replaceFn: function(match) {
      if (match.getType() === 'url') {
        // Don't autolink filenames
        // https://github.com/gregjacobs/Autolinker.js/issues/270#issuecomment-498878987
        const previousChar = inputHtml.charAt(match.getOffset() - 1);
        if (previousChar === '/') {
          return false; // don't autolink this match
        }

        // Don't autolink URLs for GitHub's Emoji
        if (match.getAnchorHref().includes(GH_EMOJI_URL)) {
          return false;
        }

        // Ignore URLs in head
        if (match.getOffset() < headOffset) {
          return false;
        }

        // To avoid tabnabbing. The tag has to be returned, otherwise
        // Autolinker builds its own tag and this attribute is discarded.
        const tag = match.buildTag();
        tag.setAttr('rel', 'noopener noreferrer');
        return tag;
      }
      // Other match types fall through: Autolinker does its default replacement.
    },
  });
}
/**
 * Fix various problems in input HTML before it can be parsed by cheerio.
 *
 * @param inputHtml The raw HTML of the email.
 * @returns The repaired HTML, or the input unchanged when nothing was broken.
 */
function fixBrokenHtml(inputHtml: string): string {
  return fixBrokenHead(inputHtml);
}

/**
 * Repair documents that open a <head> without an enclosing <html>.
 *
 * The opening tags are scanned in order. The document is considered broken
 * when a <head> shows up while no <html> and no <blockquote> was opened
 * before it. In that case everything before the <head> is dropped and the
 * rest is wrapped in an <html> element.
 */
function fixBrokenHead(inputHtml: string): string {
  const encounteredTags = new Set<string>();

  let isBroken = false;

  let headStartIndex: number | undefined;

  const parserDetect = new htmlparser2.Parser({
    onopentag(name) {
      if (name === 'head') {
        headStartIndex = parserDetect.startIndex;
      }

      encounteredTags.add(name);

      const htmlTag = encounteredTags.has('html');
      const headTag = encounteredTags.has('head');
      const blockquoteTag = encounteredTags.has('blockquote');

      // If there's a blockquote before the head tag, this is likely a quoted message
      if (!htmlTag && headTag && !blockquoteTag) {
        isBroken = true;
        parserDetect.reset(); // abort parsing
      } else if (htmlTag) {
        isBroken = false;
        parserDetect.reset(); // abort parsing
      }
    },
  });

  parserDetect.write(inputHtml);
  parserDetect.end();

  if (!isBroken || headStartIndex === undefined) {
    return inputHtml;
  }

  // Remove everything before head, and wrap in html
  return `<html>${inputHtml.slice(headStartIndex)}</html>`;
}
/**
 * Build one Fixture per `<name>.input.html` file found in the fixture
 * directory, in the order the directory listing returns them.
 */
function listFixtures(): Fixture[] {
  const inputSuffix = '.input.html';
  const fixtures: Fixture[] = [];

  for (const fileName of Fs.readdirSync(FIXTURE_DIR)) {
    if (!fileName.endsWith(inputSuffix)) {
      continue;
    }
    // The fixture name is the file name without the input suffix
    fixtures.push(new Fixture(Path.basename(fileName, inputSuffix)));
  }

  return fixtures;
}
21 |
22 |

23 | Here is the body of the message, with a text 24 | link.com 25 | 26 | 29 | 30 | An hoc usque quaque, aliter in vita? Terram, mihi crede, ea lanx et maria deprimet. Duo Reges: 31 | constructio interrete. Id est enim, de quo quaerimus. Parvi enim primo ortu sic iacent, tamquam omnino 32 | sine animo sint. Quis est tam dissimile homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 33 | Beatus sibi videtur esse moriens. 36 |

37 |
38 |
/**
 * Remove trailing whitespaces in given element, using given cheerio context.
 *
 * The element is processed from its last child backwards: comments and
 * empty-ish texts are removed, the last meaningful text is right-trimmed,
 * and textual elements left without children are removed as well. Trimming
 * stops at the first non-textual element.
 *
 * @param $ The cheerio context the element belongs to.
 * @param el The element to trim. Defaults to the top level element.
 * @returns true if the element was empty and removed completely.
 */
function removeTrailingWhitespaces(
  $: CheerioStatic,
  el: CheerioElement = getTopLevelElement($)
): boolean {
  if (el.type === 'comment') {
    // Remove it
    $(el).remove();
    return true;
  }

  if (el.type === 'text') {
    if (isEmptyish(el)) {
      // The element was removed completely
      $(el).remove();
      return true;
    }

    // Update the text node in place. Passing the string to cheerio's
    // replaceWith() would parse it as HTML, so that escaped text such as
    // `&lt;b&gt;` would come back as real markup.
    el.data = (el.data as string).trimEnd();
    // We're done trimming
    return false;
  }

  if (!isTextualElement(el)) {
    // Contains content other than text, we stop trimming here
    return false;
  }

  if (el.childNodes && el.childNodes.length > 0) {
    // Textual element with children: trim the last child
    const wasEmpty = removeTrailingWhitespaces($, el.lastChild);

    // If the last child disappeared, continue trimming this element.
    // Otherwise it was trimmed as much as possible and we stop here.
    return wasEmpty ? removeTrailingWhitespaces($, el) : false;
  }

  if (isRootElement(el)) {
    // Stop here
    return false;
  }

  // Empty textual element, we can remove it.
  $(el).remove();
  return true;
}
2 |
3 |

4 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. 5 | An hoc usque quaque, aliter in vita? Terram, mihi crede, ea 6 | lanx et maria deprimet. Duo Reges: constructio interrete. 7 | Id est enim, de quo quaerimus. Parvi enim primo ortu sic 8 | iacent, tamquam omnino sine animo sint. Quis est tam dissimile 9 | homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 10 | Beatus sibi videtur esse moriens. 13 |

14 | 15 |

16 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. 17 | Egone quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis 18 | ratio Peripateticorum. Itaque rursus eadem ratione, qua sum paulo 19 | ante usus, haerebitis. Paulum, cum regem Persem captum adduceret, 20 | eodem flumine invectio? Mihi enim erit isdem istis fortasse iam 21 | utendum. Ab hoc autem quaedam non melius quam veteres, quaedam 22 | omnino relicta. Tamen a proposito, inquam, aberramus. 23 |

24 | 25 |

26 | Sic, et quidem diligentius saepiusque ista loquemur inter nos 27 | agemusque communiter. 28 | Quid sequatur, quid repugnet, vident. 31 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti 32 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam 33 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam 34 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui 35 | quaesivit aliquid, tacet; Totum autem id externum est, et quod 36 | externum, id in casu est. 37 |

38 |
39 |
40 | -------------------------------------------------------------------------------- /src/tests/appendStyle.test.ts: -------------------------------------------------------------------------------- 1 | import cheerio from 'cheerio'; 2 | import appendStyle from '../appendStyle'; 3 | import { expectHtml } from './utils'; 4 | 5 | describe('appendStyle', () => { 6 | it('should add style', () => { 7 | const email = ` 8 |
Hello
9 | `; 10 | 11 | const $ = cheerio.load(email); 12 | appendStyle( 13 | $, 14 | ` 15 | p { 16 | background: red; 17 | } 18 | 19 | .title { 20 | color: black; 21 | } 22 | ` 23 | ); 24 | 25 | const actual = $.html(); 26 | 27 | const expected = ` 28 | 29 | 30 | 39 | 40 | 41 |
Hello
42 | 43 | 44 | `; 45 | 46 | expectHtml(actual, expected); 47 | }); 48 | 49 | it('should append style after existing ones', () => { 50 | const email = ` 51 | 52 | 53 | 58 | 59 | 60 |
Hello
61 | 62 | 63 | `; 64 | 65 | const $ = cheerio.load(email); 66 | appendStyle( 67 | $, 68 | ` 69 | .title { 70 | color: black; 71 | } 72 | ` 73 | ); 74 | 75 | const actual = $.html(); 76 | 77 | const expected = ` 78 | 79 | 80 | 85 | 90 | 91 | 92 |
Hello
/**
 * Replace all URLs that could be automatically fetched when displaying the
 * HTML. These can be used for tracking, or can consume bandwidth.
 *
 * Works on the given cheerio context in place. Missing replacements fall
 * back to the transparent placeholder image for images and to `#` for the
 * other URLs.
 */
function blockRemoteContentCheerio(
  $: CheerioStatic,
  replacements: ReplacementOptions = {}
) {
  const image =
    replacements.image === undefined
      ? TRANSPARENT_1X100_URL
      : replacements.image;
  const other = replacements.other === undefined ? '#' : replacements.other;

  // Style tags first, then tag attributes
  blockRemoteContentInStyle($, image);
  blockRemoteContentInAttributes($, { image, other });
}
/**
 * Standalone flavour of blockRemoteContentCheerio: parses the given HTML
 * string, blocks its remote content, and serializes the result back to a
 * string.
 */
function blockRemoteContent(
  html: string,
  replacements: ReplacementOptions = {}
): string {
  const parsed = cheerio.load(html);

  blockRemoteContentCheerio(parsed, replacements);

  return parsed.html();
}
"eslint-plugin-prettier": "^3.1.2", 42 | "jest": "^26.6.3", 43 | "jsdom": "^15.2.1", 44 | "linkifyjs": "^2.1.8", 45 | "planer": "^1.1.1", 46 | "prettier": "^1.19.1", 47 | "sanitize-html": "^1.20.1", 48 | "talonjs": "^1.0.24", 49 | "ts-jest": "^26.5.4", 50 | "typescript": "^4.2.3", 51 | "xmldom": "^0.2.1" 52 | }, 53 | "dependencies": { 54 | "@types/cheerio": "^0.22.15", 55 | "autolinker": "^3.11.1", 56 | "cheerio": "^1.0.0-rc.3", 57 | "email-validator": "^2.0.4", 58 | "htmlparser2": "^6.0.1", 59 | "is-valid-domain": "^0.0.20", 60 | "regx": "^1.0.4" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /benchmarks/libraries.ts: -------------------------------------------------------------------------------- 1 | /* 2 | These benchmarks compare different libraries (not all are used by this project). 3 | This helps estimate the cost of using any of them. 4 | */ 5 | 6 | import { JSDOM } from 'jsdom'; 7 | import sanitizeHtml from 'sanitize-html'; 8 | import * as htmlparser2 from 'htmlparser2'; 9 | import createDOMPurify from 'dompurify'; 10 | import cheerio from 'cheerio'; 11 | import Talon from 'talonjs'; 12 | import XmlDom from 'xmldom'; 13 | import planer from 'planer'; 14 | import Autolinker from 'autolinker'; 15 | import linkifyHtml from 'linkifyjs/html'; 16 | 17 | import { createSuite } from './utils'; 18 | import EMAILS from './emails'; 19 | 20 | const suite = createSuite(); 21 | 22 | // Test parsing a real-world, HTML-heavy, marketing email 23 | suite 24 | .add('Parse # JSDom # Marketing email', () => { 25 | new JSDOM(EMAILS.MARKETING); 26 | }) 27 | .add('Parse # Cheerio # Marketing email', () => { 28 | cheerio.load(EMAILS.MARKETING); 29 | }) 30 | .add('Parse # HtmlParser2 # Marketing email', () => { 31 | htmlparser2.parseDOM(EMAILS.MARKETING); 32 | }) 33 | .add('Parse # XmlDom # Marketing email', () => { 34 | const parser = new XmlDom.DOMParser(); 35 | parser.parseFromString(EMAILS.MARKETING); 36 | }); 37 | 38 | const 
/**
 * Short label for a node, used to compare walking orders: the trimmed
 * content for text nodes, the tag name for anything else.
 */
function printEl(el: CheerioElement): string {
  return el.type === 'text' ? (el.data as string).trim() : el.tagName;
}
'text-f', 52 | 'g', 53 | 'text-h', 54 | 'i', 55 | 'text-j', 56 | 'k', 57 | '', 58 | 'body', 59 | ]); 60 | }); 61 | 62 | it('should be breakable', () => { 63 | const $ = cheerio.load(` 64 | 65 | text-j 66 | 67 | text-h 68 | 69 | text-f 70 | 71 | text-e 72 | 73 | text-c 74 | 75 | text-b 76 | 77 | text-a 78 | 79 | `); 80 | 81 | const order = []; 82 | for (const el of walkBackwards(getTopLevelElement($))) { 83 | if (printEl(el) === 'text-a') { 84 | continue; 85 | } 86 | 87 | order.push(printEl(el)); 88 | 89 | if (printEl(el) === 'text-e') { 90 | break; 91 | } 92 | } 93 | 94 | expect(order).toEqual([ 95 | '', 96 | // 'text-a', 97 | 'text-b', 98 | 'text-c', 99 | 'd', 100 | 'text-e', 101 | // 'text-f', 102 | // 'g', 103 | // 'text-h', 104 | // 'i', 105 | // 'text-j', 106 | // 'k', 107 | // '', 108 | // 'root', 109 | ]); 110 | }); 111 | }); 112 | -------------------------------------------------------------------------------- /src/blockRemoteContent/blockRemoteContentInStyle.ts: -------------------------------------------------------------------------------- 1 | import regx from 'regx'; 2 | 3 | /** 4 | * Disable all remote-content in styles, and replace images 5 | * with the given image URL. 6 | * 7 | * Non-image URLs that are replaced will no longer be valid, and ignored. 8 | * Dirty, but that's what we want. 9 | */ 10 | function blockRemoteContentInStyle( 11 | $: CheerioStatic, 12 | replacementImageUrl: string 13 | ) { 14 | // 12 | 15 | 16 | 17 |
18 |
19 | 20 | Sent from Mailspring 28 | 29 |

30 | Here is the body of the message, with a text link.com 31 | 32 | 33 | 34 | An hoc usque quaque, aliter in vita? Terram, mihi crede, ea lanx et maria deprimet. Duo 35 | Reges: constructio interrete. Id est enim, de quo quaerimus. Parvi enim primo ortu sic 36 | iacent, tamquam omnino sine animo sint. Quis est tam dissimile homini. Claudii libidini, qui tum 37 | erat summo ne imperio, dederetur. 38 | Beatus sibi videtur esse moriens. 39 |

40 | 41 |

42 | 43 |
44 |

45 |
46 |
47 |
48 |
49 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
52 |
53 |
57 |
58 |
59 |

60 | This is the replied message 61 |

62 |
63 |
64 |
65 |
66 |
67 |
--
68 |
69 |
70 |
Nicolas Gaborit (Soreine)
71 |
Web Developper
72 |
73 |
74 |
75 | 76 | 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tempo-email-parser 2 | 3 | Parse and extract main message from an HTML email. 4 | Also runs several transformations to the email so that it can be displayed safely and correctly inside a browser or Electron iframe for example. 5 | 6 | - Extract quotations (replies), signatures 7 | - Remove scripts, trackers 8 | - Convert text links into anchor tags 9 | - Remove trailing whitespaces 10 | - Block remote content 11 | 12 | ## Usage 13 | 14 | ```ts 15 | import prepareMessage, { 16 | blockRemoteContent, 17 | linkify, 18 | } from 'tempo-email-parser'; 19 | 20 | const emailHtml = ` 21 |
Hello there
22 | `; 23 | 24 | const remoteContentReplacements = { 25 | image: 'replacement-image-url', // Replacement for remote image URLs. Defaults to a 1x100 transparent image 26 | other: '#', // Replacement for other remote URLs 27 | }; 28 | 29 | // All options default to false. 30 | const OPTIONS = { 31 | noQuotations: true, 32 | autolink: true, 33 | forceViewport: '', 34 | noRemoteContent: true, 35 | remoteContentReplacements, 36 | includeStyle: ` 37 | .custom-style { 38 | color: red; 39 | } 40 | `, 41 | }; 42 | 43 | const { 44 | // The extracted message 45 | messageHtml, 46 | // The whole message processed, including quotations and signature 47 | completeHtml, 48 | // Whether quotations or a signature were removed 49 | didFindQuotation, 50 | } = prepareMessage(emailHtml, OPTIONS); 51 | ``` 52 | 53 | Autolinking and remote-content blocking are available as separate functions as well. 54 | 55 | ```js 56 | const withLinks = linkify(messageHtml); 57 | 58 | const noRemoteContent = blockRemoteContent( 59 | messageHtml, 60 | remoteContentReplacements 61 | ); 62 | ``` 63 | 64 | ## Development 65 | 66 | For tests 67 | 68 | ``` 69 | yarn run test 70 | ``` 71 | 72 | The main function `prepareMessage` has a list of fixtures used for tests. The input HTML files are named `xxx.input.html`. The expected outputs are named `xxx.output-complete.html` and `xxx.output-message.html`. 73 | 74 | ### `yarn run generate:fixtures` 75 | 76 | This script generates the respective output files for any `.input.html` file found without corresponding outputs. 77 | 78 | To easily add a fixture from a real-world email, you can put the input HTML at `/src/tests/prepareMessage/my-test.input.html`, and then run `yarn run generate:fixtures` to generate the output files based on what `prepareMessage` produced. You now only have to check that the outputs look good and make adjustments if necessary.
79 | 80 | ### Benchmarks 81 | 82 | There are benchmarks to ensure the tool remains fast enough not to degrade UI performance, and also to compare some external libraries. See the `benchmarks` folder. -------------------------------------------------------------------------------- /src/tests/prepareMessage/all-in-one.output-complete.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 13 | 18 | 19 | 20 |
21 |
22 |

23 | Here is the body of the message, with a text 24 | link.com 25 | 26 | 29 | 30 | An hoc usque quaque, aliter in vita? Terram, mihi crede, ea lanx et maria deprimet. Duo Reges: 31 | constructio interrete. Id est enim, de quo quaerimus. Parvi enim primo ortu sic iacent, tamquam omnino 32 | sine animo sint. Quis est tam dissimile homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 33 | Beatus sibi videtur esse moriens. 36 |

37 | 38 |

39 |
40 |

41 |
42 |
43 |
44 |
45 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
50 |
51 |
55 |
56 |
57 |

58 | This is the replied message 59 |

60 |
61 |
62 |
63 |
64 |
65 |
--
66 |
67 |
68 |
Nicolas Gaborit (Soreine)
69 |
Web Developper
70 |
71 |
72 |
/**
 * Walk the document from the bottom up and look for a quote header such as:
 * "On Friday, 27 November 2015, Your Tempo wrote:"
 *
 * The header may be spread over several nodes. Collection starts at the
 * text matching the end of the header ("... wrote:") and stops at the text
 * matching its start ("On ..."). Every node met in between is collected.
 * Nothing is collected when the last meaningful text of the document is
 * not the end of a quote header.
 *
 * NOTE(review): when the end of a header is found but no text matches its
 * start, the walk runs to the top and every visited node is returned.
 *
 * @returns The nodes making up the quote header, to be deleted.
 */
function findQuoteString($: CheerioStatic): CheerioElement[] {
  const collected: CheerioElement[] = [];

  // Turns true once a "... wrote:" text was met
  let insideHeader = false;

  for (const node of walkBackwards(getTopLevelElement($))) {
    if (isDocument(node)) {
      continue;
    }

    if (!isText(node)) {
      // Elements only matter when they are part of the header
      if (insideHeader) {
        collected.push(node);
      }
      continue;
    }

    if (isEmpty(node)) {
      // Ignore empty texts
      continue;
    }

    if (!insideHeader) {
      if (!isQuoteHeaderEnd(node)) {
        // We have reached content. Stop iterating
        break;
      }
      insideHeader = true;
    }

    collected.push(node);

    if (isQuoteHeaderStart(node)) {
      // The start of the header was reached, possibly in the very same
      // node as its end. We're done
      break;
    }
  }

  return collected;
}
/**
 * Select the quote and signature elements of the document that should be
 * removed. Inline quotes, that is quotes followed by non-quote content,
 * are left out of the selection.
 */
function findAllQuotes($: CheerioStatic): Cheerio {
  const selectors = [
    '.gmail_quote',
    'blockquote',
    // Signatures.
    '.gmail_signature',
    'signature',
    // ENHANCEMENT: Add findQuotesAfterMessageHeaderBlock
    // ENHANCEMENT: Add findQuotesAfter__OriginalMessage__
  ];

  const candidates: Cheerio = $(selectors.join(', '));

  // Set of all candidates, so that isInlineQuote can skip over them
  const candidateSet = new Set(toArray(candidates));

  return candidates.filter(
    (_, candidate) => !isInlineQuote(candidate, candidateSet)
  );
}
53 | * 54 | * Based on 55 | * https://github.com/Foundry376/Mailspring/blob/aa125f0136c093e0aa3deb7c46bb6433f6ede6b9/app/src/services/quoted-html-transformer.ts#L228:L228 56 | */ 57 | function isInlineQuote( 58 | el: CheerioElement, 59 | quoteSet: Set 60 | ): boolean { 61 | const seen = new Set(); 62 | let head = el; 63 | 64 | while (head) { 65 | // advance to the next sibling, or the parent's next sibling 66 | while (head && !head.nextSibling) { 67 | head = head.parentNode; 68 | } 69 | if (!head) { 70 | break; 71 | } 72 | head = head.nextSibling; 73 | 74 | // search this branch of the tree for any text nodes / images that 75 | // are not contained within a matched quoted text block. We mark 76 | // the subtree as "seen" because we traverse upwards, and would 77 | // re-evaluate the subtree on each iteration otherwise. 78 | const pile = [head]; 79 | let node = null; 80 | 81 | while ((node = pile.pop())) { 82 | if (seen.has(node)) { 83 | continue; 84 | } 85 | if (quoteSet.has(node)) { 86 | continue; 87 | } 88 | if (node.childNodes) { 89 | pile.push(...node.childNodes); 90 | } 91 | if (isImage(node)) { 92 | return true; 93 | } 94 | if (isText(node) && !isEmptyish(node)) { 95 | return true; 96 | } 97 | } 98 | seen.add(head); 99 | } 100 | 101 | return false; 102 | } 103 | 104 | export default removeQuotations; 105 | -------------------------------------------------------------------------------- /src/tests/linkify.test.ts: -------------------------------------------------------------------------------- 1 | import expect from 'expect'; 2 | import linkify from '../linkify'; 3 | 4 | describe('linkify', () => { 5 | it('should linkify URL in paragraph', () => { 6 | const email = `

7 | Quis est tam dissimile 8 | homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 9 | loripsum.net 10 |

`; 11 | 12 | expect(linkify(email)).toBe(`

13 | Quis est tam dissimile 14 | homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 15 | loripsum.net 16 |

`); 17 | }); 18 | 19 | it('should not linkify URLs in anchor tags', () => { 20 | const email = `

21 | Quis est tam dissimile 22 | homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 23 | loripsum.net 24 |

`; 25 | 26 | expect(linkify(email)).toBe(email); 27 | }); 28 | 29 | it('should linkify other kinds of URLs', () => { 30 | const email = `

31 | hello@email.com 32 | yourtempo.co 33 | http://yourtempo.co 34 | https://yourtempo.co 35 | ftp://yourtempo.co 36 | +33578758785 37 |

`; 38 | 39 | expect(linkify(email)).toBe(`

40 | hello@email.com 41 | yourtempo.co 42 | http://yourtempo.co 43 | https://yourtempo.co 44 | ftp://yourtempo.co 45 | +33578758785 46 |

`); 47 | }); 48 | 49 | it('should ignore script tags, style tags, and head', () => { 50 | const email = ` 51 | 52 | 53 | 54 | Not a link.com 55 | 60 | 61 | 62 | Hello 63 | 68 | 71 | 72 | 73 | `; 74 | 75 | expect(linkify(email)).toBe(email); 76 | }); 77 | 78 | it('should work fine with DOCTYPE', () => { 79 | const email = ` 80 | 81 | 82 | 83 | 84 | 85 | Not a link.com 86 | 91 | 92 | 93 | Hello 94 | 99 | 102 | 103 | 104 | `; 105 | 106 | expect(linkify(email)).toBe(email); 107 | }); 108 | }); 109 | -------------------------------------------------------------------------------- /src/tests/enforceViewport.test.ts: -------------------------------------------------------------------------------- 1 | import cheerio from 'cheerio'; 2 | import enforceViewport from '../enforceViewport'; 3 | import { expectHtml } from './utils'; 4 | 5 | describe('enforceViewport', () => { 6 | it('should add missing viewport', () => { 7 | const email = ` 8 | 9 | 10 |
Hello
11 | 12 | `; 13 | 14 | const $ = cheerio.load(email); 15 | enforceViewport($); 16 | 17 | const actual = $.html(); 18 | 19 | const expected = ` 20 | 21 | 22 | 26 | 27 | 28 |
Hello
29 | 30 | 31 | `; 32 | 33 | expectHtml(actual, expected); 34 | }); 35 | 36 | it('should add missing head tag', () => { 37 | const email = ` 38 | 39 |
Hello
40 | 41 | `; 42 | 43 | const $ = cheerio.load(email); 44 | enforceViewport($); 45 | 46 | const actual = $.html(); 47 | 48 | const expected = ` 49 | 50 | 51 | 55 | 56 | 57 |
Hello
58 | 59 | 60 | `; 61 | 62 | expectHtml(actual, expected); 63 | }); 64 | 65 | it('should add missing html tag', () => { 66 | const email = ` 67 |
Hello
68 | `; 69 | 70 | const $ = cheerio.load(email); 71 | enforceViewport($); 72 | 73 | const actual = $.html(); 74 | 75 | const expected = ` 76 | 77 | 78 | 82 | 83 | 84 |
Hello
85 | 86 | 87 | `; 88 | 89 | expectHtml(actual, expected); 90 | }); 91 | 92 | it('should replace existing viewports', () => { 93 | const email = ` 94 | 95 | 96 | 97 | 98 | 99 | 100 |
Hello
101 | 102 | 103 | `; 104 | 105 | const $ = cheerio.load(email); 106 | enforceViewport($); 107 | 108 | const actual = $.html(); 109 | 110 | const expected = ` 111 | 112 | 113 | 117 | 118 | 119 | 120 |
Hello
121 | 122 | 123 | `; 124 | 125 | expectHtml(actual, expected); 126 | }); 127 | 128 | it('should handle invalid HTML', () => { 129 | const email = ` 130 |
131 | 132 |

Forwarding you a message

133 |
134 | `; 135 | 136 | const $ = cheerio.load(email); 137 | enforceViewport($); 138 | 139 | const actual = $.html(); 140 | 141 | const expected = ` 142 | 143 | 144 | 148 | 149 | 150 |
151 |

Forwarding you a message

152 |
153 | 154 | 155 | `; 156 | 157 | expectHtml(actual, expected); 158 | }); 159 | }); 160 | -------------------------------------------------------------------------------- /src/blockRemoteContent/blockRemoteContentInAttributes.ts: -------------------------------------------------------------------------------- 1 | import { ReplacementOptions } from './'; 2 | 3 | // https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value 4 | const TAGS_THAT_HAVE_URL_ATTRIBUTES: { [key: string]: string[] } = { 5 | // Keep this one, since it won't be fetched unless clicked. 6 | // a: ['href'], 7 | applet: ['codebase'], 8 | area: ['href'], 9 | audio: ['src'], 10 | base: ['href'], 11 | blockquote: ['cite'], 12 | body: ['background'], 13 | button: ['formaction'], 14 | command: ['icon'], 15 | del: ['cite'], 16 | embed: ['src'], 17 | form: ['action'], 18 | frame: ['longdesc', 'src'], 19 | head: ['profile'], 20 | html: ['manifest'], 21 | iframe: ['longdesc', 'src'], 22 | img: ['longdesc', 'src', 'usemap'], 23 | input: ['src', 'usemap', 'formaction'], 24 | ins: ['cite'], 25 | link: ['href'], 26 | meta: ['content'], 27 | object: ['classid', 'codebase', 'data', 'usemap'], 28 | q: ['cite'], 29 | script: ['src'], 30 | source: ['src'], 31 | track: ['src'], 32 | video: ['poster', 'src'], 33 | }; 34 | 35 | /** 36 | * Replace all remote URLs in tags' attributes 37 | */ 38 | function blockRemoteContentInAttributes( 39 | $: CheerioStatic, 40 | replacements: Required 41 | ) { 42 | const query = Object.keys(TAGS_THAT_HAVE_URL_ATTRIBUTES).join(','); 43 | 44 | $(query).each((_, el: CheerioElement) => { 45 | const $el = $(el); 46 | 47 | if (el.tagName === 'meta') { 48 | if (isMetaRefresh($el)) { 49 | $el.remove(); 50 | } 51 | return; 52 | } 53 | 54 | getUrlAttributes(el.tagName, $el) 55 | .filter(attr => { 56 | const value = $el.attr(attr); 57 | return !value || isRemoteUrl(value); 58 | }) 59 | .forEach(attr => { 60 | const replacement = isImageAttribute(attr) 
61 | ? replacements.image 62 | : replacements.other; 63 | $el.attr(attr, replacement); 64 | }); 65 | }); 66 | } 67 | 68 | function isMetaRefresh(meta: Cheerio): boolean { 69 | // https://www.emailprivacytester.com/testDescription?test=metaRefresh 70 | const httpEquiv = meta.attr('http-equiv') || ''; 71 | const content = meta.attr('content') || ''; 72 | return /^refresh$/i.test(httpEquiv) || /^\d*;\s*URL=/.test(content); 73 | } 74 | 75 | /** 76 | * Returns the list of URL attributes declared on this element 77 | */ 78 | function getUrlAttributes( 79 | tagName: string, 80 | // Cheerio scoped on the element 81 | $el: Cheerio 82 | ): string[] { 83 | const attrs = $el.attr(); 84 | const potentialAttributes: string[] = 85 | TAGS_THAT_HAVE_URL_ATTRIBUTES[tagName] || []; 86 | 87 | return potentialAttributes.filter( 88 | Object.prototype.hasOwnProperty.bind(attrs) 89 | ); 90 | } 91 | 92 | const IMAGE_ATTRIBUTES = new Set([ 93 | 'background', 94 | 'icon', 95 | 'placeholder', 96 | 'poster', 97 | 'src', 98 | 'srcset', 99 | ]); 100 | 101 | function isImageAttribute(attr: string): boolean { 102 | return IMAGE_ATTRIBUTES.has(attr); 103 | } 104 | 105 | function isRemoteUrl(attributeValue: string) { 106 | // There can be several URLs. We consider them remote then. 107 | // (for example img srcset: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-srcset) 108 | const isLocal = /^(?:data:|cid:)\S*$/.test(attributeValue); 109 | return !isLocal; 110 | } 111 | 112 | export default blockRemoteContentInAttributes; 113 | -------------------------------------------------------------------------------- /benchmarks/utils.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * This file contains utils to work with BenchmarkJS results. 3 | * Those are adapted from past projects of mines (Soreine). 
4 | */ 5 | 6 | import Benchmark from 'benchmark'; 7 | 8 | type SuccessResult = { 9 | name: string; 10 | stats: { 11 | rme: number; 12 | mean: number; 13 | hz: number; 14 | runs: number; 15 | }; 16 | }; 17 | 18 | type ErrorResult = { 19 | name: string; 20 | error: string; 21 | }; 22 | 23 | type BenchmarkResult = SuccessResult | ErrorResult; 24 | 25 | type BenchmarkCycleEvent = any; 26 | 27 | /** 28 | * Create a BenchmarkJS suite 29 | */ 30 | function createSuite(): Benchmark.Suite { 31 | const suite = new Benchmark.Suite(); 32 | 33 | // On each benchmark completion 34 | suite.on('cycle', (event: any) => { 35 | const result = extractResult(event); 36 | printResult(result); 37 | }); 38 | 39 | return suite; 40 | } 41 | 42 | interface Suite { 43 | on(eventName: string, handler: (event: any) => void); 44 | add(title: string, fn: () => void): Suite; 45 | run(); 46 | } 47 | 48 | /** 49 | * Create a basic benchmark suite that follows the same API than BenchmarkJS, 50 | * but only run the code once (not feasible with BenchmarkJS). 51 | * 52 | * This is useful to avoid the "warmup" effect of V8, which sometimes optimize 53 | * any piece of code execution the more it runs it. 
54 | */ 55 | 56 | function createBasicSuite(): Suite { 57 | return { 58 | on() {}, 59 | add(title, fn) { 60 | console.log(title); 61 | console.time('Time'); 62 | fn(); 63 | console.timeEnd('Time'); 64 | console.log(''); 65 | return this; 66 | }, 67 | run() {}, 68 | }; 69 | } 70 | 71 | /** 72 | * Creates a result object for a benchmark cycle event 73 | */ 74 | function extractResult(event: Benchmark.Event): BenchmarkResult { 75 | const { target } = event; 76 | const { error, name, hz, stats } = target as any; 77 | 78 | if (error) { 79 | return { 80 | name, 81 | error, 82 | }; 83 | } else { 84 | const { rme, mean, sample } = stats; 85 | 86 | return { 87 | name, 88 | stats: { 89 | hz, 90 | rme, 91 | mean, 92 | runs: sample.length, 93 | }, 94 | }; 95 | } 96 | } 97 | 98 | function isSuccess(result: BenchmarkResult): result is SuccessResult { 99 | return !(result as ErrorResult).error; 100 | } 101 | 102 | /** 103 | * Pretty print a benchmark result. 104 | * Mean difference, and rme computations inspired from 105 | * https://github.com/facebook/immutable-js/blob/master/resources/bench.js 106 | */ 107 | 108 | function printResult(result: BenchmarkResult) { 109 | const { name } = result; 110 | 111 | print(name); 112 | formatPerf(result).map(s => print(indent(2), s)); 113 | print(''); // newline 114 | } 115 | 116 | /** 117 | * Pretty format a benchmark's ops/sec along with its sample size 118 | * @param {Object} result 119 | * @return {String} 120 | */ 121 | 122 | function formatPerf(result: BenchmarkResult): string[] { 123 | if (!isSuccess(result)) return [result.error]; 124 | const { hz, runs } = result.stats; 125 | const opsSec = Benchmark.formatNumber(+`${hz.toPrecision(4)}`); 126 | const opDuration = Benchmark.formatNumber(+`${(1000 / hz).toPrecision(4)}`); 127 | 128 | return [ 129 | `${opDuration} ms`, 130 | `${opsSec} ops/sec`, 131 | `(${runs} runs sampled)`, 132 | `Relative Margin of Error: \xb1${result.stats.rme.toFixed(2)}%`, 133 | ]; 134 | } 135 | 136 | function 
indent(level = 0) { 137 | return Array(level + 1).join(' '); 138 | } 139 | 140 | function print(...strs: any[]) { 141 | console.log(...strs); 142 | } 143 | 144 | export { createBasicSuite, createSuite, extractResult, printResult }; 145 | -------------------------------------------------------------------------------- /benchmarks/emails/basic-lorem-gmail.eml: -------------------------------------------------------------------------------- 1 | MIME-Version: 1.0 2 | Date: Tue, 31 Dec 2019 00:08:12 +0100 3 | Message-ID: 4 | Subject: [TEST] Sample Gmail email for tempo-email-parser 5 | From: Nicolas Gaborit 6 | To: Nicolas Gaborit 7 | Content-Type: multipart/alternative; boundary="000000000000f777c2059af3ea8a" 8 | 9 | --000000000000f777c2059af3ea8a 10 | Content-Type: text/plain; charset="UTF-8" 11 | 12 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. *An hoc usque 13 | quaque, aliter in vita?* Terram, mihi crede, ea lanx et maria deprimet. Duo 14 | Reges: constructio interrete. *Id est enim, de quo quaerimus.* Parvi enim 15 | primo ortu sic iacent, tamquam omnino sine animo sint. Quis est tam 16 | dissimile homini. Claudii libidini, qui tum erat summo ne imperio, 17 | dederetur. Beatus sibi videtur esse moriens. 18 | 19 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone 20 | quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis ratio 21 | Peripateticorum. Itaque rursus eadem ratione, qua sum paulo ante usus, 22 | haerebitis. Paulum, cum regem Persem captum adduceret, eodem flumine 23 | invectio? Mihi enim erit isdem istis fortasse iam utendum. Ab hoc autem 24 | quaedam non melius quam veteres, quaedam omnino relicta. Tamen a proposito, 25 | inquam, aberramus. 26 | 27 | Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque 28 | communiter. Quid sequatur, quid repugnet, vident. 29 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti videmus 30 | boves, nepas aculeis. 
Ego vero volo in virtute vim esse quam maximam; 31 | Aufert enim sensus actionemque tollit omnem. Qua tu etiam inprudens utebare 32 | non numquam. Apud ceteros autem philosophos, qui quaesivit aliquid, tacet; 33 | Totum autem id externum est, et quod externum, id in casu est. 34 | 35 | --000000000000f777c2059af3ea8a 36 | Content-Type: text/html; charset="UTF-8" 37 | Content-Transfer-Encoding: quoted-printable 38 | 39 |
Lorem ipsum dolor sit amet, consectetur adipiscing elit. An hoc usque q= 41 | uaque, aliter in vita? Terram, mihi crede, ea lanx et maria deprimet. D= 42 | uo Reges: constructio interrete. Id est enim, de quo quaerimus. 43 | Parvi enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis=20 44 | est tam dissimile homini. Claudii libidini, qui tum erat summo ne=20 45 | imperio, dederetur. Beat= 46 | us sibi videtur esse moriens.

47 | 48 |

Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone qu= 49 | aeris, inquit, quid sentiam? 50 | Si mala non sunt, iacet omnis ratio Peripateticorum. Itaque rursus=20 51 | eadem ratione, qua sum paulo ante usus, haerebitis. Paulum, cum regem=20 52 | Persem captum adduceret, eodem flumine invectio? Mihi enim erit isdem=20 53 | istis fortasse iam utendum. Ab hoc autem quaedam non melius quam=20 54 | veteres, quaedam omnino relicta. Tamen a proposito, inquam, aberramus.

55 | 56 |

Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque = 57 | communiter. Quid sequatu= 58 | r, quid repugnet, vident. 59 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti=20 60 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam=20 61 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam=20 62 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui=20 63 | quaesivit aliquid, tacet; Totum autem id externum est, et quod externum, 64 | id in casu est.

65 | 66 |
67 | 68 | --000000000000f777c2059af3ea8a-- -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [unreleased] 9 | 10 | ## [0.7.7] - 2021-09-29 11 | 12 | - #32 Added exception to accept inside a blockquote (for when replying) 13 | 14 | ## [0.7.6] - 2021-09-22 15 | 16 | - #35 Avoid linkifying URLs in GitHub's emoji components 17 | 18 | ## [0.7.5] - 2021-03-30 19 | 20 | - #31 Automatically fix emails with a broken . 21 | 22 | ## [0.7.4] - 2020-12-28 23 | 24 | - #30 Fixed unwanted removal of spaces near the end of emails. 25 | 26 | ## [0.7.3] - 2020-09-25 27 | 28 | - #29 Fixed a bug where the trimming of messages would break the HTML structure and fail to extract quotes 29 | 30 | ## [0.7.2] - 2020-03-25 31 | 32 | - #26 `enforceMobileViewport` should always insert the viewport-tag to the head element in the top 33 | 34 | ## [0.7.0] - 2020-01-20 35 | 36 | - Changed package name to `@yourtempo/tempo-email-parser` 37 | - Changed all default options for `prepareMessage` to be false. You now need to explicitly enable each feature. 38 | - `enforceMobileViewport` option is now `forceViewport` and is an optional HTML string for the viewport meta tag to set. 
39 | - Add option to include custom style 40 | 41 | ## [0.6.1] - 2020-01-20 42 | 43 | - Added option for remote-content replacements in `prepareMessage` 44 | 45 | ## [0.6.0] - 2020-01-17 46 | 47 | - Only block refresh meta tags when blocking remote content 48 | - Export standalone `blockRemoteContent` function 49 | 50 | ## [0.5.0] - 2020-01-17 51 | 52 | - #25 Upgraded to cheerio@1.0.0-rc3, which fixes cases of breaking HTML 53 | - Replaced linkifyjs dependency with autolink. Faster, better, stronger. 54 | - #23 Detect "On date, somebody wrote:" in different languages. 55 | - Do not consider attachment URLs as remote content (cid: URLs) 56 | - Fix whitespace trimming which was dumping nodes by mistake 57 | 58 | ## [0.4.0] - 2020-01-15 59 | 60 | - #20 Added detection of inline quotes, so as not to remove them 61 | - #22 Fixed bug with detection of reply headers (On ... wrote:) 62 | - Removed options `noTrailingWhitespaces`, `noScript`, and `noTracker` until they're actually necessary 63 | 64 | ## [0.3.0] - 2020-01-14 65 | 66 | Replaced TalonJS with old quote removal implementation 67 | 68 | ## [0.2.0] - 2020-01-13 69 | 70 | Added remote-content blocking 71 | 72 | ## [0.1.0] - 2020-01-09 73 | 74 | First version for integration tests.
Includes the following features 75 | 76 | - Extract quotations 77 | - Extract basic signatures 78 | - Remove script tags and comments 79 | - Remove pixel trackers 80 | - Remove trailing whitespaces 81 | - Force viewport for mobile device 82 | 83 | [unreleased]: https://github.com/yourtempo/tempo-email-parser/compare/v0.7.7...HEAD 84 | [0.7.7]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.7.7 85 | [0.7.6]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.7.6 86 | [0.7.5]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.7.5 87 | [0.7.4]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.7.4 88 | [0.7.3]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.7.3 89 | [0.7.2]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.7.2 90 | [0.7.0]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.7.0 91 | [0.6.1]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.6.1 92 | [0.6.0]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.6.0 93 | [0.5.0]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.5.0 94 | [0.4.0]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.4.0 95 | [0.3.0]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.3.0 96 | [0.2.0]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.2.0 97 | [0.1.0]: https://github.com/yourtempo/tempo-email-parser/releases/tag/v0.1.0 98 | -------------------------------------------------------------------------------- /benchmarks/emails/basic-lorem-gmail-replied-x1.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

4 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. 5 | An hoc usque quaque, aliter in vita? Terram, mihi crede, ea 6 | lanx et maria deprimet. Duo Reges: constructio interrete. 7 | Id est enim, de quo quaerimus. Parvi enim primo ortu sic 8 | iacent, tamquam omnino sine animo sint. Quis est tam dissimile 9 | homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 10 | Beatus sibi videtur esse moriens. 13 |

14 | 15 |

16 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. 17 | Egone quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis 18 | ratio Peripateticorum. Itaque rursus eadem ratione, qua sum paulo 19 | ante usus, haerebitis. Paulum, cum regem Persem captum adduceret, 20 | eodem flumine invectio? Mihi enim erit isdem istis fortasse iam 21 | utendum. Ab hoc autem quaedam non melius quam veteres, quaedam 22 | omnino relicta. Tamen a proposito, inquam, aberramus. 23 |

24 | 25 |

26 | Sic, et quidem diligentius saepiusque ista loquemur inter nos 27 | agemusque communiter. 28 | Quid sequatur, quid repugnet, vident. 31 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti 32 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam 33 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam 34 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui 35 | quaesivit aliquid, tacet; Totum autem id externum est, et quod 36 | externum, id in casu est. 37 |

38 |
39 |
40 |
41 |
42 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
46 |
47 |
51 |
52 |
53 |

54 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. 55 | An hoc usque quaque, aliter in vita? Terram, mihi 56 | crede, ea lanx et maria deprimet. Duo Reges: constructio 57 | interrete. Id est enim, de quo quaerimus. Parvi 58 | enim primo ortu sic iacent, tamquam omnino sine animo 59 | sint. Quis est tam dissimile homini. Claudii libidini, 60 | qui tum erat summo ne imperio, dederetur. 61 | Beatus sibi videtur esse moriens. 64 |

65 | 66 |

67 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil 68 | est mali. Egone quaeris, inquit, quid sentiam? Si mala 69 | non sunt, iacet omnis ratio Peripateticorum. Itaque 70 | rursus eadem ratione, qua sum paulo ante usus, 71 | haerebitis. Paulum, cum regem Persem captum adduceret, 72 | eodem flumine invectio? Mihi enim erit isdem istis 73 | fortasse iam utendum. Ab hoc autem quaedam non melius 74 | quam veteres, quaedam omnino relicta. Tamen a proposito, 75 | inquam, aberramus. 76 |

77 | 78 |

79 | Sic, et quidem diligentius saepiusque ista loquemur 80 | inter nos agemusque communiter. 81 | Quid sequatur, quid repugnet, vident. 84 | Serpere anguiculos, nare anaticulas, evolare merulas, 85 | cornibus uti videmus boves, nepas aculeis. Ego vero volo 86 | in virtute vim esse quam maximam; Aufert enim sensus 87 | actionemque tollit omnem. Qua tu etiam inprudens utebare 88 | non numquam. Apud ceteros autem philosophos, qui 89 | quaesivit aliquid, tacet; Totum autem id externum est, 90 | et quod externum, id in casu est. 91 |

92 |
93 |
94 |
95 |
96 |
97 | -------------------------------------------------------------------------------- /src/tests/prepareMessage/email_19.input.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | and again
7 |
8 | Sent from 10 | 13 | Mailspring, the extensible, open source mail client. 22 |
23 |
24 | On Nov 4 2016, at 2:28 pm, Juan Tejada <juan@nylas.com> wrote:
25 |
26 | hey evan sorry to spam you so much
27 |
28 | Sent from 29 | 32 | Mailspring, the extensible, open source mail client. 34 | 40 |
41 |
42 | On Nov 4 2016, at 2:20 pm, Juan Tejada <juan@nylas.com> wrote:
43 |
44 | wat
45 |
46 | Sent from 47 | 50 | Mailspring, the extensible, open source mail client. 57 |
58 |
59 | On Nov 4 2016, at 1:19 pm, Juan Tejada <juan@nylas.com> wrote:
60 |
61 | this should only happen once
62 |
63 | Sent from 64 | 67 | Mailspring, the extensible, open source mail client. 74 |
75 |
76 | On Nov 4 2016, at 1:13 pm, Juan Tejada <juan@nylas.com> wrote:
77 |
78 | and again
79 |
80 | Sent from 81 | 84 | Mailspring, the extensible, open source mail client. 91 |
92 |
93 | On Nov 4 2016, at 1:13 pm, Juan Tejada <juan@nylas.com> wrote:
94 |
97 | and some other stuff
98 |
99 | Sent from Mailspring, the 100 | extensible, open source mail client. 101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 | 112 | 113 | -------------------------------------------------------------------------------- /src/prepareMessage.ts: -------------------------------------------------------------------------------- 1 | import cheerio from 'cheerio'; 2 | import removeQuotations from './removeQuotations'; 3 | import removeTrailingWhitespaces from './removeTrailingWhitespaces'; 4 | import linkify from './linkify'; 5 | import enforceViewport from './enforceViewport'; 6 | import { 7 | blockRemoteContentCheerio, 8 | ReplacementOptions, 9 | } from './blockRemoteContent'; 10 | import { containsEmptyText, getTopLevelElement } from './cheerio-utils'; 11 | import appendStyle from './appendStyle'; 12 | import fixBrokenHtml from './fixBrokenHtml'; 13 | import { enhanceLinks as _enhanceLinks } from './enhanceLinks'; 14 | 15 | export interface PrepareMessageOptions { 16 | /** Remove quotations and signatures. Only affects the result messageHtml */ 17 | noQuotations?: boolean; 18 | /** Automatically convert text links to anchor tags */ 19 | autolink?: boolean; 20 | /** Fix broken links and add the href to the title tag */ 21 | enhanceLinks?: boolean; 22 | /** Specific viewport to enforce. For example "" */ 23 | forceViewport?: string; 24 | /** Replace remote images with a transparent image, and replace other remote URLs with '#' */ 25 | noRemoteContent?: boolean; 26 | /** Replace remote content with custom URLs */ 27 | remoteContentReplacements?: ReplacementOptions; 28 | /** Append the given style to the HTML */ 29 | includeStyle?: string; 30 | } 31 | 32 | /** 33 | * Parse an HTML email and make transformation needed before displaying it to the user. 34 | * Returns the extracted body of the message, and the complete message for reference. 
35 | * 36 | * Beside the optional, this always: 37 | * - Remove comments 38 | * - Remove scripts 39 | * - Remove tracking pixels 40 | * - Remove trailing whitespaces 41 | */ 42 | function prepareMessage( 43 | emailHtml: string, 44 | options: PrepareMessageOptions = {} 45 | ): { 46 | /** The complete message. */ 47 | completeHtml: string; 48 | /** The body of the message, stripped from secondary information */ 49 | messageHtml: string; 50 | /** True if a quote or signature was found and stripped */ 51 | didFindQuotation: boolean; 52 | } { 53 | const { 54 | noQuotations = false, 55 | autolink = false, 56 | enhanceLinks = false, 57 | forceViewport = false, 58 | noRemoteContent = false, 59 | includeStyle = false, 60 | remoteContentReplacements = {}, 61 | } = options; 62 | 63 | const result = { 64 | messageHtml: emailHtml, 65 | completeHtml: emailHtml, 66 | didFindQuotation: false, 67 | }; 68 | 69 | result.completeHtml = fixBrokenHtml(result.completeHtml); 70 | result.messageHtml = result.completeHtml; 71 | 72 | if (autolink) { 73 | result.completeHtml = linkify(result.completeHtml); 74 | result.messageHtml = result.completeHtml; 75 | } 76 | 77 | const $ = cheerio.load(result.completeHtml); 78 | 79 | // Comments are useless, better remove them 80 | removeComments($); 81 | removeScripts($); 82 | removeTrackers($); 83 | 84 | if (enhanceLinks) { 85 | _enhanceLinks($); 86 | } 87 | 88 | if (noRemoteContent) { 89 | blockRemoteContentCheerio($, remoteContentReplacements); 90 | } 91 | 92 | if (forceViewport) { 93 | enforceViewport($, forceViewport); 94 | } 95 | 96 | if (includeStyle) { 97 | appendStyle($, includeStyle); 98 | } 99 | 100 | removeTrailingWhitespaces($); 101 | result.completeHtml = $.html(); 102 | result.messageHtml = result.completeHtml; 103 | 104 | // Remove quotations 105 | if (noQuotations) { 106 | const { didFindQuotation } = removeQuotations($); 107 | 108 | // if the actions above have resulted in an empty body, 109 | // then we should not remove quotations 
110 | if (containsEmptyText(getTopLevelElement($))) { 111 | // Don't remove anything. 112 | } else { 113 | result.didFindQuotation = didFindQuotation; 114 | 115 | removeTrailingWhitespaces($); 116 | result.messageHtml = $.html(); 117 | } 118 | } 119 | 120 | return result; 121 | } 122 | 123 | function removeTrackers($: CheerioStatic): void { 124 | const TRACKERS_SELECTORS = [ 125 | // TODO: Improve by looking at inline styles as well 126 | 'img[width="0"]', 127 | 'img[width="1"]', 128 | 'img[height="0"]', 129 | 'img[height="1"]', 130 | 'img[src*="http://mailstat.us"]', 131 | ]; 132 | 133 | const query = TRACKERS_SELECTORS.join(', '); 134 | 135 | $(query).each((_, el) => { 136 | $(el).remove(); 137 | }); 138 | } 139 | 140 | function removeScripts($: CheerioStatic): void { 141 | $('script').each((_, el) => { 142 | $(el).remove(); 143 | }); 144 | } 145 | 146 | function removeComments($: CheerioStatic): void { 147 | $('*') 148 | .contents() 149 | .each((_, el) => { 150 | if (el.type === 'comment') { 151 | $(el).remove(); 152 | } 153 | }); 154 | } 155 | 156 | export default prepareMessage; 157 | -------------------------------------------------------------------------------- /src/tests/removeTrailingWhitespaces.test.ts: -------------------------------------------------------------------------------- 1 | import cheerio from 'cheerio'; 2 | import removeTrailingWhitespaces from '../removeTrailingWhitespaces'; 3 | import { expectHtml } from './utils'; 4 | import { getTopLevelElement } from '../cheerio-utils'; 5 | 6 | describe('removeTrailingWhitespaces', () => { 7 | it('should trim an empty body', () => { 8 | check( 9 | ` 10 | 11 | `, 12 | ` 13 | 14 | ` 15 | ); 16 | }); 17 | 18 | it('should trim an empty div', () => { 19 | check( 20 | ` 21 |
22 | `, 23 | `` 24 | ); 25 | }); 26 | 27 | it('should trim text', () => { 28 | check( 29 | ` 30 |
Hello
31 | `, 32 | ` 33 |
Hello
34 | ` 35 | ); 36 | }); 37 | 38 | it('should trim br, and hr', () => { 39 | check( 40 | ` 41 |
Hello


42 | `, 43 | ` 44 |
Hello
45 | ` 46 | ); 47 | }); 48 | 49 | it('should trim inside a body', () => { 50 | check( 51 | ` 52 |
Hello


53 | `, 54 | ` 55 |
Hello
56 | ` 57 | ); 58 | }); 59 | 60 | it('should not change trimmed content', () => { 61 | check( 62 | ` 63 |
Hello
64 | `, 65 | ` 66 |
Hello
67 | ` 68 | ); 69 | }); 70 | 71 | it('should not trim left side of last text', () => { 72 | check( 73 | ` 74 |

--
I use Tempo to improve my focus

75 | 76 | `, 77 | ` 78 |

--
I use Tempo to improve my focus

79 | ` 80 | ); 81 | }); 82 | 83 | it('should not trim pre', () => { 84 | check( 85 | ` 86 |
Hello
Hi, this is code  
87 | `, 88 | ` 89 |
Hello
Hi, this is code  
90 | ` 91 | ); 92 | }); 93 | 94 | it('should stop trimming at img', () => { 95 | check( 96 | ` 97 |
Hello
98 | `, 99 | ` 100 |
Hello
101 | ` 102 | ); 103 | }); 104 | 105 | it('should trim recursively up the HTML tree', () => { 106 | check( 107 | ` 108 |
Hello

109 | `, 110 | ` 111 |
Hello
112 | ` 113 | ); 114 | }); 115 | 116 | it('should trim remnants of signature', () => { 117 | check( 118 | ` 119 |
Hello


--
120 | `, 121 | ` 122 |
Hello
123 | ` 124 | ); 125 | }); 126 | 127 | it('should trim comments', () => { 128 | check( 129 | ` 130 | 131 | 132 | 133 |
134 |
Hello
135 |

136 | 137 |
138 |

139 |
140 | 141 | 142 | `, 143 | ` 144 | 145 | 146 | 147 |
148 |
Hello
149 |
150 | 151 | 152 | ` 153 | ); 154 | }); 155 | 156 | it('should not trim images', () => { 157 | check( 158 | ` 159 | 160 | 161 | 165 | 166 | 169 |
170 |

171 | Hello 172 |

173 |
174 | attached-image.jpg

Cheers!
Jonathan 180 |
181 |
182 | 183 | 184 | `, 185 | ` 186 | 187 | 188 | 192 | 193 | 196 |
197 |

198 | Hello 199 |

200 |
201 | attached-image.jpg

Cheers!
Jonathan 207 |
208 |
209 | 210 | 211 | ` 212 | ); 213 | }); 214 | }); 215 | 216 | function check(before: string, after: string): void { 217 | const $ = cheerio.load(before); 218 | removeTrailingWhitespaces($); 219 | 220 | const result = $.html(); 221 | expectHtml(result, after); 222 | 223 | // Check that it did not create text nodes as parent of themselves 224 | // which messes up the next steps 225 | const top = getTopLevelElement($); 226 | function checkForNoCircularTextReference(parent: CheerioElement) { 227 | if (parent.type === 'text') { 228 | parent.children?.forEach(child => { 229 | expect(child.type).not.toEqual('text'); 230 | }); 231 | } else { 232 | parent.children?.forEach(checkForNoCircularTextReference); 233 | } 234 | } 235 | checkForNoCircularTextReference(top); 236 | } 237 | -------------------------------------------------------------------------------- /src/tests/remoteContent/email-privacy-tester.html: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 9 | Email Privacy Tester 10 | 15 | 20 | 24 | 29 | 30 | 35 | 38 | 39 | 40 |

41 | Email Privacy Tester

This is 42 | your test email from the Email Privacy Tester. Please ignore everything after this line. Clicking on 43 | anything will skew the results. 44 |

45 |

48 |

51 |

54 | 55 |
56 | 113 |

116 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /benchmarks/emails/basic-lorem-gmail-replied-x2.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

4 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. 5 | An hoc usque quaque, aliter in vita? Terram, mihi crede, ea 6 | lanx et maria deprimet. Duo Reges: constructio interrete. 7 | Id est enim, de quo quaerimus. Parvi enim primo ortu sic 8 | iacent, tamquam omnino sine animo sint. Quis est tam dissimile 9 | homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 10 | Beatus sibi videtur esse moriens. 13 |

14 | 15 |

16 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. 17 | Egone quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis 18 | ratio Peripateticorum. Itaque rursus eadem ratione, qua sum paulo 19 | ante usus, haerebitis. Paulum, cum regem Persem captum adduceret, 20 | eodem flumine invectio? Mihi enim erit isdem istis fortasse iam 21 | utendum. Ab hoc autem quaedam non melius quam veteres, quaedam 22 | omnino relicta. Tamen a proposito, inquam, aberramus. 23 |

24 | 25 |

26 | Sic, et quidem diligentius saepiusque ista loquemur inter nos 27 | agemusque communiter. 28 | Quid sequatur, quid repugnet, vident. 31 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti 32 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam 33 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam 34 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui 35 | quaesivit aliquid, tacet; Totum autem id externum est, et quod 36 | externum, id in casu est. 37 |

38 |
39 |
40 |
41 |
42 | On Tue, Dec 31, 2019 at 2:59 PM Nicolas Gaborit <hello@soreine.dev> wrote:
46 |
47 |
51 |
52 |
53 |

54 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. 55 | An hoc usque quaque, aliter in vita? Terram, mihi 56 | crede, ea lanx et maria deprimet. Duo Reges: constructio 57 | interrete. Id est enim, de quo quaerimus. Parvi 58 | enim primo ortu sic iacent, tamquam omnino sine animo 59 | sint. Quis est tam dissimile homini. Claudii libidini, 60 | qui tum erat summo ne imperio, dederetur. 61 | Beatus sibi videtur esse moriens. 64 |

65 | 66 |

67 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil 68 | est mali. Egone quaeris, inquit, quid sentiam? Si mala 69 | non sunt, iacet omnis ratio Peripateticorum. Itaque 70 | rursus eadem ratione, qua sum paulo ante usus, 71 | haerebitis. Paulum, cum regem Persem captum adduceret, 72 | eodem flumine invectio? Mihi enim erit isdem istis 73 | fortasse iam utendum. Ab hoc autem quaedam non melius 74 | quam veteres, quaedam omnino relicta. Tamen a proposito, 75 | inquam, aberramus. 76 |

77 | 78 |

79 | Sic, et quidem diligentius saepiusque ista loquemur 80 | inter nos agemusque communiter. 81 | Quid sequatur, quid repugnet, vident. 84 | Serpere anguiculos, nare anaticulas, evolare merulas, 85 | cornibus uti videmus boves, nepas aculeis. Ego vero volo 86 | in virtute vim esse quam maximam; Aufert enim sensus 87 | actionemque tollit omnem. Qua tu etiam inprudens utebare 88 | non numquam. Apud ceteros autem philosophos, qui 89 | quaesivit aliquid, tacet; Totum autem id externum est, 90 | et quod externum, id in casu est. 91 |

92 |
93 |
94 |
95 |
96 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
101 |
102 |
106 |
107 |
110 |

111 | Lorem ipsum dolor sit amet, consectetur 112 | adipiscing elit. 113 | An hoc usque quaque, aliter in vita? 114 | Terram, mihi crede, ea lanx et maria 115 | deprimet. Duo Reges: constructio interrete. 116 | Id est enim, de quo quaerimus. Parvi 117 | enim primo ortu sic iacent, tamquam omnino 118 | sine animo sint. Quis est tam dissimile 119 | homini. Claudii libidini, qui tum erat summo 120 | ne imperio, dederetur. 121 | Beatus sibi videtur esse moriens. 126 |

127 | 128 |

129 | Hoc dixerit potius Ennius: Nimium boni est, 130 | cui nihil est mali. Egone quaeris, inquit, 131 | quid sentiam? Si mala non sunt, iacet omnis 132 | ratio Peripateticorum. Itaque rursus eadem 133 | ratione, qua sum paulo ante usus, 134 | haerebitis. Paulum, cum regem Persem captum 135 | adduceret, eodem flumine invectio? Mihi enim 136 | erit isdem istis fortasse iam utendum. Ab 137 | hoc autem quaedam non melius quam veteres, 138 | quaedam omnino relicta. Tamen a proposito, 139 | inquam, aberramus. 140 |

141 | 142 |

143 | Sic, et quidem diligentius saepiusque ista 144 | loquemur inter nos agemusque communiter. 145 | Quid sequatur, quid repugnet, 149 | vident. 151 | Serpere anguiculos, nare anaticulas, evolare 152 | merulas, cornibus uti videmus boves, nepas 153 | aculeis. Ego vero volo in virtute vim esse 154 | quam maximam; Aufert enim sensus actionemque 155 | tollit omnem. Qua tu etiam inprudens utebare 156 | non numquam. Apud ceteros autem philosophos, 157 | qui quaesivit aliquid, tacet; Totum autem id 158 | externum est, et quod externum, id in casu 159 | est. 160 |

161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 | -------------------------------------------------------------------------------- /src/tests/remoteContent/blockRemoteContent.test.ts: -------------------------------------------------------------------------------- 1 | import { expectHtml, readFile } from '../utils'; 2 | import { blockRemoteContent } from '../../blockRemoteContent'; 3 | 4 | const EMAIL_PRIVACY_TESTER = readFile(__dirname, 'email-privacy-tester.html'); 5 | 6 | describe('remote-content', () => { 7 | it('should replace remote content URLs in all style declarations', () => { 8 | const input = ` 9 | 10 | 11 | 18 | 19 | 20 |

23 | Hello 24 |

25 | 38 | 39 | 40 | `; 41 | 42 | const expected = ` 43 | 44 | 45 | 52 | 53 | 54 |

57 | Hello 58 |

59 | 72 | 73 | 74 | `; 75 | 76 | const actual = blockRemoteContent(input, { image: 'REPLACED_IMAGE' }); 77 | 78 | expectHtml(actual, expected); 79 | }); 80 | 81 | it('should not replace embedded image (data:)', () => { 82 | const input = ` 83 | 84 | 85 | 86 |
87 | attached-image.jpg

Cheers!
Jonathan 93 |
94 | 95 | 96 | `; 97 | 98 | const expected = input; 99 | 100 | const actual = blockRemoteContent(input, { image: 'REPLACED_IMAGE' }); 101 | 102 | expectHtml(actual, expected); 103 | }); 104 | it('should not replace image attachment (cid:) URLs', () => { 105 | const input = ` 106 | 107 | 108 | 109 |
110 | attached-image.jpg

Cheers!
Jonathan 116 |
117 | 118 | 119 | `; 120 | 121 | const expected = input; 122 | 123 | const actual = blockRemoteContent(input, { image: 'REPLACED_IMAGE' }); 124 | 125 | expectHtml(actual, expected); 126 | }); 127 | 128 | it('should replace remote content URLs in the email privacy tester', () => { 129 | const input = EMAIL_PRIVACY_TESTER; 130 | 131 | const actual = blockRemoteContent(input, { 132 | image: 'REPLACED_IMAGE', 133 | other: 'REPLACED_URL', 134 | }); 135 | 136 | const expected = ` 137 | 138 | 139 | 140 | 141 | 142 | Email Privacy Tester 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 |

152 | Email Privacy Tester

This is 153 | your test email from the Email Privacy Tester. Please ignore everything after this line. Clicking on 154 | anything will skew the results. 155 |

156 |

157 |

158 |

159 | 160 |
161 | 165 |

166 | 167 | 168 | 169 | `; 170 | 171 | expectHtml(actual, expected); 172 | 173 | const containsApiCalls = /;test=(?!cssExpression)/g.test(actual); 174 | expect(containsApiCalls).toBe(false); 175 | }); 176 | }); 177 | -------------------------------------------------------------------------------- /benchmarks/emails/basic-lorem-gmail-replied-x1.eml: -------------------------------------------------------------------------------- 1 | MIME-Version: 1.0 2 | Date: Tue, 31 Dec 2019 14:59:34 +0100 3 | References: 4 | In-Reply-To: 5 | Message-ID: 6 | Subject: Re: [TEST] Sample Gmail email for tempo-email-parser 7 | From: Nicolas Gaborit 8 | To: Nicolas Gaborit 9 | Content-Type: multipart/alternative; boundary="000000000000ba9101059b005e71" 10 | 11 | --000000000000ba9101059b005e71 12 | Content-Type: text/plain; charset="UTF-8" 13 | 14 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. *An hoc usque 15 | quaque, aliter in vita?* Terram, mihi crede, ea lanx et maria deprimet. Duo 16 | Reges: constructio interrete. *Id est enim, de quo quaerimus.* Parvi enim 17 | primo ortu sic iacent, tamquam omnino sine animo sint. Quis est tam 18 | dissimile homini. Claudii libidini, qui tum erat summo ne imperio, 19 | dederetur. Beatus sibi videtur esse moriens. 20 | 21 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone 22 | quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis ratio 23 | Peripateticorum. Itaque rursus eadem ratione, qua sum paulo ante usus, 24 | haerebitis. Paulum, cum regem Persem captum adduceret, eodem flumine 25 | invectio? Mihi enim erit isdem istis fortasse iam utendum. Ab hoc autem 26 | quaedam non melius quam veteres, quaedam omnino relicta. Tamen a proposito, 27 | inquam, aberramus. 28 | 29 | Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque 30 | communiter. Quid sequatur, quid repugnet, vident. 31 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti videmus 32 | boves, nepas aculeis. 
Ego vero volo in virtute vim esse quam maximam; 33 | Aufert enim sensus actionemque tollit omnem. Qua tu etiam inprudens utebare 34 | non numquam. Apud ceteros autem philosophos, qui quaesivit aliquid, tacet; 35 | Totum autem id externum est, et quod externum, id in casu est. 36 | 37 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit wrote: 38 | 39 | > Lorem ipsum dolor sit amet, consectetur adipiscing elit. *An hoc usque 40 | > quaque, aliter in vita?* Terram, mihi crede, ea lanx et maria deprimet. 41 | > Duo Reges: constructio interrete. *Id est enim, de quo quaerimus.* Parvi 42 | > enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis est tam 43 | > dissimile homini. Claudii libidini, qui tum erat summo ne imperio, 44 | > dederetur. Beatus sibi videtur esse moriens. 45 | > 46 | > Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone 47 | > quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis ratio 48 | > Peripateticorum. Itaque rursus eadem ratione, qua sum paulo ante usus, 49 | > haerebitis. Paulum, cum regem Persem captum adduceret, eodem flumine 50 | > invectio? Mihi enim erit isdem istis fortasse iam utendum. Ab hoc autem 51 | > quaedam non melius quam veteres, quaedam omnino relicta. Tamen a proposito, 52 | > inquam, aberramus. 53 | > 54 | > Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque 55 | > communiter. Quid sequatur, quid repugnet, vident. 56 | > Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti videmus 57 | > boves, nepas aculeis. Ego vero volo in virtute vim esse quam maximam; 58 | > Aufert enim sensus actionemque tollit omnem. Qua tu etiam inprudens utebare 59 | > non numquam. Apud ceteros autem philosophos, qui quaesivit aliquid, tacet; 60 | > Totum autem id externum est, et quod externum, id in casu est. 61 | > 62 | 63 | --000000000000ba9101059b005e71 64 | Content-Type: text/html; charset="UTF-8" 65 | Content-Transfer-Encoding: quoted-printable 66 | 67 |

Lorem ipsum dolor sit amet, consectetu= 68 | r adipiscing elit. An hoc usque quaque, aliter in vita? Terram, mihi= 69 | crede, ea lanx et maria deprimet. Duo Reges: constructio interrete. Id = 70 | est enim, de quo quaerimus. 71 | Parvi enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis=20 72 | est tam dissimile homini. Claudii libidini, qui tum erat summo ne=20 73 | imperio, dederetur. Beat= 74 | us sibi videtur esse moriens.

75 | 76 |

Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone qu= 77 | aeris, inquit, quid sentiam? 78 | Si mala non sunt, iacet omnis ratio Peripateticorum. Itaque rursus=20 79 | eadem ratione, qua sum paulo ante usus, haerebitis. Paulum, cum regem=20 80 | Persem captum adduceret, eodem flumine invectio? Mihi enim erit isdem=20 81 | istis fortasse iam utendum. Ab hoc autem quaedam non melius quam=20 82 | veteres, quaedam omnino relicta. Tamen a proposito, inquam, aberramus.

83 | 84 |

Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque = 85 | communiter. Quid sequatu= 86 | r, quid repugnet, vident. 87 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti=20 88 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam=20 89 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam=20 90 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui=20 91 | quaesivit aliquid, tacet; Totum autem id externum est, et quod externum, 92 | id in casu est.


On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <<= 94 | a href=3D"mailto:hello@soreine.dev">hello@soreine.dev> wrote:

Lorem ipsum dolor = 98 | sit amet, consectetur adipiscing elit. An hoc usque quaque, aliter in vi= 99 | ta? Terram, mihi crede, ea lanx et maria deprimet. Duo Reges: construct= 100 | io interrete. Id est enim, de quo quaerimus. 101 | Parvi enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis=20 102 | est tam dissimile homini. Claudii libidini, qui tum erat summo ne=20 103 | imperio, dederetur. Beat= 104 | us sibi videtur esse moriens.

105 | 106 |

Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone qu= 107 | aeris, inquit, quid sentiam? 108 | Si mala non sunt, iacet omnis ratio Peripateticorum. Itaque rursus=20 109 | eadem ratione, qua sum paulo ante usus, haerebitis. Paulum, cum regem=20 110 | Persem captum adduceret, eodem flumine invectio? Mihi enim erit isdem=20 111 | istis fortasse iam utendum. Ab hoc autem quaedam non melius quam=20 112 | veteres, quaedam omnino relicta. Tamen a proposito, inquam, aberramus.

113 | 114 |

Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque = 115 | communiter. Quid sequatu= 116 | r, quid repugnet, vident. 117 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti=20 118 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam=20 119 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam=20 120 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui=20 121 | quaesivit aliquid, tacet; Totum autem id externum est, et quod externum, 122 | id in casu est.

123 | 124 |
125 |
126 | 127 | --000000000000ba9101059b005e71-- -------------------------------------------------------------------------------- /src/tests/prepareMessage/trimmed-reply-bug.input.html: -------------------------------------------------------------------------------- 1 |

Hi Mette,

2 |

I have attached our contracts for the USD 500.000 investment regarding BoostVC. Please let me know if thats what is needed.

3 |

Kind Regards

4 |

Henrique

5 |

--
I use Tempo to improve my focus

6 |

7 | On Tue, Apr 13 2021 at 10:37 AM (GMT+2), mette.randers@dk.gt.com wrote: 8 |

9 |
10 | 11 | 12 | 13 | 52 | 53 |
54 |
55 |

Hi Henrique.

56 |

 

57 |

I hope you’re doing well.

58 |

 

59 |

Reviewing the bookkeeping for the first quarter in 2021, could you please send me the agreement regarding enclosed, USD 500.000 received on 20. Jan?

60 |

 

61 |

Thanks in advance.

62 |

 

63 |
64 |

Med venlig hilsen
65 |
66 |
Mette Randers
67 |
Regnskabskonsulent

68 |

D 69 | +4535275268
70 |
T +4533110220  71 |
72 |
M +4526881988 73 |
74 |
Mette.Randers@dk.gt.com
75 |
76 |
www.grantthornton.dk |  Chat 77 | with me on Teams!
78 | __________________________________________
79 |
80 |
Grant Thornton
81 | Stockholmsgade 45
82 | 2100 København Ø

83 |

84 |
Grant Thornton International Ltd (GTIL) is a company limited by guarantee incorporated in England and Wales with registered number 05523714 (registered 85 | office: Grant Thornton House, 20 Fenchurch Street, Level 25 London EC3M 3BY, UK). GTIL and the member firms are not a worldwide partnership. Services are delivered by the member firms. GTIL and its member firms are not agents of, and do not obligate, one another 86 | and are not liable for one another’s acts or omissions. Please see www.GrantThornton.global for further details.
87 |
88 | The name “Grant Thornton”, the Grant Thornton logo, including the Mobius symbol/device, and “Instinct for Growth” are trademarks of GTIL. All copyright is owned by GTIL, including the copyright in the Grant Thornton logo; all rights are reserved.
89 |
90 | This email (and any attachments) is confidential and may also be legally privileged. Anything in this email (and any attachments) which does not relate to official business of GTIL is neither given nor endorsed by GTIL. If you have received this email in error, 91 | please notify the sender immediately, delete it from your system and destroy any copies of it (and any attachments).
92 |

93 |

 

94 |
95 |
96 |
97 |
98 | 99 |
-------------------------------------------------------------------------------- /src/tests/removeQuotations.test.ts: -------------------------------------------------------------------------------- 1 | import expect from 'expect'; 2 | import cheerio from 'cheerio'; 3 | import { expectHtml } from './utils'; 4 | import removeQuotations from '../removeQuotations'; 5 | 6 | describe('removeQuotations', () => { 7 | it('should remove quotation from basic email', () => { 8 | const email = ` 9 |
10 |
11 |

12 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. 13 | An hoc usque quaque, aliter in vita? Terram, mihi 14 | crede, ea lanx et maria deprimet. Duo Reges: constructio 15 | interrete. Id est enim, de quo quaerimus. Parvi 16 | enim primo ortu sic iacent, tamquam omnino sine animo 17 | sint. Quis est tam dissimile homini. Claudii libidini, 18 | qui tum erat summo ne imperio, dederetur. 19 | Beatus sibi videtur esse moriens. 22 |

23 |
24 |
25 |
26 |
27 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
31 |
32 |
36 |
37 |
40 |

41 | Replied message 42 |

43 |
44 |
45 |
46 |
47 |
48 | `; 49 | 50 | const $ = cheerio.load(email); 51 | const result = removeQuotations($); 52 | const actual = $.html(); 53 | 54 | expectHtml( 55 | actual, 56 | ` 57 | 58 | 59 | 60 |
61 |
62 |

63 | Lorem ipsum dolor sit amet, consectetur 64 | adipiscing elit. 65 | An hoc usque quaque, aliter in vita? Terram, mihi crede, ea lanx et maria deprimet. Duo Reges: 66 | constructio interrete. Id est enim, de quo quaerimus. Parvi enim primo ortu sic iacent, tamquam omnino 67 | sine animo sint. Quis est tam dissimile homini. Claudii libidini, qui tum erat summo ne imperio, dederetur. 68 | Beatus sibi videtur esse moriens. 73 |

74 |
75 |
76 |
77 | 78 | 79 | ` 80 | ); 81 | 82 | expect(result).toMatchObject({ 83 | didFindQuotation: true, 84 | }); 85 | }); 86 | 87 | it('should remove signature from basic email', () => { 88 | const email = ` 89 |
90 |
91 |

92 | Hello 93 |

94 |
95 |
96 |
--
97 |
98 |
99 |
Nicolas Gaborit (Soreine)
100 |
Web Developper
101 |
102 |
103 |
104 | 105 | 106 | `; 107 | 108 | const $ = cheerio.load(email); 109 | const result = removeQuotations($); 110 | const actual = $.html(); 111 | 112 | expectHtml( 113 | actual, 114 | ` 115 | 116 | 117 | 118 |
119 |
120 |

121 | Hello 122 |

123 |
124 |
125 |
--
126 |
127 | 128 | 129 | ` 130 | ); 131 | 132 | expect(result).toMatchObject({ 133 | didFindQuotation: true, 134 | }); 135 | }); 136 | 137 | it('should remove both signature and quotations from basic email', () => { 138 | const email = ` 139 | 140 | 141 | 142 |
143 |
144 |

145 | Hello 146 |

147 |
148 |
149 |
150 |
151 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
155 |
156 |
160 |
161 |
164 |

165 | This is the replied message 166 |

167 |
168 |
169 |
170 |
171 |
172 |
--
173 |
174 |
175 |
Nicolas Gaborit (Soreine)
176 |
Web Developper
177 |
178 |
179 |
180 | 181 | 182 | `; 183 | 184 | const $ = cheerio.load(email); 185 | const result = removeQuotations($); 186 | const actual = $.html(); 187 | 188 | expectHtml( 189 | actual, 190 | ` 191 | 192 | 193 | 194 |
195 |
196 |

197 | Hello 198 |

199 |
200 |
201 | 202 |
203 |
--
204 |
205 | 206 | 207 | ` 208 | ); 209 | 210 | expect(result).toMatchObject({ 211 | didFindQuotation: true, 212 | }); 213 | }); 214 | 215 | it('should not wrap body in body', () => { 216 | const email = ` 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | Hello 225 | 226 | 227 | `; 228 | 229 | const $ = cheerio.load(email); 230 | const result = removeQuotations($); 231 | const actual = $.html(); 232 | 233 | expectHtml( 234 | actual, 235 | ` 236 | 237 | 238 | 239 | 240 | 241 | 242 | Hello 243 | 244 | 245 | ` 246 | ); 247 | 248 | expect(result).toMatchObject({ 249 | didFindQuotation: false, 250 | }); 251 | }); 252 | 253 | it('should preserve inline quotes', () => { 254 | const email = ` 255 |
256 |

257 | Hello. 258 |

259 |
Here is an inline quote
260 |

Hope you liked it

261 |
262 |
263 |
264 | On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <hello@soreine.dev> wrote:
268 |
269 |
273 |
274 |
277 |

278 | This is the replied message 279 |

280 |
281 |
282 |
283 |
284 |
285 |
--
286 |
287 |
288 |
Nicolas Gaborit (Soreine)
289 |
Web Developper
290 |
291 |
292 |
293 | `; 294 | 295 | const $ = cheerio.load(email); 296 | const result = removeQuotations($); 297 | const actual = $.html(); 298 | 299 | expectHtml( 300 | actual, 301 | ` 302 | 303 | 304 | 305 |
306 |

307 | Hello. 308 |

309 |
Here is an inline quote
310 |

Hope you liked it

311 |
312 | 313 |
314 |
--
315 |
316 | 317 | 318 | ` 319 | ); 320 | 321 | expect(result).toMatchObject({ 322 | didFindQuotation: true, 323 | }); 324 | }); 325 | 326 | it('should remove "On... wrote:" in different languages', () => { 327 | const email = ` 328 | 329 | 330 |

Hello

331 |
332 | Le lun. 26 janvier 2019 à 17:02, <registration-calm@mahi.dhamma.org> a écrit :
336 |
337 |
338 |
339 |

340 | This is the replied message 341 |

342 |
343 |
344 | 345 | 346 | `; 347 | 348 | const $ = cheerio.load(email); 349 | const result = removeQuotations($); 350 | const actual = $.html(); 351 | 352 | expectHtml( 353 | actual, 354 | ` 355 | 356 | 357 | 358 |

Hello

359 |

360 | 361 | 362 | ` 363 | ); 364 | 365 | expect(result).toMatchObject({ 366 | didFindQuotation: true, 367 | }); 368 | }); 369 | 370 | it('should remove "On... wrote:" when it is the end of the email', () => { 371 | const email = ` 372 | 373 | 374 | 375 | 380 | 381 | 382 |
383 |
Hello
384 |
this is a quote
385 |
386 | On December 3, 2019 at 05:01, Onno Schwanen wrote: 387 |
388 |
389 | 390 | 391 | `; 392 | 393 | const $ = cheerio.load(email); 394 | const result = removeQuotations($); 395 | const actual = $.html(); 396 | 397 | expectHtml( 398 | actual, 399 | ` 400 | 401 | 402 | 403 | 408 | 409 | 410 |
411 |
Hello
412 |
this is a quote
413 |
414 |
415 | 416 | 417 | ` 418 | ); 419 | 420 | expect(result).toMatchObject({ 421 | didFindQuotation: true, 422 | }); 423 | }); 424 | }); 425 | -------------------------------------------------------------------------------- /benchmarks/emails/basic-lorem-gmail-replied-x2.eml: -------------------------------------------------------------------------------- 1 | MIME-Version: 1.0 2 | Date: Tue, 31 Dec 2019 14:59:48 +0100 3 | References: 4 | 5 | In-Reply-To: 6 | Message-ID: 7 | Subject: Re: [TEST] Sample Gmail email for tempo-email-parser 8 | From: Nicolas Gaborit 9 | To: Nicolas Gaborit 10 | Content-Type: multipart/alternative; boundary="00000000000092d55a059b005ff2" 11 | 12 | --00000000000092d55a059b005ff2 13 | Content-Type: text/plain; charset="UTF-8" 14 | 15 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. *An hoc usque 16 | quaque, aliter in vita?* Terram, mihi crede, ea lanx et maria deprimet. Duo 17 | Reges: constructio interrete. *Id est enim, de quo quaerimus.* Parvi enim 18 | primo ortu sic iacent, tamquam omnino sine animo sint. Quis est tam 19 | dissimile homini. Claudii libidini, qui tum erat summo ne imperio, 20 | dederetur. Beatus sibi videtur esse moriens. 21 | 22 | Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone 23 | quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis ratio 24 | Peripateticorum. Itaque rursus eadem ratione, qua sum paulo ante usus, 25 | haerebitis. Paulum, cum regem Persem captum adduceret, eodem flumine 26 | invectio? Mihi enim erit isdem istis fortasse iam utendum. Ab hoc autem 27 | quaedam non melius quam veteres, quaedam omnino relicta. Tamen a proposito, 28 | inquam, aberramus. 29 | 30 | Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque 31 | communiter. Quid sequatur, quid repugnet, vident. 32 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti videmus 33 | boves, nepas aculeis. 
Ego vero volo in virtute vim esse quam maximam; 34 | Aufert enim sensus actionemque tollit omnem. Qua tu etiam inprudens utebare 35 | non numquam. Apud ceteros autem philosophos, qui quaesivit aliquid, tacet; 36 | Totum autem id externum est, et quod externum, id in casu est. 37 | 38 | On Tue, Dec 31, 2019 at 2:59 PM Nicolas Gaborit wrote: 39 | 40 | > Lorem ipsum dolor sit amet, consectetur adipiscing elit. *An hoc usque 41 | > quaque, aliter in vita?* Terram, mihi crede, ea lanx et maria deprimet. 42 | > Duo Reges: constructio interrete. *Id est enim, de quo quaerimus.* Parvi 43 | > enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis est tam 44 | > dissimile homini. Claudii libidini, qui tum erat summo ne imperio, 45 | > dederetur. Beatus sibi videtur esse moriens. 46 | > 47 | > Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone 48 | > quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis ratio 49 | > Peripateticorum. Itaque rursus eadem ratione, qua sum paulo ante usus, 50 | > haerebitis. Paulum, cum regem Persem captum adduceret, eodem flumine 51 | > invectio? Mihi enim erit isdem istis fortasse iam utendum. Ab hoc autem 52 | > quaedam non melius quam veteres, quaedam omnino relicta. Tamen a proposito, 53 | > inquam, aberramus. 54 | > 55 | > Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque 56 | > communiter. Quid sequatur, quid repugnet, vident. 57 | > Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti videmus 58 | > boves, nepas aculeis. Ego vero volo in virtute vim esse quam maximam; 59 | > Aufert enim sensus actionemque tollit omnem. Qua tu etiam inprudens utebare 60 | > non numquam. Apud ceteros autem philosophos, qui quaesivit aliquid, tacet; 61 | > Totum autem id externum est, et quod externum, id in casu est. 62 | > 63 | > On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit 64 | > wrote: 65 | > 66 | >> Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
*An hoc usque 67 | >> quaque, aliter in vita?* Terram, mihi crede, ea lanx et maria deprimet. 68 | >> Duo Reges: constructio interrete. *Id est enim, de quo quaerimus.* Parvi 69 | >> enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis est tam 70 | >> dissimile homini. Claudii libidini, qui tum erat summo ne imperio, 71 | >> dederetur. Beatus sibi videtur esse moriens. 72 | >> 73 | >> Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone 74 | >> quaeris, inquit, quid sentiam? Si mala non sunt, iacet omnis ratio 75 | >> Peripateticorum. Itaque rursus eadem ratione, qua sum paulo ante usus, 76 | >> haerebitis. Paulum, cum regem Persem captum adduceret, eodem flumine 77 | >> invectio? Mihi enim erit isdem istis fortasse iam utendum. Ab hoc autem 78 | >> quaedam non melius quam veteres, quaedam omnino relicta. Tamen a proposito, 79 | >> inquam, aberramus. 80 | >> 81 | >> Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque 82 | >> communiter. Quid sequatur, quid repugnet, vident. 83 | >> Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti videmus 84 | >> boves, nepas aculeis. Ego vero volo in virtute vim esse quam maximam; 85 | >> Aufert enim sensus actionemque tollit omnem. Qua tu etiam inprudens utebare 86 | >> non numquam. Apud ceteros autem philosophos, qui quaesivit aliquid, tacet; 87 | >> Totum autem id externum est, et quod externum, id in casu est. 88 | >> 89 | > 90 | 91 | --00000000000092d55a059b005ff2 92 | Content-Type: text/html; charset="UTF-8" 93 | Content-Transfer-Encoding: quoted-printable 94 | 95 |

Lorem ipsum dolor sit amet, consectetu= 96 | r adipiscing elit. An hoc usque quaque, aliter in vita? Terram, mihi= 97 | crede, ea lanx et maria deprimet. Duo Reges: constructio interrete. Id = 98 | est enim, de quo quaerimus. 99 | Parvi enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis=20 100 | est tam dissimile homini. Claudii libidini, qui tum erat summo ne=20 101 | imperio, dederetur. Beat= 102 | us sibi videtur esse moriens.

103 | 104 |

Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone qu= 105 | aeris, inquit, quid sentiam? 106 | Si mala non sunt, iacet omnis ratio Peripateticorum. Itaque rursus=20 107 | eadem ratione, qua sum paulo ante usus, haerebitis. Paulum, cum regem=20 108 | Persem captum adduceret, eodem flumine invectio? Mihi enim erit isdem=20 109 | istis fortasse iam utendum. Ab hoc autem quaedam non melius quam=20 110 | veteres, quaedam omnino relicta. Tamen a proposito, inquam, aberramus.

111 | 112 |

Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque = 113 | communiter. Quid sequatu= 114 | r, quid repugnet, vident. 115 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti=20 116 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam=20 117 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam=20 118 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui=20 119 | quaesivit aliquid, tacet; Totum autem id externum est, et quod externum, 120 | id in casu est.


On Tue, Dec 31, 2019 at 2:59 PM Nicolas Gaborit <hello@soreine.dev> wrote:

Lorem ipsum dolor sit amet, consectetur adipiscing elit. A= 126 | n hoc usque quaque, aliter in vita? Terram, mihi crede, ea lanx et mari= 127 | a deprimet. Duo Reges: constructio interrete. Id est enim, de quo quaeri= 128 | mus. 129 | Parvi enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis=20 130 | est tam dissimile homini. Claudii libidini, qui tum erat summo ne=20 131 | imperio, dederetur. Beat= 132 | us sibi videtur esse moriens.

133 | 134 |

Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone qu= 135 | aeris, inquit, quid sentiam? 136 | Si mala non sunt, iacet omnis ratio Peripateticorum. Itaque rursus=20 137 | eadem ratione, qua sum paulo ante usus, haerebitis. Paulum, cum regem=20 138 | Persem captum adduceret, eodem flumine invectio? Mihi enim erit isdem=20 139 | istis fortasse iam utendum. Ab hoc autem quaedam non melius quam=20 140 | veteres, quaedam omnino relicta. Tamen a proposito, inquam, aberramus.

141 | 142 |

Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque = 143 | communiter. Quid sequatu= 144 | r, quid repugnet, vident. 145 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti=20 146 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam=20 147 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam=20 148 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui=20 149 | quaesivit aliquid, tacet; Totum autem id externum est, et quod externum, 150 | id in casu est.


On Tue, Dec 31, 2019 at 12:08 AM Nicolas Gaborit <<= 152 | a href=3D"mailto:hello@soreine.dev" target=3D"_blank">hello@soreine.dev= 153 | > wrote:

Lorem ipsum dolor sit amet, consectetur adipisci= 157 | ng elit. An hoc usque quaque, aliter in vita? Terram, mihi crede, ea= 158 | lanx et maria deprimet. Duo Reges: constructio interrete. Id est enim, = 159 | de quo quaerimus. 160 | Parvi enim primo ortu sic iacent, tamquam omnino sine animo sint. Quis=20 161 | est tam dissimile homini. Claudii libidini, qui tum erat summo ne=20 162 | imperio, dederetur. Beat= 163 | us sibi videtur esse moriens.

164 | 165 |

Hoc dixerit potius Ennius: Nimium boni est, cui nihil est mali. Egone qu= 166 | aeris, inquit, quid sentiam? 167 | Si mala non sunt, iacet omnis ratio Peripateticorum. Itaque rursus=20 168 | eadem ratione, qua sum paulo ante usus, haerebitis. Paulum, cum regem=20 169 | Persem captum adduceret, eodem flumine invectio? Mihi enim erit isdem=20 170 | istis fortasse iam utendum. Ab hoc autem quaedam non melius quam=20 171 | veteres, quaedam omnino relicta. Tamen a proposito, inquam, aberramus.

172 | 173 |

Sic, et quidem diligentius saepiusque ista loquemur inter nos agemusque = 174 | communiter. Quid sequatu= 175 | r, quid repugnet, vident. 176 | Serpere anguiculos, nare anaticulas, evolare merulas, cornibus uti=20 177 | videmus boves, nepas aculeis. Ego vero volo in virtute vim esse quam=20 178 | maximam; Aufert enim sensus actionemque tollit omnem. Qua tu etiam=20 179 | inprudens utebare non numquam. Apud ceteros autem philosophos, qui=20 180 | quaesivit aliquid, tacet; Totum autem id externum est, et quod externum, 181 | id in casu est.

182 | 183 |
184 |
185 |
186 | 187 | --00000000000092d55a059b005ff2-- -------------------------------------------------------------------------------- /src/tests/remoteContent/email-privacy-tester.eml: -------------------------------------------------------------------------------- 1 | Delivered-To: hello@soreine.dev 2 | Received: by 2002:a67:cb08:0:0:0:0:0 with SMTP id b8csp2275501vsl; 3 | Thu, 9 Jan 2020 09:16:52 -0800 (PST) 4 | X-Google-Smtp-Source: APXvYqzsjCB/SRGubYJkV//QQcPDzLhHmNOAiNosAgOivFxAgc/GWEH0OCgqre6DqtCbBwaBfAmK 5 | X-Received: by 2002:a1c:f20c:: with SMTP id s12mr6261492wmc.173.1578590212449; 6 | Thu, 09 Jan 2020 09:16:52 -0800 (PST) 7 | ARC-Seal: i=1; a=rsa-sha256; t=1578590212; cv=none; 8 | d=google.com; s=arc-20160816; 9 | b=qlotwcuPCdLvW5FV2DbEVvLY4An/l/mZymVf+ki39NgTdGJ+nK+t3v64ToGzgmtxST 10 | q/HAHz98mfkPUR9KWW572PbYzxgIaebb2gv8Z+QZ6Nka1+2KRIOnnbwWUP7S64KkL2UE 11 | QabXotbmTkBtyvzIZZQ/WhCNEOVYomcUL8CXG4jpRcJULH3LFU8btsKXI3lVMP4d8k/E 12 | cOVP38WNMOCrJuD7Q8Dn3thvtSVpLfdUcLNhB4zA7MnSZzjFQ7NAk8JbmzQnO7vDjKPE 13 | JBQVCQq1jRxy/GGXoT3vUAgcKYtUbCGtX5CEkLrCbbfJ1a98VqlQHiXkOU/VVyWbdRag 14 | +urw== 15 | ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; 16 | h=mime-version:date:list-unsubscribe:message-id:subject:to:from 17 | :dkim-signature; 18 | bh=rkF3oQo+O7T5MlShxMzGVXq8shPc1pQc+SDBamqsENE=; 19 | b=xnKY7BFD2+IQtK38g/jpNHebbDiiCmZkBzkUAQDx6l+F+SXx0Efqux9F8IPVY+NPz+ 20 | s4ezdM0WMzk2U54OyD7gGr+/nfD067tLlL8Bn8rBmG1QmXFTZk7c4gvNGkpXh9/GIeLt 21 | F9S+oiMoj6MiotXgvjYQ5YwhKRcRBV4YddVI6Z7llyA25BOvQtckyD9VrdSiTF6shtgH 22 | pPx7AMwcVBPjqe8baPzhcCE1Qh05zKCq0dyUvlD+sCAJy4yuDRgc/xY5HH45kWsujVjA 23 | UEuQLw6GHdzgSk+VtQo6kMrd86RmoS+KTpi3e8pDdXasxaqkNX+BxXh3WOEMKINH85q3 24 | 4v+Q== 25 | ARC-Authentication-Results: i=1; mx.google.com; 26 | dkim=pass header.i=@emailprivacytester.com header.s=ned1 header.b=VkB5PFt7; 27 | spf=pass (google.com: domain of noreply@emailprivacytester.com designates 51.91.158.226 as permitted sender) 
smtp.mailfrom=noreply@emailprivacytester.com; 28 | dmarc=pass (p=REJECT sp=REJECT dis=NONE) header.from=emailprivacytester.com 29 | Return-Path: 30 | Received: from ned.grepular.com (ned.grepular.com. [51.91.158.226]) 31 | by mx.google.com with ESMTPS id g10si5877504wrx.163.2020.01.09.09.16.52 32 | for 33 | (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); 34 | Thu, 09 Jan 2020 09:16:52 -0800 (PST) 35 | Received-SPF: pass (google.com: domain of noreply@emailprivacytester.com designates 51.91.158.226 as permitted sender) client-ip=51.91.158.226; 36 | Authentication-Results: mx.google.com; 37 | dkim=pass header.i=@emailprivacytester.com header.s=ned1 header.b=VkB5PFt7; 38 | spf=pass (google.com: domain of noreply@emailprivacytester.com designates 51.91.158.226 as permitted sender) smtp.mailfrom=noreply@emailprivacytester.com; 39 | dmarc=pass (p=REJECT sp=REJECT dis=NONE) header.from=emailprivacytester.com 40 | DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=emailprivacytester.com; s=ned1; h=MIME-Version:Date:List-Unsubscribe: Message-ID:Subject:To:From:Content-Type:Sender:Cc:Content-Transfer-Encoding: Content-ID:Content-Description:Resent-Date:Resent-From:Resent-Sender: Resent-To:Resent-Cc:Resent-Message-ID:In-Reply-To:References:List-Id: List-Help:List-Subscribe:List-Post:List-Owner:List-Archive:Reply-To; bh=rkF3oQo+O7T5MlShxMzGVXq8shPc1pQc+SDBamqsENE=; b=VkB5PFt7UNM20L7pAdeyQh+n0y TT45oTPjpy5NarC6M8h/1Wys64Isp9F/Y2wPc+02xU8VjVh/QTAYm8siW9TUF/jHnAYe5b3ZenW7t ISKB1KKsiFMXATLRUG7Hy6Uy4ea6N+mu1UX1HYFt7Cl3kpjxdDG1SnDj4UffSUbECuw0=; 41 | Received: from [172.26.0.1] (port=46340 helo=ned.grepular.com) by ned.grepular.com with esmtps (TLS1.3:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.92) (envelope-from ) id 1ipbQS-0006O7-37 for hello@soreine.dev; Thu, 09 Jan 2020 17:16:52 +0000 42 | Received: from [172.21.0.4] (helo=[127.0.0.1]) by ned.grepular.com with esmtp (Exim 4.92) (envelope-from ) id 1ipbQP-0003nO-Ua for hello@soreine.dev; Thu, 09 Jan 
2020 17:16:50 +0000 43 | Content-Type: multipart/related; type="text/html"; boundary="----sinikael-?=_1-15785902099420.9555825560775772" 44 | From: "Email Privacy Tester " 45 | To: hello@soreine.dev 46 | Subject: EPT - Your Test Email - https://www.emailprivacytester.com/test?code=5e175f8d8af4d70022fc5832 47 | Message-ID: 48 | X-Mailer: nodemailer (2.7.0; +https://nodemailer.com/; SMTP/2.7.2[client:2.12.0]) 49 | List-Unsubscribe: (Opt out from further emails from https://www.emailprivacytester.com) 50 | Date: Thu, 09 Jan 2020 17:16:49 +0000 51 | MIME-Version: 1.0 52 | 53 | ------sinikael-?=_1-15785902099420.9555825560775772 54 | Content-Type: text/html; charset=utf-8 55 | Content-Transfer-Encoding: quoted-printable 56 | 57 | Email Privacy Tester

Email Privacy Teste= 78 | r

This is your test email from the Email Privacy Tester. Pl= 79 | ease ignore everything after this line. Clicking on anything will skew the = 80 | results.

= 88 |