├── .gitignore ├── .prettierrc ├── .travis.yml ├── README.md ├── example.js ├── package.json ├── sample.js ├── src ├── __tests__ │ ├── __snapshots__ │ │ ├── index.test.ts.snap │ │ ├── read-index.test.ts.snap │ │ ├── read-message.test.ts.snap │ │ ├── read-month.test.ts.snap │ │ └── stringify.test.ts.snap │ ├── index.test.ts │ ├── read-index.test.ts │ ├── read-message.test.ts │ ├── read-month.test.ts │ └── stringify.test.ts ├── html.ts ├── index.ts ├── read-index.ts ├── read-message.ts ├── read-month.ts └── stringify.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | 10 | pids 11 | logs 12 | results 13 | 14 | npm-debug.log 15 | 16 | node_modules 17 | output 18 | /lib 19 | /coverage 20 | package-lock.json -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "bracketSpacing": false, 3 | "singleQuote": true, 4 | "trailingComma": "all" 5 | } 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "6" 4 | - "8" 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pipermail 2 | 3 | node.js utilities for reading pipermail archives such as es-discuss 4 | 5 | [![Build Status](https://img.shields.io/travis/esdiscuss/pipermail/master.svg)](https://travis-ci.org/esdiscuss/pipermail) 6 | [![Dependency Status](https://img.shields.io/david/esdiscuss/pipermail.svg)](https://david-dm.org/esdiscuss/pipermail) 7 | [![NPM version](https://img.shields.io/npm/v/pipermail.svg)](https://www.npmjs.com/package/pipermail) 8 | 9 | ## Basic Usage 10 | 11 | ```javascript 12 | var pipermail = require('pipermail'); 13 | 14 | var options = {}; 15 | 16 | //`pipermail` returns a stream of JSON objects. 17 | //This can't be directly written to a file 18 | var parsed = pipermail('https://mail.mozilla.org/pipermail/es-discuss/', options); 19 | 20 | //convert the stream of json objects into a stream of JSON text seperated by new lines. 21 | var stringified = parsed.pipe(pipermail.stringify()); 22 | 23 | //pipe to a file 24 | stringified.pipe(require('fs').createWriteStream('res.txt')); 25 | 26 | //compress to a file 27 | stringified.pipe(require('zlib').createGzip()) 28 | .pipe(require('fs').createWriteStream('res.txt.gz')); 29 | ``` 30 | 31 | The resulting `res.txt` would look something like: 32 | 33 | ```javascript 34 | {"url":"https://mail.mozilla.org/pipermail/es-discuss/2006-June/003436.html","header":{"from":{"email":"baz@example.com","name":"Brendan Eich"},"date":"Sat, 3 Jun 2006 12:35:18 -0700","subject":"Welcome to the ECMAScript Edition 4 discussion list"},"body":"Thanks to Graydon Hoare for setting it up.\n\n/be"} 35 | {"url":"https://mail.mozilla.org/pipermail/es-discuss/2006-June/003437.html","header":{"from":{"email":"bar@example.com","name":"Olav Junker Kjær"},"date":"Tue, 06 Jun 2006 15:40:48 +0200","subject":"ES4 translator"},"body":"Hello,\nI'm very pleased to s the new public specs for ES4"} 36 | {"url":"https://mail.mozilla.org/pipermail/es-discuss/2006-June/003438.html","header":{"from":{"email":"foo@example.com","name":"Robert Sayre"},"date":"Wed, 7 Jun 2006 11:43:37 -0400","subject":"date literals"},"body":"I think the date literal should allow a trailing 'Z' to substitute for\n'+00:00'.\n\nRobert Sayre"} 37 | ``` 38 | 39 | I've shortened the bodies and renamed the e-mails but other than that it's the first few lines generated by the above code. 40 | 41 | ## Options 42 | 43 | - filterMonth: a function that gets the month's url as its argument and returns `true` or `false` to indicate whether the month should be included (or returns a promise if it's asynchronous) 44 | - filterMessage: a function that gets the message's url as its argument and returns `true` or `false` to indicate whether the message should be downloaded (or returns a promise if it's asynchronous) 45 | - months: the maximum number of months to download. If set, only the most recent n months will be downloaded. 46 | - parallel: the maximum number of messages to download in parallel, defaults to `10` 47 | - parallelMonths: the maximum number of month index pages to download in parallel, defaults to `2` 48 | - archiveUrlRegex : the regex used to look for message archives on the index page of the pipermail `/\d\d\d\d\-[a-z]+\.txt(?:\.gz)?/gi` 49 | 50 | ## License 51 | 52 | MIT 53 | -------------------------------------------------------------------------------- /example.js: -------------------------------------------------------------------------------- 1 | var pipermail = require('./') 2 | 3 | //`pipermail` returns a stream of JSON objects. 4 | //This can't be directly written to a file 5 | var parsed = pipermail('https://mail.mozilla.org/pipermail/es-discuss/') 6 | 7 | //convert the stream of json objects into a stream of JSON text seperated by new lines. 8 | var stringified = parsed.pipe(pipermail.stringify()) 9 | 10 | //pipe to stdout 11 | stringified.pipe(process.stdout) -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pipermail", 3 | "version": "5.0.1", 4 | "description": 5 | "node.js utilities for reading pipermail archives such as es-discuss", 6 | "main": "./lib/index.js", 7 | "types": "./lib/index.d.ts", 8 | "files": ["lib/"], 9 | "dependencies": { 10 | "@types/ent": "^2.2.1", 11 | "@types/htmlparser2": "^3.7.31", 12 | "@types/node": "^10.0.3", 13 | "barrage": "^2.0.3", 14 | "ent": "https://github.com/esdiscuss/ent/archive/fixed.tar.gz", 15 | "htmlparser2": "~3.9.2", 16 | "promise": "~8.0.0", 17 | "then-request": "^6.0.0" 18 | }, 19 | "devDependencies": { 20 | "@types/jest": "*", 21 | "husky": "*", 22 | "jest": "*", 23 | "lint-staged": "*", 24 | "prettier": "*", 25 | "ts-jest": "*", 26 | "typescript": "*" 27 | }, 28 | "scripts": { 29 | "precommit": "lint-staged", 30 | "prepublish": "npm run build", 31 | "prettier": "prettier --write \"src/**/*.{ts,tsx}\"", 32 | "prettier:check": "prettier --list-different \"src/**/*.{ts,tsx}\"", 33 | "build": "tsc", 34 | "test": "jest ./src --coverage", 35 | "watch": "jest ./src --coverage --watch" 36 | }, 37 | "lint-staged": { 38 | "*.{ts,tsx}": ["prettier --write", "git add"] 39 | }, 40 | "jest": { 41 | "testEnvironment": "node", 42 | "moduleFileExtensions": ["ts", "tsx", "js"], 43 | "transform": { 44 | "\\.(ts|tsx)$": "/node_modules/ts-jest/preprocessor.js" 45 | }, 46 | "testMatch": ["**/*.test.(ts|tsx|js)"] 47 | }, 48 | "repository": { 49 | "type": "git", 50 | "url": "https://github.com/esdiscuss/pipermail.git" 51 | }, 52 | "author": "ForbesLindesay", 53 | "license": "MIT" 54 | } -------------------------------------------------------------------------------- /sample.js: -------------------------------------------------------------------------------- 1 | var pipermail = require('./'); 2 | 3 | //`pipermail` returns a stream of JSON objects. 4 | //This can't be directly written to a file 5 | var parsed = pipermail('https://mail.mozilla.org/pipermail/es-discuss/'); 6 | 7 | //convert the stream of json objects into a stream of JSON text seperated by new lines. 8 | var stringified = parsed.pipe(pipermail.stringify()); 9 | 10 | //pipe to a file 11 | stringified.pipe(require('fs').createWriteStream('res.txt')); 12 | 13 | //compress to a file 14 | stringified 15 | .pipe(require('zlib').createGzip()) 16 | .pipe(require('fs').createWriteStream('res.txt.gz')); 17 | -------------------------------------------------------------------------------- /src/__tests__/__snapshots__/read-index.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`parses the index to return an array of month urls 1`] = ` 4 | Array [ 5 | "https://mail.mozilla.org/pipermail/es-discuss/2006-June", 6 | "https://mail.mozilla.org/pipermail/es-discuss/2006-July", 7 | "https://mail.mozilla.org/pipermail/es-discuss/2006-August", 8 | "https://mail.mozilla.org/pipermail/es-discuss/2006-September", 9 | "https://mail.mozilla.org/pipermail/es-discuss/2006-October", 10 | "https://mail.mozilla.org/pipermail/es-discuss/2006-November", 11 | "https://mail.mozilla.org/pipermail/es-discuss/2006-December", 12 | "https://mail.mozilla.org/pipermail/es-discuss/2007-January", 13 | "https://mail.mozilla.org/pipermail/es-discuss/2007-February", 14 | "https://mail.mozilla.org/pipermail/es-discuss/2007-March", 15 | "https://mail.mozilla.org/pipermail/es-discuss/2007-April", 16 | "https://mail.mozilla.org/pipermail/es-discuss/2007-May", 17 | "https://mail.mozilla.org/pipermail/es-discuss/2007-June", 18 | "https://mail.mozilla.org/pipermail/es-discuss/2007-July", 19 | "https://mail.mozilla.org/pipermail/es-discuss/2007-August", 20 | "https://mail.mozilla.org/pipermail/es-discuss/2007-September", 21 | "https://mail.mozilla.org/pipermail/es-discuss/2007-October", 22 | "https://mail.mozilla.org/pipermail/es-discuss/2007-November", 23 | "https://mail.mozilla.org/pipermail/es-discuss/2007-December", 24 | "https://mail.mozilla.org/pipermail/es-discuss/2008-January", 25 | "https://mail.mozilla.org/pipermail/es-discuss/2008-February", 26 | "https://mail.mozilla.org/pipermail/es-discuss/2008-March", 27 | "https://mail.mozilla.org/pipermail/es-discuss/2008-April", 28 | "https://mail.mozilla.org/pipermail/es-discuss/2008-May", 29 | "https://mail.mozilla.org/pipermail/es-discuss/2008-June", 30 | "https://mail.mozilla.org/pipermail/es-discuss/2008-July", 31 | "https://mail.mozilla.org/pipermail/es-discuss/2008-August", 32 | "https://mail.mozilla.org/pipermail/es-discuss/2008-September", 33 | "https://mail.mozilla.org/pipermail/es-discuss/2008-October", 34 | "https://mail.mozilla.org/pipermail/es-discuss/2008-November", 35 | "https://mail.mozilla.org/pipermail/es-discuss/2008-December", 36 | "https://mail.mozilla.org/pipermail/es-discuss/2009-January", 37 | "https://mail.mozilla.org/pipermail/es-discuss/2009-February", 38 | "https://mail.mozilla.org/pipermail/es-discuss/2009-March", 39 | "https://mail.mozilla.org/pipermail/es-discuss/2009-April", 40 | "https://mail.mozilla.org/pipermail/es-discuss/2009-May", 41 | "https://mail.mozilla.org/pipermail/es-discuss/2009-June", 42 | "https://mail.mozilla.org/pipermail/es-discuss/2009-July", 43 | "https://mail.mozilla.org/pipermail/es-discuss/2009-August", 44 | "https://mail.mozilla.org/pipermail/es-discuss/2009-September", 45 | "https://mail.mozilla.org/pipermail/es-discuss/2009-October", 46 | "https://mail.mozilla.org/pipermail/es-discuss/2009-November", 47 | "https://mail.mozilla.org/pipermail/es-discuss/2009-December", 48 | "https://mail.mozilla.org/pipermail/es-discuss/2010-January", 49 | "https://mail.mozilla.org/pipermail/es-discuss/2010-February", 50 | "https://mail.mozilla.org/pipermail/es-discuss/2010-March", 51 | "https://mail.mozilla.org/pipermail/es-discuss/2010-April", 52 | "https://mail.mozilla.org/pipermail/es-discuss/2010-May", 53 | "https://mail.mozilla.org/pipermail/es-discuss/2010-June", 54 | "https://mail.mozilla.org/pipermail/es-discuss/2010-July", 55 | "https://mail.mozilla.org/pipermail/es-discuss/2010-August", 56 | "https://mail.mozilla.org/pipermail/es-discuss/2010-September", 57 | "https://mail.mozilla.org/pipermail/es-discuss/2010-October", 58 | "https://mail.mozilla.org/pipermail/es-discuss/2010-November", 59 | "https://mail.mozilla.org/pipermail/es-discuss/2010-December", 60 | "https://mail.mozilla.org/pipermail/es-discuss/2011-January", 61 | "https://mail.mozilla.org/pipermail/es-discuss/2011-February", 62 | "https://mail.mozilla.org/pipermail/es-discuss/2011-March", 63 | "https://mail.mozilla.org/pipermail/es-discuss/2011-April", 64 | "https://mail.mozilla.org/pipermail/es-discuss/2011-May", 65 | "https://mail.mozilla.org/pipermail/es-discuss/2011-June", 66 | "https://mail.mozilla.org/pipermail/es-discuss/2011-July", 67 | "https://mail.mozilla.org/pipermail/es-discuss/2011-August", 68 | "https://mail.mozilla.org/pipermail/es-discuss/2011-September", 69 | "https://mail.mozilla.org/pipermail/es-discuss/2011-October", 70 | "https://mail.mozilla.org/pipermail/es-discuss/2011-November", 71 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December", 72 | "https://mail.mozilla.org/pipermail/es-discuss/2012-January", 73 | "https://mail.mozilla.org/pipermail/es-discuss/2012-February", 74 | "https://mail.mozilla.org/pipermail/es-discuss/2012-March", 75 | ] 76 | `; 77 | -------------------------------------------------------------------------------- /src/__tests__/__snapshots__/read-message.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`https://mail.mozilla.org/pipermail/es-discuss/2008-October/007920.html 1`] = ` 4 | Object { 5 | "body": "", 6 | "header": Object { 7 | "date": 2008-10-27T19:12:23.000Z, 8 | "from": Object { 9 | "email": "waldemar@google.com", 10 | "name": "Waldemar Horwat", 11 | }, 12 | "reply": "mailto:es-discuss%40mozilla.org?Subject=Re:%20Re%3A%20ES3.1%27s%20object%20model%20and%20const&In-Reply-To=%3C49061297.8060007%40google.com%3E", 13 | "subject": "ES3.1's object model and const", 14 | }, 15 | "url": "https://mail.mozilla.org/pipermail/es-discuss/2008-October/007920.html", 16 | } 17 | `; 18 | 19 | exports[`https://mail.mozilla.org/pipermail/es-discuss/2013-April/029615.html 1`] = ` 20 | Object { 21 | "body": "Hi Ohad, 22 | 23 | Le 05/04/2013 12:00, Assulin, Ohad a écrit : 24 | > 25 | > Quick question, I would love to submit a feature proposal for 26 | > ECMAScript 7 (Function Annotation) 27 | > 28 | > What is the exact process? 29 | > 30 | People usually just write their proposal in a post to es-discuss or as a 31 | gist [1] in markdown format and post the link to es-discuss. But use 32 | whatever is most convenient to you as long as it's easy to read for 33 | everyone. Some proposals start in the wiki [2], but only members of TC39 34 | have write access to it. 35 | As far as a \\"formal\\" process up to the spec, the proposal will be 36 | discussed by TC39 at one or several meetings (one every 2 months) and 37 | formal decisions of acceptance are decided there. In the vast majority 38 | of cases, a lot of discussion happens before these meetings on 39 | es-discuss (and after based on the meeting notes). 40 | 41 | > Is there any template I can use? 42 | > 43 | I don't think so. You can read pages on the wiki (accepted proposals 44 | [3]) to see how they are usually structured, but I think it's more a 45 | convention. 46 | As far as content is concerned, I recommend explaining use cases, 47 | explaining how the feature is a significant improvement, explore related 48 | work. For function annotations, I imagine some folks have already done 49 | such work with custom comment grammar and it'd be nice to see what 50 | people came up with, hopefully what error they made so the built-in 51 | feature doesn't reproduce the same errors, etc. 52 | 53 | Hope that helps, 54 | 55 | David 56 | 57 | [1] https://gist.github.com/ 58 | [2] http://wiki.ecmascript.org/doku.php 59 | [3] http://wiki.ecmascript.org/doku.php?id=harmony:proposals 60 | -------------- next part -------------- 61 | An HTML attachment was scrubbed... 62 | URL: ", 63 | "header": Object { 64 | "date": 2013-04-05T10:27:37.000Z, 65 | "from": Object { 66 | "email": "bruant.d@gmail.com", 67 | "name": "David Bruant", 68 | }, 69 | "reply": "mailto:es-discuss%40mozilla.org?Subject=Re:%20Re%3A%20how%20to%20submit%20a%20proposal%20for%20ECMAScript%207%3F&In-Reply-To=%3C515EA719.5000702%40gmail.com%3E", 70 | "subject": "how to submit a proposal for ECMAScript 7?", 71 | }, 72 | "url": "https://mail.mozilla.org/pipermail/es-discuss/2013-April/029615.html", 73 | } 74 | `; 75 | -------------------------------------------------------------------------------- /src/__tests__/__snapshots__/read-month.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`parses a month listing page to return an array of message urls 1`] = ` 4 | Array [ 5 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018815.html", 6 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018816.html", 7 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018817.html", 8 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018818.html", 9 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018819.html", 10 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018820.html", 11 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018821.html", 12 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018822.html", 13 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018823.html", 14 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018824.html", 15 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018825.html", 16 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018826.html", 17 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018828.html", 18 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018827.html", 19 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018829.html", 20 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018830.html", 21 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018831.html", 22 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018832.html", 23 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018833.html", 24 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018834.html", 25 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018835.html", 26 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018836.html", 27 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018837.html", 28 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018838.html", 29 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018839.html", 30 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018840.html", 31 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018841.html", 32 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018842.html", 33 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018843.html", 34 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018844.html", 35 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018847.html", 36 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018846.html", 37 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018845.html", 38 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018848.html", 39 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018849.html", 40 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018850.html", 41 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018851.html", 42 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018852.html", 43 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018853.html", 44 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018854.html", 45 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018855.html", 46 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018856.html", 47 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018857.html", 48 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018858.html", 49 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018859.html", 50 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018860.html", 51 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018861.html", 52 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018862.html", 53 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018863.html", 54 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018864.html", 55 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018865.html", 56 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018866.html", 57 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018867.html", 58 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018868.html", 59 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018869.html", 60 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018870.html", 61 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018871.html", 62 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018872.html", 63 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018873.html", 64 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018874.html", 65 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018875.html", 66 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018876.html", 67 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018877.html", 68 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018878.html", 69 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018879.html", 70 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018880.html", 71 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018881.html", 72 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018882.html", 73 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018883.html", 74 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018884.html", 75 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018885.html", 76 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018886.html", 77 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018887.html", 78 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018888.html", 79 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018889.html", 80 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018890.html", 81 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018891.html", 82 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018892.html", 83 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018893.html", 84 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018894.html", 85 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018895.html", 86 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018896.html", 87 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018897.html", 88 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018898.html", 89 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018899.html", 90 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018900.html", 91 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018901.html", 92 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018902.html", 93 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018903.html", 94 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018904.html", 95 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018905.html", 96 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018906.html", 97 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018907.html", 98 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018908.html", 99 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018909.html", 100 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018910.html", 101 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018911.html", 102 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018912.html", 103 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018913.html", 104 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018914.html", 105 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018915.html", 106 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018916.html", 107 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018917.html", 108 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018918.html", 109 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018919.html", 110 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018920.html", 111 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018921.html", 112 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018922.html", 113 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018923.html", 114 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018924.html", 115 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018925.html", 116 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018926.html", 117 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018927.html", 118 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018928.html", 119 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018929.html", 120 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018930.html", 121 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018931.html", 122 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018932.html", 123 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018933.html", 124 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018934.html", 125 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018935.html", 126 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018936.html", 127 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018937.html", 128 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018938.html", 129 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018940.html", 130 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018939.html", 131 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018941.html", 132 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018942.html", 133 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018943.html", 134 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018944.html", 135 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018945.html", 136 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018946.html", 137 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018947.html", 138 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018948.html", 139 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018949.html", 140 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018950.html", 141 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018951.html", 142 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018952.html", 143 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018953.html", 144 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018954.html", 145 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018955.html", 146 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018956.html", 147 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018957.html", 148 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018958.html", 149 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018959.html", 150 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018960.html", 151 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018961.html", 152 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018962.html", 153 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018963.html", 154 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018964.html", 155 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018965.html", 156 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018966.html", 157 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018967.html", 158 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018968.html", 159 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018969.html", 160 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018970.html", 161 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018971.html", 162 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018972.html", 163 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018973.html", 164 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018974.html", 165 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018975.html", 166 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018976.html", 167 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018977.html", 168 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018978.html", 169 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018980.html", 170 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018979.html", 171 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018981.html", 172 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018982.html", 173 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018983.html", 174 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018984.html", 175 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018985.html", 176 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018986.html", 177 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018987.html", 178 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018988.html", 179 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018989.html", 180 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018990.html", 181 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018991.html", 182 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018992.html", 183 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018993.html", 184 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018994.html", 185 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018995.html", 186 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018996.html", 187 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018997.html", 188 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018998.html", 189 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/018999.html", 190 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019000.html", 191 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019001.html", 192 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019002.html", 193 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019003.html", 194 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019004.html", 195 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019005.html", 196 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019006.html", 197 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019007.html", 198 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019008.html", 199 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019009.html", 200 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019010.html", 201 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019011.html", 202 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019012.html", 203 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019013.html", 204 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019014.html", 205 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019015.html", 206 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019016.html", 207 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019017.html", 208 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019018.html", 209 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019019.html", 210 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019020.html", 211 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019021.html", 212 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019022.html", 213 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019023.html", 214 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019024.html", 215 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019025.html", 216 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019026.html", 217 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019027.html", 218 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019028.html", 219 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019029.html", 220 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019030.html", 221 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019031.html", 222 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019032.html", 223 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019033.html", 224 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019034.html", 225 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019035.html", 226 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019036.html", 227 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019037.html", 228 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019038.html", 229 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019039.html", 230 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019040.html", 231 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019041.html", 232 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019042.html", 233 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019043.html", 234 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019044.html", 235 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019045.html", 236 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019046.html", 237 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019047.html", 238 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019048.html", 239 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019049.html", 240 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019050.html", 241 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019051.html", 242 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019052.html", 243 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019053.html", 244 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019054.html", 245 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019055.html", 246 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019056.html", 247 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019058.html", 248 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019057.html", 249 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019059.html", 250 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019060.html", 251 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019061.html", 252 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019062.html", 253 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019063.html", 254 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019064.html", 255 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019065.html", 256 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019066.html", 257 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019067.html", 258 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019068.html", 259 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019069.html", 260 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019070.html", 261 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019071.html", 262 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019072.html", 263 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019073.html", 264 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019074.html", 265 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019075.html", 266 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019076.html", 267 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019077.html", 268 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019078.html", 269 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019079.html", 270 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019080.html", 271 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019081.html", 272 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019082.html", 273 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019083.html", 274 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019084.html", 275 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019085.html", 276 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019086.html", 277 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019087.html", 278 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019088.html", 279 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019090.html", 280 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019089.html", 281 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019091.html", 282 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019092.html", 283 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019093.html", 284 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019094.html", 285 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019095.html", 286 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019096.html", 287 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019097.html", 288 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019098.html", 289 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019099.html", 290 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019100.html", 291 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019101.html", 292 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019102.html", 293 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019103.html", 294 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019104.html", 295 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019105.html", 296 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019106.html", 297 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019107.html", 298 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019108.html", 299 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019109.html", 300 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019110.html", 301 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019111.html", 302 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019112.html", 303 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019113.html", 304 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019114.html", 305 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019115.html", 306 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019116.html", 307 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019117.html", 308 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019118.html", 309 | "https://mail.mozilla.org/pipermail/es-discuss/2011-December/019119.html", 310 | ] 311 | `; 312 | -------------------------------------------------------------------------------- /src/__tests__/__snapshots__/stringify.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`stiringify 1`] = ` 4 | "{\\"foo\\":10} 5 | {\\"bar\\":42} 6 | {\\"baz\\":32}" 7 | `; 8 | -------------------------------------------------------------------------------- /src/__tests__/index.test.ts: -------------------------------------------------------------------------------- 1 | import downloadAsStream from '../'; 2 | 3 | (jest as any).setTimeout(60000); 4 | 5 | test('download as stream', async () => { 6 | const messages = await downloadAsStream( 7 | 'https://mail.mozilla.org/pipermail/es-discuss/', 8 | { 9 | filterMonth(url) { 10 | return ( 11 | url === 12 | 'https://mail.mozilla.org/pipermail/es-discuss/2007-November' || 13 | url === 'https://mail.mozilla.org/pipermail/es-discuss/2007-December' 14 | ); 15 | }, 16 | parallel: 40, 17 | }, 18 | ).buffer(); 19 | expect(messages).toMatchSnapshot(); 20 | }); 21 | -------------------------------------------------------------------------------- /src/__tests__/read-index.test.ts: -------------------------------------------------------------------------------- 1 | import readIndex from '../read-index'; 2 | 3 | test('parses the index to return an array of month urls', async () => { 4 | const res = await readIndex('https://mail.mozilla.org/pipermail/es-discuss/'); 5 | expect(Array.isArray(res)).toBe(true); 6 | expect(res.length).toBeGreaterThan(70); 7 | res.forEach(url => { 8 | expect(url).toMatch( 9 | /^https:\/\/mail.mozilla.org\/pipermail\/es-discuss\/\d\d\d\d\-[A-Z][a-z]+$/, 10 | ); 11 | }); 12 | // by only looking at the oldest months, they should rarely change 13 | expect(res.slice(0, 70)).toMatchSnapshot(); 14 | }); 15 | -------------------------------------------------------------------------------- /src/__tests__/read-message.test.ts: -------------------------------------------------------------------------------- 1 | import readMessage from '../read-message'; 2 | 3 | function testMessage(url: string) { 4 | test(url, async () => { 5 | const message = await readMessage(url); 6 | expect(message).toMatchSnapshot(); 7 | }); 8 | } 9 | testMessage( 10 | 'https://mail.mozilla.org/pipermail/es-discuss/2013-April/029615.html', 11 | ); 12 | testMessage( 13 | // a message with no body 14 | 'https://mail.mozilla.org/pipermail/es-discuss/2008-October/007920.html', 15 | ); 16 | -------------------------------------------------------------------------------- /src/__tests__/read-month.test.ts: -------------------------------------------------------------------------------- 1 | import readMonth from '../read-month'; 2 | 3 | test('parses a month listing page to return an array of message urls', async () => { 4 | const res = await readMonth( 5 | 'https://mail.mozilla.org/pipermail/es-discuss/2011-December', 6 | ); 7 | expect(Array.isArray(res)).toBeTruthy(); 8 | expect(res.length).toBeGreaterThan(10); 9 | res.forEach(url => { 10 | expect(url).toMatch( 11 | /^https:\/\/mail.mozilla.org\/pipermail\/es-discuss\/2011-December\/\d\d\d\d\d\d\.html$/, 12 | ); 13 | }); 14 | expect(res).toMatchSnapshot(); 15 | }); 16 | -------------------------------------------------------------------------------- /src/__tests__/stringify.test.ts: -------------------------------------------------------------------------------- 1 | import stringify from '../stringify'; 2 | 3 | test('stiringify', async () => { 4 | const strm = stringify(); 5 | strm.write({foo: 10}); 6 | strm.write({bar: 42}); 7 | strm.write({baz: 32}); 8 | strm.end(); 9 | expect(await strm.buffer('utf8')).toMatchSnapshot(); 10 | }); 11 | -------------------------------------------------------------------------------- /src/html.ts: -------------------------------------------------------------------------------- 1 | import {Parser} from 'htmlparser2'; 2 | import {decode} from 'ent'; 3 | 4 | // no types are included for this export 5 | const {DomHandler} = require('htmlparser2'); 6 | 7 | const selfClosing = [ 8 | 'meta', 9 | 'img', 10 | 'link', 11 | 'input', 12 | 'source', 13 | 'area', 14 | 'base', 15 | 'col', 16 | 'br', 17 | 'hr', 18 | ]; 19 | 20 | export enum DomNodeType { 21 | tag = 'tag', 22 | text = 'text', 23 | } 24 | export interface DomNodeBase { 25 | type: DomNodeType; 26 | } 27 | export interface DomNodeTag extends DomNodeBase { 28 | type: DomNodeType.tag; 29 | name: string; 30 | children: DomNode[]; 31 | attribs: {[key: string]: string | void}; 32 | } 33 | export interface DomNodeText extends DomNodeBase { 34 | type: DomNodeType.text; 35 | data: string; 36 | } 37 | export type DomNode = DomNodeTag | DomNodeText; 38 | 39 | export default function parseHTML(src: string) { 40 | var handler = new DomHandler(); 41 | var p = new Parser(handler); 42 | p.parseComplete(src); 43 | function fix(dom: DomNode | DomNode[]): DomNode[] { 44 | if (Array.isArray(dom)) { 45 | return dom.map(fix).reduce(function(a, b) { 46 | return a.concat(b); 47 | }, []); 48 | } else if (dom.type != 'tag') { 49 | return [dom]; 50 | } else if (selfClosing.indexOf(dom.name.toLowerCase()) === -1) { 51 | if (dom.children) dom.children = fix(dom.children); 52 | else dom.children = []; 53 | return [dom]; 54 | } else { 55 | var c = dom.children; 56 | dom.children = []; 57 | return [dom as DomNode].concat(fix(c || [])); 58 | } 59 | } 60 | return new DOM(fix(handler.dom)); 61 | } 62 | 63 | function textContent(dom: DomNode[] | DomNode): string { 64 | if (Array.isArray(dom)) return dom.map(textContent).join(''); 65 | else if (dom.type === 'text') return decode(dom.data); 66 | else if (dom.type === 'tag') return textContent(dom.children); 67 | else return ''; 68 | } 69 | 70 | export class DOM { 71 | dom: DomNode[]; 72 | constructor(dom: DomNode[]) { 73 | this.dom = dom; 74 | } 75 | textContent(): string { 76 | return textContent(this.dom); 77 | } 78 | attr(name: string): string | null { 79 | const dom = this.dom[0]; 80 | if (dom.type === 'tag') { 81 | var keys = Object.keys(dom.attribs); 82 | for (var i = 0; i < keys.length; i++) { 83 | if (keys[i].toLowerCase() === name) { 84 | const value = dom.attribs[keys[i]]; 85 | return value ? decode(value) : null; 86 | } 87 | } 88 | } 89 | return null; 90 | } 91 | first(): DOM { 92 | return new DOM(this.dom.length ? [this.dom[0]] : []); 93 | } 94 | select(path: string[]): DOM { 95 | return new DOM(tagPath(this.dom, path.slice())); 96 | } 97 | } 98 | 99 | function tagPath(dom: DomNode[], path: string[]): DomNode[] { 100 | const tag = path.shift(); 101 | const matchingTags: DomNodeTag[] = dom.filter((c): c is DomNodeTag => { 102 | return c.type === DomNodeType.tag && c.name.toLowerCase() === tag; 103 | }); 104 | if (path.length === 0) return matchingTags; 105 | else 106 | return matchingTags 107 | .map(node => tagPath(node.children, path)) 108 | .reduce(function(a, b) { 109 | return a.concat(b); 110 | }, []); 111 | } 112 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import {Readable, ReadableStream} from 'barrage'; 2 | import Promise = require('promise'); 3 | import readIndex from './read-index'; 4 | import readMonth from './read-month'; 5 | import readMessage, {Message, Header} from './read-message'; 6 | import stringify from './stringify'; 7 | 8 | function toStream(p: PromiseLike): Readable { 9 | let hasRead = false; 10 | return new Readable({ 11 | objectMode: true, 12 | read(size, push) { 13 | if (hasRead) { 14 | return; 15 | } 16 | hasRead = true; 17 | p.then( 18 | a => { 19 | a.forEach(v => push(v)); 20 | push(null); 21 | }, 22 | err => { 23 | this.emit('error', err); 24 | }, 25 | ); 26 | }, 27 | }); 28 | } 29 | 30 | export interface Options { 31 | archiveUrlRegex?: RegExp; 32 | filterMonth?: (month: string) => boolean | PromiseLike; 33 | filterMessage?: (month: string) => boolean | PromiseLike; 34 | months?: number; 35 | parallelMonths?: number; 36 | parallel?: number; 37 | } 38 | export {readIndex, readMonth, readMessage, stringify}; 39 | export {Message, Header, ReadableStream}; 40 | 41 | export default function download( 42 | src: string, 43 | options: Options = {}, 44 | ): ReadableStream { 45 | const filterMonth = options.filterMonth || TRUE; 46 | const filterMessage = options.filterMessage || TRUE; 47 | 48 | const months = readIndex(src, options).then(function(months) { 49 | if (options.months && options.months !== Infinity) { 50 | return months.slice(Math.max(months.length - options.months, 0)); 51 | } else { 52 | return months; 53 | } 54 | }); 55 | 56 | return toStream(months) 57 | .filter(filterMonth) 58 | .flatMap(month => readMonth(month), {parallel: options.parallelMonths || 2}) 59 | .filter(filterMessage) 60 | .map(url => readMessage(url), {parallel: options.parallel || 10}); 61 | } 62 | 63 | function TRUE() { 64 | return true; 65 | } 66 | 67 | module.exports = download; 68 | module.exports.default = download; 69 | module.exports.readIndex = readIndex; 70 | module.exports.readMonth = readMonth; 71 | module.exports.readMessage = readMessage; 72 | module.exports.stringify = stringify; 73 | -------------------------------------------------------------------------------- /src/read-index.ts: -------------------------------------------------------------------------------- 1 | import request from 'then-request'; 2 | import Promise = require('promise'); 3 | 4 | export interface Options { 5 | archiveUrlRegex?: RegExp; 6 | } 7 | export default function readIndex( 8 | url: string, 9 | options: Options = {}, 10 | ): Promise { 11 | url = url.replace(/\/$/, ''); 12 | return request('GET', url, { 13 | retry: true, 14 | retryDelay: (err, res, attemptNo) => 500 * Math.pow(2, attemptNo), 15 | }) 16 | .getBody('utf8') 17 | .then(body => { 18 | const pattern = 19 | options.archiveUrlRegex || /\d\d\d\d\-[a-z]+\.txt(?:\.gz)?/gi; 20 | let match; 21 | const urls = []; 22 | while ((match = pattern.exec(body))) { 23 | urls.push(url + '/' + match[0].replace(/\.txt(?:\.gz)?/, '')); 24 | } 25 | return urls.reverse(); 26 | }); 27 | } 28 | -------------------------------------------------------------------------------- /src/read-message.ts: -------------------------------------------------------------------------------- 1 | import request from 'then-request'; 2 | import Promise = require('promise'); 3 | import html from './html'; 4 | 5 | export interface Header { 6 | subject: string; 7 | from: { 8 | name: string; 9 | email: string; 10 | }; 11 | reply: string; 12 | date: Date; 13 | } 14 | export interface Message { 15 | url: string; 16 | header: Header; 17 | body: string; 18 | } 19 | export default function readMessage(url: string): Promise { 20 | return request('GET', url, { 21 | retry: true, 22 | retryDelay: (err, res, attemptNo) => 500 * Math.pow(2, attemptNo), 23 | }) 24 | .getBody('utf8') 25 | .then(function(body) { 26 | try { 27 | const dom = html(body); 28 | 29 | const header = { 30 | subject: dom 31 | .select(['html', 'body', 'h1']) 32 | .first() 33 | .textContent() 34 | .trim(), 35 | from: { 36 | name: dom 37 | .select(['html', 'body', 'b']) 38 | .first() 39 | .textContent() 40 | .trim(), 41 | email: dom 42 | .select(['html', 'body', 'a']) 43 | .first() 44 | .textContent() 45 | .trim() 46 | .replace(' at ', '@'), 47 | }, 48 | reply: 49 | dom 50 | .select(['html', 'body', 'a']) 51 | .first() 52 | .attr('href') || '', 53 | date: new Date( 54 | dom 55 | .select(['html', 'body', 'i']) 56 | .first() 57 | .textContent() 58 | .trim(), 59 | ), 60 | }; 61 | 62 | return { 63 | url: url, 64 | header: header, 65 | body: dom 66 | .select(['html', 'body', 'p', 'pre']) 67 | .first() 68 | .textContent() 69 | .trim(), 70 | }; 71 | } catch (ex) { 72 | ex.message += '\n\n\n' + body; 73 | throw ex; 74 | } 75 | }); 76 | } 77 | -------------------------------------------------------------------------------- /src/read-month.ts: -------------------------------------------------------------------------------- 1 | import request from 'then-request'; 2 | import Promise = require('promise'); 3 | 4 | export default function readMonth(url: string): Promise { 5 | url = url.replace(/\/$/, '').replace(/\/date\.html$/, ''); 6 | return request('GET', url + '/date.html', { 7 | retry: true, 8 | retryDelay: (err, res, attemptNo) => 500 * Math.pow(2, attemptNo), 9 | }) 10 | .getBody('utf8') 11 | .then(body => { 12 | const urls = new Set(); 13 | const pattern = /href=\"(\d+\.html)\"/gi; 14 | let match; 15 | while ((match = pattern.exec(body))) { 16 | urls.add(url + '/' + match[1]); 17 | } 18 | return Array.from(urls); 19 | }); 20 | } 21 | -------------------------------------------------------------------------------- /src/stringify.ts: -------------------------------------------------------------------------------- 1 | import {Transform} from 'barrage'; 2 | 3 | export default function stringify(): Transform { 4 | let first = true; 5 | return new Transform({ 6 | writableObjectMode: true, 7 | transform(item, _, push, cb) { 8 | push((first ? '' : '\n') + JSON.stringify(item)); 9 | first = false; 10 | cb(); 11 | }, 12 | }); 13 | } 14 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "declaration": true, 4 | "lib": ["es2017"], 5 | "outDir": "lib", 6 | "strict": true 7 | } 8 | } 9 | --------------------------------------------------------------------------------