├── .editorconfig
├── .eslintrc
├── .gitignore
├── .npmignore
├── LICENSE
├── README.md
├── eslint.config.cjs
├── package.json
├── src
│   ├── extractEmail.test.ts
│   ├── extractEmail.ts
│   ├── index.ts
│   ├── normalizeInput.test.ts
│   └── normalizeInput.ts
└── tsconfig.json
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | end_of_line = lf
6 | indent_size = 2
7 | indent_style = space
8 | insert_final_newline = true
9 | trim_trailing_whitespace = true
10 |
--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "canonical"
4 | ],
5 | "root": true
6 | }
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | coverage
2 | dist
3 | node_modules
4 | *.log
5 | .*
6 | !.editorconfig
7 | !.eslintignore
8 | !.eslintrc
9 | !.gitignore
10 | !.npmignore
11 | /package-lock.json
12 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | test
2 | coverage
3 | .*
4 | *.log
5 | !.flowconfig
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2024, Gajus Kuizinas (https://gajus.com/)
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 | * Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | * Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | * Neither the name of the Gajus Kuizinas (https://gajus.com/) nor the
12 | names of its contributors may be used to endorse or promote products
13 | derived from this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL GAJUS KUIZINAS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # extract-email-address 📧
3 |
4 | [Coveralls](https://coveralls.io/github/gajus/extract-email-address)
5 | [NPM version](https://www.npmjs.org/package/extract-email-address)
6 | [Canonical Code Style](https://github.com/gajus/canonical)
7 | [Twitter Follow](https://twitter.com/kuizinas)
8 |
9 | Extracts email-like entities from an arbitrary text input.
10 |
11 | * [extract-email-address 📧](#extract-email-address)
12 | * [API](#extract-email-address-api)
13 | * [Usage](#extract-email-address-usage)
14 | * [Filtering results](#extract-email-address-filtering-results)
15 | * [Related projects](#extract-email-address-related-projects)
16 |
17 |
18 |
19 | ## API
20 |
21 | ```js
22 | import {
23 | extractEmail,
24 | type EmailMatch,
25 | } from 'extract-email-address';
26 |
27 | extractEmail(input: string): readonly EmailMatch[];
28 | ```
29 |
30 |
31 | ## Usage
32 |
33 | ```js
34 | import { extractEmail } from 'extract-email-address';
35 |
36 | extractEmail('extracts email from anywhere within the input gajus@gajus.com');
37 | // [{email: 'gajus@gajus.com'}]
38 |
39 | extractEmail('extracts multiple emails located anywhere within the input: foo@gajus.com, bar@gajus.com');
40 | // [{email: 'foo@gajus.com'}, {email: 'bar@gajus.com'}]
41 |
42 | extractEmail('extracts all sorts of obfuscated emails, e.g. f o o @ b a r . c o m or baz [at] qux [dot] com');
43 | // [{email: 'foo@bar.com'}, {email: 'baz@qux.com'}]
44 |
45 | extractEmail('extracts tagged emails, e.g. gajus+foo@gajus.com');
46 | // [{email: 'gajus+foo@gajus.com'}]
47 |
48 | extractEmail('extracts emails surrounded by odd unicode characters, e.g. 邮箱:gajus@gajus.com');
49 | // [{email: 'gajus@gajus.com'}]
50 |
51 | extractEmail('extracts emails surrounded by emojis, e.g. 📧gajus@gajus.com');
52 | // [{email: 'gajus@gajus.com'}]
53 |
54 | extractEmail('excludes invalid emails with invalid TLDs, e.g. gajus@gajus.png');
55 | // []
56 |
57 | extractEmail('ignores invalid emails foo@bar');
58 | // []
59 |
60 | ```
61 |
62 |
63 | ## Filtering results
64 |
65 | Some matches might be syntactically valid email addresses, but not actual email addresses, e.g. `apple-touch-icon@2.png`.
66 |
67 | `extract-email-address` uses a list of valid top-level domains to filter out matches that are definitely not emails (such as the `png` example above), but you might still need to filter out domain-specific false positives.
68 |
69 |
70 | ## Related projects
71 |
72 | * [`extract-date`](https://github.com/gajus/extract-date) – Extracts date from an arbitrary text input.
73 | * [`extract-price`](https://github.com/gajus/extract-price) – Extracts price from an arbitrary text input.
74 | * [`extract-time`](https://github.com/gajus/extract-time) – Extracts time from an arbitrary text input.
75 |
--------------------------------------------------------------------------------
/eslint.config.cjs:
--------------------------------------------------------------------------------
1 | const auto = require('eslint-config-canonical/configurations/auto');
2 |
3 | module.exports = [
4 | {
5 | files: ['**/*.cjs', '**/*.ts', '**/*.tsx'],
6 | },
7 | {
8 | files: ['**/*.ts'],
9 | rules: {
10 | 'id-length': 0,
11 | },
12 | },
13 | ...auto,
14 | {
15 | ignores: ['**/package-lock.json'],
16 | },
17 | ];
18 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "author": {
3 | "email": "gajus@gajus.com",
4 | "name": "Gajus Kuizinas",
5 | "url": "http://gajus.com"
6 | },
7 | "ava": {
8 | "extensions": [
9 | "ts"
10 | ],
11 | "files": [
12 | "src/**/*.test.ts"
13 | ],
14 | "require": [
15 | "ts-node/register/transpile-only"
16 | ]
17 | },
18 | "dependencies": {
19 | "decode-uri-component": "^0.4.1",
20 | "emoji-regex": "^9.0.0",
21 | "tlds": "^1.253.0"
22 | },
23 | "description": "Extracts email address from an arbitrary text input.",
24 | "devDependencies": {
25 | "ava": "^6.1.3",
26 | "del-cli": "^5.1.0",
27 | "eslint": "^8.57.0",
28 | "eslint-config-canonical": "^43.0.13",
29 | "semantic-release": "^24.0.0",
30 | "sinon": "^18.0.0",
31 | "ts-node": "^10.9.2"
32 | },
33 | "engines": {
34 | "node": ">6"
35 | },
36 | "keywords": [
37 | "date",
38 | "extract",
39 | "moment",
40 | "parse"
41 | ],
42 | "license": "BSD-3-Clause",
43 | "main": "./dist/index.js",
44 | "name": "extract-email-address",
45 | "repository": {
46 | "type": "git",
47 | "url": "https://github.com/gajus/extract-email-address"
48 | },
49 | "scripts": {
50 | "build": "del-cli ./dist && tsc",
51 | "lint": "eslint ./src ./test && flow",
52 | "test": "NODE_ENV=test ava --verbose --serial"
53 | },
54 | "version": "3.2.0"
55 | }
56 |
--------------------------------------------------------------------------------
/src/extractEmail.test.ts:
--------------------------------------------------------------------------------
1 | import { extractEmail } from './extractEmail';
2 | import test from 'ava';
3 |
// Inputs that must each yield exactly one match: gajus@gajus.com.
const fixtures = [
  'foo bar baz at gajus@gajus.com',
  'gajus@gajus.com',
  'GAJUS@GAJUS.COM',
  ':gajus@gajus.com',
  '📧gajus@gajus.com',
  'gajus@gajus.com.',
  'foo gajus@gajus.com bar',
  'foo gajus [at] gajus [dot] com',
  'foo g a j u s [at] g a j u s [dot] c o m',

  // Address wrapped in angle brackets. This fixture was previously an empty
  // string, which can never produce a match and made the test fail.
  '<gajus@gajus.com>',
  '【email: gajus@gajus.com】',
  'contact: gajus@gajus.com',
  '"mailto:gajus@gajus.com"',
  'https://gajus@gajus.com',
];

// Register one test per fixture; the fixture text is embedded in the title so
// that a failure identifies the offending input.
for (const fixture of fixtures) {
  test('extracts email ("' + fixture + '")', (t) => {
    t.deepEqual(extractEmail(fixture), [
      {
        email: 'gajus@gajus.com',
      },
    ]);
  });
}
30 |
// Remaining scenarios as a table: test title, raw input, and the addresses
// expected back (in order). Registration order matches the table order.
const scenarios: ReadonlyArray<{
  expected: readonly string[];
  input: string;
  title: string;
}> = [
  {
    expected: ['foo@bar.com', 'baz@qux.com'],
    input: 'foo@bar.com baz@qux.com',
    title: 'extracts multiple email addresses',
  },
  {
    expected: ['gajus+test@gajus.com'],
    input: 'gajus+test@gajus.com',
    title: 'extracts email (gajus+test@gajus.com)',
  },
  {
    expected: ['gajus.gajus@gajus.com'],
    input: 'gajus.gajus@gajus.com',
    title: 'extracts email (gajus.gajus@gajus.com)',
  },
  {
    expected: ['gajus.gajus+test@gajus.com'],
    input: 'gajus.gajus+test@gajus.com',
    title: 'extracts email (gajus.gajus+test@gajus.com)',
  },
  {
    expected: [],
    input: 'gajus@gajus.png',
    title: 'excludes emails with invalid TLD (gajus@gajus.png)',
  },
  {
    expected: ['gajus@gajus.com'],
    input: 'gajus@gajus.com gajus@gajus.com',
    title: 'removes duplicates email',
  },
  {
    expected: ['john+smith@gajus.com'],
    input: 'mailto%3Ajohn%2Bsmith%40gajus.com',
    title: 'extracts email mailto',
  },
];

for (const { expected, input, title } of scenarios) {
  test(title, (t) => {
    t.deepEqual(
      extractEmail(input),
      expected.map((email) => {
        return {
          email,
        };
      }),
    );
  });
}
85 |
--------------------------------------------------------------------------------
/src/extractEmail.ts:
--------------------------------------------------------------------------------
1 | import { normalizeInput } from './normalizeInput';
2 | import tlds from 'tlds';
3 |
/**
 * A single email address found in the input.
 */
export type EmailMatch = {
  email: string;
};

/**
 * Extracts email addresses from arbitrary text.
 *
 * The input is first passed through `normalizeInput`, then scanned for
 * email-shaped tokens. A candidate is kept only when it ends with `.` followed
 * by an entry of the `tlds` list. Duplicates are removed, keeping the first
 * occurrence, so the result preserves the order in which addresses appear.
 *
 * @param input - Text to search.
 * @returns The unique matches, or an empty array when nothing matches.
 */
export const extractEmail = (input: string): readonly EmailMatch[] => {
  const candidates = normalizeInput(input).match(
    // The final character class is `[A-Za-z]`; a `|` inside a class would be a
    // literal pipe and would glue pipe-separated addresses into one bogus match.
    // eslint-disable-next-line unicorn/better-regex, require-unicode-regexp, regexp/no-unused-capturing-group
    /\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
  );

  if (!candidates) {
    return [];
  }

  const seen = new Set<string>();
  const results: EmailMatch[] = [];

  for (const email of candidates) {
    // Skip anything already examined: a repeated candidate was either emitted
    // before or rejected before, so the outcome cannot change.
    if (seen.has(email)) {
      continue;
    }

    seen.add(email);

    const hasKnownTld = tlds.some((tld) => {
      return email.endsWith('.' + tld);
    });

    if (hasKnownTld) {
      results.push({
        email,
      });
    }
  }

  return results;
};
40 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | export { type EmailMatch, extractEmail } from './extractEmail';
2 |
--------------------------------------------------------------------------------
/src/normalizeInput.test.ts:
--------------------------------------------------------------------------------
1 | import { normalizeInput } from './normalizeInput';
2 | import test from 'ava';
3 |
// Covers each supported obfuscation style: casing, surrounding symbols,
// letter-spaced text, the at/dot marker variants, and percent-encoding.
test('normalizes different email formats', (t) => {
  t.is(normalizeInput('GAJUS@GAJUS.COM'), 'gajus@gajus.com');

  // Full-width colon (U+FF1A): it is non-ASCII, so it is replaced by a space
  // and trimmed. An ASCII ':' is kept by the normalizer and would not satisfy
  // this expectation.
  t.is(normalizeInput('：gajus@gajus.com'), 'gajus@gajus.com');
  t.is(normalizeInput('📧gajus@gajus.com'), 'gajus@gajus.com');
  t.is(
    normalizeInput('g a j u s [at] g a j u s [dot] c o m'),
    'gajus@gajus.com',
  );
  t.is(
    normalizeInput('foo g a j u s [at] g a j u s [dot] c o m bar'),
    'foo gajus@gajus.com bar',
  );
  t.is(normalizeInput('gajus[at]gajus[dot]co[dot]uk'), 'gajus@gajus.co.uk');
  t.is(normalizeInput('gajus[at]gajus[dot]com'), 'gajus@gajus.com');
  t.is(normalizeInput('gajus(at)gajus(dot)com'), 'gajus@gajus.com');
  t.is(normalizeInput('gajus [at] gajus [dot] com'), 'gajus@gajus.com');
  t.is(normalizeInput('gajus (at) gajus (dot) com'), 'gajus@gajus.com');

  // Angle-bracket markers; without them the input contains no separator at all.
  t.is(normalizeInput('gajus <at> gajus <dot> com'), 'gajus@gajus.com');
  t.is(normalizeInput('gajus at gajus dot com'), 'gajus@gajus.com');
  t.is(normalizeInput('john%2Bsmith%40gajus.com'), 'john+smith@gajus.com');
});
25 |
--------------------------------------------------------------------------------
/src/normalizeInput.ts:
--------------------------------------------------------------------------------
1 | import decodeUriComponent from 'decode-uri-component';
2 | import createEmojiRegex from 'emoji-regex';
3 |
4 | const emojiRegex = createEmojiRegex();
5 |
/**
 * Rewrites common email obfuscations into plain `local@domain.tld` form so
 * that a simple pattern match can find the address afterwards.
 *
 * Steps, in order: decode the input with `decodeUriComponent`, replace emoji
 * with spaces, join letter-spaced text, turn the `at` / `dot` marker variants
 * into `@` / `.`, replace characters outside printable ASCII with spaces, then
 * trim and lower-case.
 *
 * @param input - Raw text that may contain obfuscated email addresses.
 * @returns The normalized, lower-cased text.
 */
export const normalizeInput = (input: string): string => {
  return (
    decodeUriComponent(input)
      .replace(emojiRegex, ' ')

      // Join runs of single, space-separated characters ("f o o" -> "foo").
      .replaceAll(/(?<=\s|^)([.\-_a-z])\s?(?=[.\-_a-z](?:\s|$))/gu, '$1')

      // Bare words need surrounding whitespace so that words merely containing
      // "at" or "dot" are left alone.
      .replaceAll(/\s+at\s+/gu, '@')
      .replaceAll(/\s+dot\s+/gu, '.')

      // Delimited markers may or may not be padded with whitespace. The
      // angle-bracket patterns must contain the literal marker: a pattern of
      // only `\s*` matches the empty string at every position and would insert
      // the replacement between every character.
      .replaceAll(/\s*<at>\s*/gu, '@')
      .replaceAll(/\s*<dot>\s*/gu, '.')
      .replaceAll(/\s*\(at\)\s*/gu, '@')
      .replaceAll(/\s*\(dot\)\s*/gu, '.')
      .replaceAll(/\s*\[at\]\s*/gu, '@')
      .replaceAll(/\s*\[dot\]\s*/gu, '.')

      // Matches all ASCII characters from the space to tilde.
      // eslint-disable-next-line regexp/no-obscure-range
      .replaceAll(/[^ -~]/gu, ' ')
      .trim()
      .toLowerCase()
  );
};
27 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "allowSyntheticDefaultImports": true,
4 | "declaration": true,
5 | "declarationMap": true,
6 | "esModuleInterop": true,
7 | "lib": [
8 | "es2021"
9 | ],
10 | "module": "commonjs",
11 | "moduleResolution": "node",
12 | "noImplicitAny": false,
13 | "noImplicitReturns": true,
14 | "outDir": "dist",
15 | "skipLibCheck": true,
16 | "sourceMap": true,
17 | "strict": true,
18 | "target": "es2020",
19 | "useUnknownInCatchVariables": false
20 | },
21 | "include": [
22 | "src"
23 | ]
24 | }
--------------------------------------------------------------------------------