├── .editorconfig ├── .eslintrc ├── .gitignore ├── .npmignore ├── LICENSE ├── README.md ├── eslint.config.cjs ├── package.json ├── src ├── extractEmail.test.ts ├── extractEmail.ts ├── index.ts ├── normalizeInput.test.ts └── normalizeInput.ts └── tsconfig.json /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 2 7 | indent_style = space 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "canonical" 4 | ], 5 | "root": true 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | coverage 2 | dist 3 | node_modules 4 | *.log 5 | .* 6 | !.editorconfig 7 | !.eslintignore 8 | !.eslintrc 9 | !.gitignore 10 | !.npmignore 11 | /package-lock.json 12 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | test 2 | coverage 3 | .* 4 | *.log 5 | !.flowconfig 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024, Gajus Kuizinas (https://gajus.com/) 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 
8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the Gajus Kuizinas (https://gajus.com/) nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL GAJUS KUIZINAS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # extract-email-address 📧 3 | 4 | [![Coveralls](https://img.shields.io/coveralls/gajus/extract-email-address.svg?style=flat-square)](https://coveralls.io/github/gajus/extract-email-address) 5 | [![NPM version](http://img.shields.io/npm/v/extract-email-address.svg?style=flat-square)](https://www.npmjs.org/package/extract-email-address) 6 | [![Canonical Code Style](https://img.shields.io/badge/code%20style-canonical-blue.svg?style=flat-square)](https://github.com/gajus/canonical) 7 | [![Twitter Follow](https://img.shields.io/twitter/follow/kuizinas.svg?style=social&label=Follow)](https://twitter.com/kuizinas) 8 | 9 | Extracts email-like entities from an arbitrary text input. 10 | 11 | * [extract-email-address 📧](#extract-email-address) 12 | * [API](#extract-email-address-api) 13 | * [Usage](#extract-email-address-usage) 14 | * [Filtering results](#extract-email-address-filtering-results) 15 | * [Related projects](#extract-email-address-related-projects) 16 | 17 | 18 | 19 | ## API 20 | 21 | ```js 22 | import { 23 | extractEmail, 24 | type EmailMatch, 25 | } from 'extract-email-address'; 26 | 27 | extractEmail(input: string): readonly EmailMatch[]; 28 | ``` 29 | 30 | 31 | ## Usage 32 | 33 | ```js 34 | import { extractEmail } from 'extract-email-address'; 35 | 36 | extractEmail('extracts email from anywhere within the input gajus@gajus.com'); 37 | // [{email: 'gajus@gajus.com'}] 38 | 39 | extractEmail('extracts multiple emails located anywhere within the input: foo@gajus.com, bar@gajus.com'); 40 | // [{email: 'foo@gajus.com'}, {email: 'bar@gajus.com'}] 41 | 42 | extractEmail('extracts all sorts of obfuscated emails, e.g. f o o @ b a r . c o m or baz [at] qux [dot] com'); 43 | // [{email: 'foo@bar.com'}, {email: 'baz@qux.com'}] 44 | 45 | extractEmail('extracts tagged emails, e.g. 
gajus+foo@gajus.com'); 46 | // [{email: 'gajus+foo@gajus.com'}] 47 | 48 | extractEmail('extracts emails surrounded by odd unicode characters, e.g. 邮箱:gajus@gajus.com'); 49 | // [{email: 'gajus@gajus.com'}] 50 | 51 | extractEmail('extracts emails surrounded by emojis, e.g. 📧gajus@gajus.com'); 52 | // [{email: 'gajus@gajus.com'}] 53 | 54 | extractEmail('excludes invalid emails with invalid TLDs, e.g. gajus@gajus.png'); 55 | // [] 56 | 57 | extractEmail('ignores invalid emails foo@bar'); 58 | // [] 59 | 60 | ``` 61 | 62 | 63 | ## Filtering results 64 | 65 | Some matches might be syntactically valid email addresses, but not actual email addresses, e.g. `apple-touch-icon@2.png`. 66 | 67 | `extract-email-address` uses a list of valid top-level domains to filter out matches that are definitely not emails (such as the `png` example), but you might still need to filter out domain-specific false-positives. 68 | 69 | 70 | ## Related projects 71 | 72 | * [`extract-date`](https://github.com/gajus/extract-date) – Extracts date from an arbitrary text input. 73 | * [`extract-price`](https://github.com/gajus/extract-price) – Extracts price from an arbitrary text input. 74 | * [`extract-time`](https://github.com/gajus/extract-time) – Extracts time from an arbitrary text input. 
75 | -------------------------------------------------------------------------------- /eslint.config.cjs: -------------------------------------------------------------------------------- 1 | const auto = require('eslint-config-canonical/configurations/auto'); 2 | 3 | module.exports = [ 4 | { 5 | files: ['**/*.cjs', '**/*.ts', '**/*.tsx'], 6 | }, 7 | { 8 | files: ['**/*.ts'], 9 | rules: { 10 | 'id-length': 0, 11 | }, 12 | }, 13 | ...auto, 14 | { 15 | ignores: ['**/package-lock.json'], 16 | }, 17 | ]; 18 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": { 3 | "email": "gajus@gajus.com", 4 | "name": "Gajus Kuizinas", 5 | "url": "http://gajus.com" 6 | }, 7 | "ava": { 8 | "extensions": [ 9 | "ts" 10 | ], 11 | "files": [ 12 | "src/**/*.test.ts" 13 | ], 14 | "require": [ 15 | "ts-node/register/transpile-only" 16 | ] 17 | }, 18 | "dependencies": { 19 | "decode-uri-component": "^0.4.1", 20 | "emoji-regex": "^9.0.0", 21 | "tlds": "^1.253.0" 22 | }, 23 | "description": "Extracts email address from an arbitrary text input.", 24 | "devDependencies": { 25 | "ava": "^6.1.3", 26 | "del-cli": "^5.1.0", 27 | "eslint": "^8.57.0", 28 | "eslint-config-canonical": "^43.0.13", 29 | "semantic-release": "^24.0.0", 30 | "sinon": "^18.0.0", 31 | "ts-node": "^10.9.2" 32 | }, 33 | "engines": { 34 | "node": ">6" 35 | }, 36 | "keywords": [ 37 | "date", 38 | "extract", 39 | "moment", 40 | "parse" 41 | ], 42 | "license": "BSD-3-Clause", 43 | "main": "./dist/index.js", 44 | "name": "extract-email-address", 45 | "repository": { 46 | "type": "git", 47 | "url": "https://github.com/gajus/extract-email-address" 48 | }, 49 | "scripts": { 50 | "build": "del-cli ./dist && tsc", 51 | "lint": "eslint ./src ./test && flow", 52 | "test": "NODE_ENV=test ava --verbose --serial" 53 | }, 54 | "version": "3.2.0" 55 | } 56 | 
-------------------------------------------------------------------------------- /src/extractEmail.test.ts: -------------------------------------------------------------------------------- 1 | import { extractEmail } from './extractEmail'; 2 | import test from 'ava'; 3 | 4 | const fixtures = [ 5 | 'foo bar baz at gajus@gajus.com', 6 | 'gajus@gajus.com', 7 | 'GAJUS@GAJUS.COM', 8 | ':gajus@gajus.com', 9 | '📧gajus@gajus.com', 10 | 'gajus@gajus.com.', 11 | 'foo gajus@gajus.com bar', 12 | 'foo gajus [at] gajus [dot] com', 13 | 'foo g a j u s [at] g a j u s [dot] c o m', 14 | '', 15 | '【email: gajus@gajus.com】', 16 | 'contact: gajus@gajus.com', 17 | '"mailto:gajus@gajus.com"', 18 | 'https://gajus@gajus.com', 19 | ]; 20 | 21 | for (const fixture of fixtures) { 22 | test('extracts email ("' + fixture + '")', (t) => { 23 | t.deepEqual(extractEmail(fixture), [ 24 | { 25 | email: 'gajus@gajus.com', 26 | }, 27 | ]); 28 | }); 29 | } 30 | 31 | test('extracts multiple email addresses', (t) => { 32 | t.deepEqual(extractEmail('foo@bar.com baz@qux.com'), [ 33 | { 34 | email: 'foo@bar.com', 35 | }, 36 | { 37 | email: 'baz@qux.com', 38 | }, 39 | ]); 40 | }); 41 | 42 | test('extracts email (gajus+test@gajus.com)', (t) => { 43 | t.deepEqual(extractEmail('gajus+test@gajus.com'), [ 44 | { 45 | email: 'gajus+test@gajus.com', 46 | }, 47 | ]); 48 | }); 49 | 50 | test('extracts email (gajus.gajus@gajus.com)', (t) => { 51 | t.deepEqual(extractEmail('gajus.gajus@gajus.com'), [ 52 | { 53 | email: 'gajus.gajus@gajus.com', 54 | }, 55 | ]); 56 | }); 57 | 58 | test('extracts email (gajus.gajus+test@gajus.com)', (t) => { 59 | t.deepEqual(extractEmail('gajus.gajus+test@gajus.com'), [ 60 | { 61 | email: 'gajus.gajus+test@gajus.com', 62 | }, 63 | ]); 64 | }); 65 | 66 | test('excludes emails with invalid TLD (gajus@gajus.png)', (t) => { 67 | t.deepEqual(extractEmail('gajus@gajus.png'), []); 68 | }); 69 | 70 | test('removes duplicates email', (t) => { 71 | t.deepEqual(extractEmail('gajus@gajus.com 
import { normalizeInput } from './normalizeInput';
import tlds from 'tlds';

/**
 * A single email address found in the input.
 */
export type EmailMatch = {
  email: string;
};

/**
 * Candidate matcher: local part, `@`, dotted domain, and an alphabetic final
 * label of at least two letters.
 *
 * The final label is `[A-Za-z]`, not `[A-Z|a-z]`: inside a character class
 * `|` is a literal pipe, which made inputs such as `a@b.com|c@d.com` match as
 * `a@b.com|c` and then get discarded by the TLD filter.
 */
// eslint-disable-next-line unicorn/better-regex
const EMAIL_PATTERN = /\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/gu;

/**
 * Known top-level domains, indexed for constant-time lookup.
 */
const knownTlds = new Set<string>(tlds);

/**
 * Extracts email addresses from arbitrary text.
 *
 * The input is first passed through `normalizeInput`, so the returned
 * addresses are in normalized form. Candidates whose final domain label is
 * not in the `tlds` list are dropped, and duplicates are removed while
 * keeping the order of first occurrence.
 *
 * @param input Arbitrary text that may contain email addresses.
 * @returns The unique matches, or an empty array when nothing matches.
 */
export const extractEmail = (input: string): readonly EmailMatch[] => {
  // `String.prototype.match` resets `lastIndex` for global patterns, so
  // sharing the hoisted regex between calls is safe.
  const candidates = normalizeInput(input).match(EMAIL_PATTERN);

  if (!candidates) {
    return [];
  }

  // A Set both de-duplicates and preserves insertion order.
  const uniqueEmails = new Set<string>();

  for (const candidate of candidates) {
    // Every candidate contains at least one '.', so this is the final label.
    const topLevelDomain = candidate.slice(candidate.lastIndexOf('.') + 1);

    if (knownTlds.has(topLevelDomain)) {
      uniqueEmails.add(candidate);
    }
  }

  return Array.from(uniqueEmails, (email) => {
    return {
      email,
    };
  });
};
import decodeUriComponent from 'decode-uri-component';
import createEmojiRegex from 'emoji-regex';

const emojiRegex = createEmojiRegex();

/**
 * Rewrites free-form text so that obfuscated email addresses take the plain
 * `local@domain.tld` shape.
 *
 * Steps, in order:
 * 1. Decode percent-encoded sequences (e.g. `%40` becomes `@`).
 * 2. Replace emojis with a space.
 * 3. Join runs of space-separated single characters (`g a j u s`).
 * 4. Replace the `at` / `dot` markers in their bare, `<…>`, `(…)` and `[…]`
 *    spellings with `@` and `.`.
 * 5. Replace every character outside printable ASCII with a space, trim, and
 *    lower-case the result.
 *
 * @param input Arbitrary text.
 * @returns The normalized, lower-cased text.
 */
export const normalizeInput = (input: string): string => {
  return (
    decodeUriComponent(input)
      .replace(emojiRegex, ' ')

      // Drops the optional space after a lone character when the next token
      // is also a lone character, collapsing "f o o" into "foo".
      .replaceAll(/(?<=\s|^)([.\-_a-z])\s?(?=[.\-_a-z](?:\s|$))/gu, '$1')

      // Bare words need surrounding whitespace so that words merely
      // containing "at" or "dot" are left alone.
      .replaceAll(/\s+at\s+/gu, '@')
      .replaceAll(/\s+dot\s+/gu, '.')

      // The angle-bracket markers must be spelled out: an empty marker would
      // leave a pattern that matches at every position in the string.
      .replaceAll(/\s*<at>\s*/gu, '@')
      .replaceAll(/\s*<dot>\s*/gu, '.')
      .replaceAll(/\s*\(at\)\s*/gu, '@')
      .replaceAll(/\s*\(dot\)\s*/gu, '.')
      .replaceAll(/\s*\[at\]\s*/gu, '@')
      .replaceAll(/\s*\[dot\]\s*/gu, '.')

      // Matches all ASCII characters from the space to tilde.
      // eslint-disable-next-line regexp/no-obscure-range
      .replaceAll(/[^ -~]/gu, ' ')
      .trim()
      .toLowerCase()
  );
};