├── .gitignore
├── .npmignore
├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── package.json
├── src
├── my-name-is-url.js
├── parser.js
└── regex.js
└── test
├── fixtures
├── grabbable.json
├── matches.json
└── non-matches.json
└── unit.js
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
3 | .nyc_output
4 | .DS_Store
5 | npm-debug.log
6 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | src
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js: node
3 | script: npm run lint && npm test
4 | after_success: npm run coverage
5 | notifications:
6 | email:
7 | on_success: never
8 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | All notable changes to this project will be documented in this file.
4 | This project adheres to [Semantic Versioning](http://semver.org/).
5 |
6 | ## [1.3.2] - 2016-05-23
7 |
8 | - Generate source maps for code coverage
9 | - Remove Code Climate integration
10 | - Migrate tests to AVA
11 | - Update jspm command with registry alias
12 | - Add better API docs
13 |
14 | ## [1.3.1] - 2016-05-02
15 |
16 | - Fix hostname/subdomain matching
17 | - Some tweaks to the readme
18 |
19 | ## [1.3.0] - 2016-05-02
20 |
21 | - Stricter checking of hostnames
22 | - Reuse hostname check for subdomains
23 | - Use more reliable checks for end of sentence
24 | - Match custom schemes
25 | - Improve regex readability
26 |
27 | ## [1.2.0] - 2016-05-01
28 |
29 | - Don't allow dots in hostname
30 | - Match optional subdomain
31 |
32 | ## [1.1.0] - 2016-04-30
33 |
34 | - Added test coverage
35 | - Added Code Climate integration
36 | - Reformat readme
37 | - Hostname must contain at least one char
38 | - Match scheme for any pattern
39 | - Make urls in HTML tags grabbable
40 | - Make urls in double quotes grabbable
41 | - Ignore double quotes in urls
42 | - Add change log
43 |
44 | ## [1.0.0] - 2016-04-22
45 |
46 | - First release
47 |
48 | [1.3.2]: https://github.com/lukechilds/my-name-is-url/compare/v1.3.1...v1.3.2
49 | [1.3.1]: https://github.com/lukechilds/my-name-is-url/compare/v1.3.0...v1.3.1
50 | [1.3.0]: https://github.com/lukechilds/my-name-is-url/compare/v1.2.0...v1.3.0
51 | [1.2.0]: https://github.com/lukechilds/my-name-is-url/compare/v1.1.0...v1.2.0
52 | [1.1.0]: https://github.com/lukechilds/my-name-is-url/compare/v1.0.0...v1.1.0
53 | [1.0.0]: https://github.com/lukechilds/my-name-is-url/compare/v0.0.0...v1.0.0
54 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Luke Childs
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
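1 | # my-name-is-url [![Build Status](https://travis-ci.org/lukechilds/my-name-is-url.svg?branch=master)](https://travis-ci.org/lukechilds/my-name-is-url) [![Coverage Status](https://coveralls.io/repos/github/lukechilds/my-name-is-url/badge.svg?branch=master)](https://coveralls.io/github/lukechilds/my-name-is-url?branch=master)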
2 |
3 | Intelligently recognises many different url formats in a string. For the browser and node. [Here, have a play](http://lukechilds.github.io/my-name-is-url).
4 |
5 | ## About
6 |
7 | `my-name-is-url` was created because I couldn't find a parser with a high enough success rate. The url spec is so vague that many strings _could_ be a url, therefore matching the spec directly results in a lot of false positives. Most parsers get around this by requiring a url to contain a scheme to be matched as a url.
8 |
9 | The regular expression used in `my-name-is-url` tries to match patterns likely to represent a url in a sentence rather than matching the actual url spec. This results in a much wider scope of matchable urls than most other parsers without introducing loads of false positives.
10 |
11 | > ❗️**Important note**
12 | >
13 | > If you're trying to parse a url into sections (scheme, host) or check that a url is valid, this module isn't for you. This module is intended to find urls in a string.
14 |
15 | ## Install
16 |
17 | ```shell
18 | npm install --save my-name-is-url
19 | ```
20 |
21 | or
22 |
23 | ```shell
24 | jspm install my-name-is-url
25 | ```
26 |
27 | ## Usage
28 |
29 | ```js
30 | import Urls from 'my-name-is-url';
31 |
32 | const getText = 'Check out these sites: foobar.com,//foo.ninja,http://bar.com.';
33 | Urls(getText).get();
34 | // [ 'foobar.com', '//foo.ninja', 'http://bar.com' ]
35 |
36 | const filterText = 'My GitHub profile: https://github.com/lukechilds';
37 | Urls(filterText).filter(url => `<a href="${url}">${url}</a>`);
38 | // 'My GitHub profile: <a href="https://github.com/lukechilds">https://github.com/lukechilds</a>'
39 | ```
40 |
41 | ### Importing
42 |
43 | CommonJS
44 |
45 | ```js
46 | var Urls = require('my-name-is-url');
47 | ```
48 |
49 | ES6
50 |
51 | ```js
52 | import Urls from 'my-name-is-url';
53 | ```
54 |
55 | ### Regex
56 |
57 | If you just wanna do your own thing the regex used internally is helpfully exposed.
58 |
59 | ```js
60 | var urlRegex = require('my-name-is-url').regex;
61 | ```
62 |
63 | or
64 |
65 | ```js
66 | import { regex as urlRegex } from 'my-name-is-url';
67 | ```
68 |
69 | ## API
70 |
71 | ### regex
72 |
73 | The regex used internally for matching urls.
74 |
75 | ### get()
76 |
77 | Returns an array of url matches. If there are no matches an empty array will be returned.
78 |
79 | ```js
80 | const text = 'Check out these sites: foobar.com,//foo.ninja,http://bar.com.';
81 |
82 | Urls(text).get();
83 | // [ 'foobar.com', '//foo.ninja', 'http://bar.com' ]
84 | ```
85 |
86 | ### filter(cb)
87 |
88 | Runs a filter callback on each url in a string.
89 |
90 | #### cb
91 |
92 | *Required*
93 |
94 | Type: `function`
95 |
96 | ```js
97 | const text = 'My GitHub profile: https://github.com/lukechilds';
98 |
99 | Urls(text).filter(url => `<a href="${url}">${url}</a>`);
100 | // 'My GitHub profile: <a href="https://github.com/lukechilds">https://github.com/lukechilds</a>'
101 | ```
102 |
103 | > 👍 **Pro tip**
104 | >
105 | > You can get a parser instance by calling `Urls()` or `new Urls`, whichever you prefer.
106 |
107 | ## License
108 |
109 | MIT © Luke Childs
110 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "my-name-is-url",
3 | "version": "1.4.0",
4 | "description": "Intelligent URL parser",
5 | "keywords": [
6 | "urls",
7 | "url",
8 | "uri",
9 | "get",
10 | "extract",
11 | "find",
12 | "filter",
13 | "scrape",
14 | "text",
15 | "string",
16 | "browser"
17 | ],
18 | "main": "dist/my-name-is-url.js",
19 | "dependencies": {
20 | "tlds": "^1.110.0"
21 | },
22 | "devDependencies": {
23 | "ava": "^0.25.0",
24 | "babel-cli": "^6.7.5",
25 | "babel-plugin-add-module-exports": "^0.2.1",
26 | "babel-preset-es2015": "^6.6.0",
27 | "codecov": "^2.2.0",
28 | "coveralls": "^3.0.0",
29 | "eslint": "^4.2.0",
30 | "eslint-config-lukechilds": "^1.1.0",
31 | "nyc": "^11.0.2",
32 | "pre-commit": "^1.1.2"
33 | },
34 | "scripts": {
35 | "prebuild": "rm -rf dist",
36 | "build": "babel -d dist src",
37 | "prebuild:map": "npm run prebuild",
38 | "build:map": "babel --source-maps=true -d dist src",
39 | "pretest": "npm run build:map",
40 | "test": "nyc ava test",
41 | "lint": "eslint src",
42 | "coverage": "nyc report --reporter=text-lcov | coveralls && nyc report --reporter=text-lcov > coverage.lcov && codecov",
43 | "prepublish": "npm run build"
44 | },
45 | "pre-commit": [
46 | "lint",
47 | "test"
48 | ],
49 | "babel": {
50 | "presets": [
51 | "es2015"
52 | ],
53 | "plugins": [
54 | "add-module-exports"
55 | ]
56 | },
57 | "eslintConfig": {
58 | "extends": "lukechilds"
59 | },
60 | "repository": {
61 | "type": "git",
62 | "url": "git+https://github.com/lukechilds/my-name-is-url.git"
63 | },
64 | "author": "Luke Childs (http://lukechilds.co.uk)",
65 | "license": "MIT",
66 | "bugs": {
67 | "url": "https://github.com/lukechilds/my-name-is-url/issues"
68 | },
69 | "homepage": "https://github.com/lukechilds/my-name-is-url"
70 | }
71 |
--------------------------------------------------------------------------------
/src/my-name-is-url.js:
--------------------------------------------------------------------------------
1 | import Parser from './parser';
2 | import regex from './regex';
3 |
/**
 * Factory function to return a parser instance.
 *
 * This is a regular function rather than an arrow function so that both
 * `Urls(text)` and `new Urls(text)` work: arrow functions cannot be invoked
 * with `new`, whereas a regular function that returns an object hands that
 * same object back when it is called as a constructor.
 *
 * @param {?string} [text=null] - Text to search for urls.
 * @returns {Parser} A parser holding `text`.
 */
function Urls(text = null) {
  return new Parser(text);
}

// Expose regex here for easy access
Urls.regex = regex;

export default Urls;
11 |
--------------------------------------------------------------------------------
/src/parser.js:
--------------------------------------------------------------------------------
1 | import regex from './regex';
2 |
// Anything that isn't a string is treated as empty text
const asText = value => (typeof value === 'string' ? value : '');

/**
 * Wraps a piece of text and finds or rewrites the urls inside it.
 *
 * Both `get()` and `filter()` first replace a non-string `text` with an
 * empty string on the instance, so `this.text` is always a string once
 * either method has been called.
 */
export default class Parser {

  /**
   * @param {?string} [text=null] - Text to search for urls.
   */
  constructor(text = null) {
    this.text = text;
  }

  /**
   * Collect every url found in the text.
   *
   * @returns {string[]} The matches, or an empty array when there are none.
   */
  get() {
    this.text = asText(this.text);

    // `match` yields null when nothing is found; callers always get an array
    const found = this.text.match(regex);
    return found === null ? [] : found;
  }

  /**
   * Replace each url in the text with whatever the callback returns for it.
   *
   * @param {function} cb - Replacer handed to `String.prototype.replace`.
   * @returns {string} The text with every url replaced.
   * @throws {Error} When `cb` is not a function.
   */
  filter(cb) {
    this.text = asText(this.text);

    // Only a function is an acceptable callback
    if (typeof cb === 'function') {
      return this.text.replace(regex, cb);
    }

    throw new Error('Invalid filter callback');
  }

}
38 |
--------------------------------------------------------------------------------
/src/regex.js:
--------------------------------------------------------------------------------
1 | import tlds from 'tlds';
2 |
// Primitive fragments. Everything is built as a string and compiled once at
// the bottom, so regex escapes are spelled with an explicit backslash.
const backslash = '\\';
const whitespace = `${backslash}s`;
const literalDot = `${backslash}.`;
const literalSlash = `${backslash}/`;
const alphanumeric = '[a-z0-9]';
const digit = '[0-9]';

// Published TLD list plus two extra TLDs: local and dev
const knownTlds = tlds.concat(['local', 'dev']).join('|');

// Path characters: anything except whitespace, comma or double quote.
// Dots are allowed here, so paths such as /foo.bar match in full.
const pathChar = `[^${whitespace},"]`;

// One hostname label: alphanumerics with optional inner hyphens, never
// ending on a hyphen
const label = `((${alphanumeric}-*)*${alphanumeric}+)`;

// One to three digits of a dotted-quad address (the value is not range checked)
const octet = `${digit}{1,3}`;

// Sections, in the order they appear in a url
const sections = [
  // Optional scheme: "//" on its own or preceded by "letters:"
  '(([a-z]+:)?//)?',
  // Host: localhost, dot separated labels ending in a known TLD, or an IP
  `(localhost|(${label}${literalDot})+(${knownTlds})|(${octet}${literalDot}){3}${octet})`,
  // Optional port number
  `(:${digit}+)?`,
  // Optional slash followed by an optional path
  `(${literalSlash}(${pathChar}*)?)?`,
  // Must be followed by an optional dot and then whitespace, <, >, ", a
  // comma or the end of the input; this is a lookahead, so none of that
  // is part of the match
  `(?=${literalDot}?([${whitespace}<>",]|$))`
];

// Global and case insensitive
export default new RegExp(sections.join(''), 'gi');
24 |
--------------------------------------------------------------------------------
/test/fixtures/grabbable.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "description": "Comma separated urls",
4 | "text": "Check out these sites: site1.com,site2.com,site3.com",
5 | "matches": ["site1.com", "site2.com", "site3.com"]
6 | },
7 | {
8 | "description": "Urls at the end of a sentence",
9 | "text": "Check out this site: url.com.",
10 | "matches": ["url.com"]
11 | },
12 | {
13 | "description": "Urls at the end of a sentence with newline",
14 | "text": "Check out this site: url.com.\nNewline",
15 | "matches": ["url.com"]
16 | },
17 | {
18 | "description": "Urls at the end of a sentence with space",
19 | "text": "Check out this site: url.com. ",
20 | "matches": ["url.com"]
21 | },
22 | {
23 | "description": "Urls before line break",
24 | "text": "Check out this site: url.com\n",
25 | "matches": ["url.com"]
26 | },
27 | {
28 | "description": "Url in HTML",
29 | "text": "url.com",
30 | "matches": ["url.com"]
31 | },
32 | {
33 | "description": "Url with scheme in HTML",
34 | "text": "http://url.com",
35 | "matches": ["http://url.com"]
36 | },
37 | {
38 | "description": "IP in HTML",
39 | "text": "192.168.0.11",
40 | "matches": ["192.168.0.11"]
41 | },
42 | {
43 | "description": "Url in double quotes",
44 | "text": "\"url.com\"",
45 | "matches": ["url.com"]
46 | },
47 | {
48 | "description": "Url with scheme in double quotes",
49 | "text": "\"http://url.com\"",
50 | "matches": ["http://url.com"]
51 | },
52 | {
53 | "description": "IP in double quotes",
54 | "text": "\"192.168.0.11\"",
55 | "matches": ["192.168.0.11"]
56 | },
57 | {
58 | "description": "Url after double quote in hostname",
59 | "text": "double\"quote.com",
60 | "matches": ["quote.com"]
61 | },
62 | {
63 | "description": "Url before double quote in path",
64 | "text": "url.com/double\"quote",
65 | "matches": ["url.com/double"]
66 | }
67 | ]
68 |
--------------------------------------------------------------------------------
/test/fixtures/matches.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "description": "Url starting with http://",
4 | "url": "http://url.com"
5 | },
6 | {
7 | "description": "Url starting with https://",
8 | "url": "https://url.com"
9 | },
10 | {
11 | "description": "Url starting with //",
12 | "url": "//url.com"
13 | },
14 | {
15 | "description": "Url starting with custom scheme",
16 | "url": "custom://url.com"
17 | },
18 | {
19 | "description": "Url with no scheme",
20 | "url": "url.com"
21 | },
22 | {
23 | "description": "Url with gTLD",
24 | "url": "url.website"
25 | },
26 | {
27 | "description": "Url with .local TLD",
28 | "url": "url.local"
29 | },
30 | {
31 | "description": "Url with .dev TLD",
32 | "url": "url.dev"
33 | },
34 | {
35 | "description": "Url with trailing slash",
36 | "url": "url.com/"
37 | },
38 | {
39 | "description": "Url with port number",
40 | "url": "url.com:8080"
41 | },
42 | {
43 | "description": "Url with port number and trailing slash",
44 | "url": "url.com:8080/"
45 | },
46 | {
47 | "description": "IP address",
48 | "url": "192.168.0.1"
49 | },
50 | {
51 | "description": "IP address with trailing slash",
52 | "url": "192.168.0.1/"
53 | },
54 | {
55 | "description": "IP address starting with http://",
56 | "url": "http://192.168.0.1"
57 | },
58 | {
59 | "description": "IP address starting with https://",
60 | "url": "https://192.168.0.1"
61 | },
62 | {
63 | "description": "IP address starting with //",
64 | "url": "//192.168.0.1"
65 | },
66 | {
67 | "description": "Subdomain",
68 | "url": "subdomain.url.com"
69 | },
70 | {
71 | "description": "Subdomain with trailing slash",
72 | "url": "subdomain.url.com/"
73 | },
74 | {
75 | "description": "Subdomain starting with http://",
76 | "url": "http://subdomain.url.com"
77 | },
78 | {
79 | "description": "Subdomain starting with https://",
80 | "url": "https://subdomain.url.com"
81 | },
82 | {
83 | "description": "Subdomain starting with //",
84 | "url": "//subdomain.url.com"
85 | },
86 | {
87 | "description": "Deep subdomain",
88 | "url": "d.e.e.p.subdomain.url.com"
89 | },
90 | {
91 | "description": "Localhost",
92 | "url": "localhost"
93 | },
94 | {
95 | "description": "Localhost with trailing slash",
96 | "url": "localhost/"
97 | },
98 | {
99 | "description": "Localhost starting with http://",
100 | "url": "http://localhost"
101 | },
102 | {
103 | "description": "Localhost starting with https://",
104 | "url": "https://localhost"
105 | },
106 | {
107 | "description": "Localhost starting with //",
108 | "url": "//localhost"
109 | },
110 | {
111 | "description": "Hostname with hyphen",
112 | "url": "hyphon-url.com"
113 | },
114 | {
115 | "description": "Url with slug",
116 | "url": "url.com/slug"
117 | },
118 | {
119 | "description": "Url with hash",
120 | "url": "url.com/#hash"
121 | },
122 | {
123 | "description": "Url with query string",
124 | "url": "url.com/?foo"
125 | },
126 | {
127 | "description": "Url with query string with value",
128 | "url": "url.com/?foo=bar"
129 | },
130 | {
131 | "description": "Url with query string with multiple values",
132 | "url": "url.com/?foo=bar&hello=world"
133 | },
134 | {
135 | "description": "Url with complex query string",
136 | "url": "url.com/?foo[]=bar&foo[]=helloworld"
137 | },
138 | {
139 | "description": "Url with url encoded string",
140 | "url": "url.com/Test+url+encoding+with+symbols+!%40£%24%25^%26*()_%2B"
141 | },
142 | {
143 | "description": "Url with extension",
144 | "url": "url.com/foo.bar"
145 | }
146 | ]
147 |
--------------------------------------------------------------------------------
/test/fixtures/non-matches.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "description": "Dot separated string without a valid TLD",
4 | "url": "url.notatld"
5 | },
6 | {
7 | "description": "Url with extra chars after a valid TLD",
8 | "url": "url.comextrachars"
9 | },
10 | {
11 | "description": "Url without dot",
12 | "url": "urlcom"
13 | },
14 | {
15 | "description": "Url with colon but no port number",
16 | "url": "url.com:"
17 | },
18 | {
19 | "description": "TLD with no hostname",
20 | "url": ".com"
21 | },
22 | {
23 | "description": "TLD with no hostname but two dots",
24 | "url": "..com"
25 | },
26 | {
27 | "description": "Hyphen at end of hostname",
28 | "url": "foo-.com"
29 | },
30 | {
31 | "description": "Dot separated strings that don't end with a TLD",
32 | "url": "url.com.notatld"
33 | },
34 | {
35 | "description": "Backslash instead of hostname",
36 | "url": "\\.com"
37 | },
38 | {
39 | "description": "Double backslash instead of hostname",
40 | "url": "\\\\.com"
41 | },
42 | {
43 | "description": "Hostname ending with space",
44 | "url": "url .com"
45 | }
46 | ]
47 |
--------------------------------------------------------------------------------
/test/unit.js:
--------------------------------------------------------------------------------
1 | import test from 'ava';
2 |
3 | import Urls from '../dist/my-name-is-url';
4 | import Parser from '../dist/parser';
5 | import regex from '../dist/regex';
6 |
7 | import grabbable from './fixtures/grabbable.json';
8 | import matches from './fixtures/matches.json';
9 | import nonMatches from './fixtures/non-matches.json';
10 |
// The factory hands back a Parser
test('Urls() should return instance of parser', t => {
  t.true(Urls() instanceof Parser);
});

// The regex is reachable straight from the factory
test('Urls() should expose regex as property', t => {
  t.is(Urls.regex, regex);
});

// Missing text, empty text, text without urls and text with a url all
// produce an array
test('.get() should always return an array', t => {
  t.true(Array.isArray(Urls().get()));
  t.true(Array.isArray(Urls('').get()));
  t.true(Array.isArray(Urls('no url').get()));
  t.true(Array.isArray(Urls('url.com').get()));
});

test('.get() should match a url', t => {
  t.deepEqual(Urls('url.com').get(), ['url.com']);
});

test('.filter() should throw error if filter callback is invalid', t => {
  t.throws(() => Urls().filter());
});

// The callback must visibly change the url: an identity callback would let
// this test pass even if filter() never touched the text
test('.filter() should filter matching urls', t => {
  const filteredUrl = Urls('hello url.com world').filter(url => `<a href="${url}">${url}</a>`);
  t.is(filteredUrl, 'hello <a href="url.com">url.com</a> world');
});
38 |
// Urls embedded in surrounding text: every expected url must be extracted
for (const { description, text, matches: expected } of grabbable) {
  test(`Grabbable: ${description}`, t => t.deepEqual(Urls(text).get(), expected));
}

// Strings that are a url in their entirety: the whole string is the only match
for (const { description, url } of matches) {
  test(`Match: ${description}`, t => t.deepEqual(Urls(url).get(), [url]));
}

// Strings that must not produce any match at all
for (const { description, url } of nonMatches) {
  test(`Non Match: ${description}`, t => t.deepEqual(Urls(url).get(), []));
}
50 |
--------------------------------------------------------------------------------