├── .gitignore ├── .npmignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── package.json ├── src ├── my-name-is-url.js ├── parser.js └── regex.js └── test ├── fixtures ├── grabbable.json ├── matches.json └── non-matches.json └── unit.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | .nyc_output 4 | .DS_Store 5 | npm-debug.log 6 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | src 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: node 3 | script: npm run lint && npm test 4 | after_success: npm run coverage 5 | notifications: 6 | email: 7 | on_success: never 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | All notable changes to this project will be documented in this file. 4 | This project adheres to [Semantic Versioning](http://semver.org/). 5 | 6 | ## [1.3.2] - 2016-05-23 7 | 8 | - Generate source maps for code coverage 9 | - Remove Code Climate integration 10 | - Migrate tests to AVA 11 | - Update jspm command with registry alias 12 | - Add better API docs 13 | 14 | ## [1.3.1] - 2016-05-02 15 | 16 | - Fix hostname/subdomain matching 17 | - Some tweaks to the readme 18 | 19 | ## [1.3.0] - 2016-05-02 20 | 21 | - Stricter checking of hostnames 22 | - Reuse hostname check for subdomains 23 | - Use more reliable checks for end of sentence 24 | - Match custom schemes 25 | - Improve regex readability 26 | 27 | ## [1.2.0] - 2016-05-01 28 | 29 | - Don't allow dots in hostname 30 | - Match optional subdomain 31 | 32 | ## [1.1.0] - 2016-04-30 33 | 34 | - Added test coverage 35 | - Added Code Climate integration 36 | - Reformat readme 37 | - Hostname must contain at least one char 38 | - Match scheme for any pattern 39 | - Make urls in HTML tags grabbable 40 | - Make urls in double quotes grabbable 41 | - Ignore double quotes in urls 42 | - Add change log 43 | 44 | ## [1.0.0] - 2016-04-22 45 | 46 | - First release 47 | 48 | [1.3.2]: https://github.com/lukechilds/my-name-is-url/compare/v1.3.1...v1.3.2 49 | [1.3.1]: https://github.com/lukechilds/my-name-is-url/compare/v1.3.0...v1.3.1 50 | [1.3.0]: https://github.com/lukechilds/my-name-is-url/compare/v1.2.0...v1.3.0 51 | [1.2.0]: https://github.com/lukechilds/my-name-is-url/compare/v1.1.0...v1.2.0 52 | [1.1.0]: https://github.com/lukechilds/my-name-is-url/compare/v1.0.0...v1.1.0 53 | [1.0.0]: https://github.com/lukechilds/my-name-is-url/compare/v0.0.0...v1.0.0 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Luke Childs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # my-name-is-url [![Build Status](https://travis-ci.org/lukechilds/my-name-is-url.svg?branch=master)](https://travis-ci.org/lukechilds/my-name-is-url) [![Coverage Status](https://coveralls.io/repos/github/lukechilds/my-name-is-url/badge.svg?branch=master)](https://coveralls.io/github/lukechilds/my-name-is-url?branch=master) 2 | 3 | Intelligently recognises many different url formats in a string. For the browser and node. [Here, have a play](http://lukechilds.github.io/my-name-is-url). 4 | 5 | ## About 6 | 7 | `my-name-is-url` was created because I couldn't find a parser with a high enough success rate. The url spec is so vague that many strings _could_ be a url, therefore matching the spec directly results in a lot of false positives. Most parsers get around this by requiring a url to contain a scheme to be matched as a url. 8 | 9 | The regular expression used in `my-name-is-url` tries to match patterns likely to represent a url in a sentence rather than matching the actual url spec. This results in a much wider scope of matchable urls than most other parsers without introducing loads of false positives. 10 | 11 | > ❗️**Important note** 12 | > 13 | > If you're trying to parse a url into sections (scheme,host) or check a url is valid this module isn't for you. This module is intended to find urls in a string. 14 | 15 | ## Install 16 | 17 | ```shell 18 | npm install --save my-name-is-url 19 | ``` 20 | 21 | or 22 | 23 | ```shell 24 | jspm install my-name-is-url 25 | ``` 26 | 27 | ## Usage 28 | 29 | ```js 30 | import Urls from 'my-name-is-url'; 31 | 32 | const getText = 'Check out these sites: foobar.com,//foo.ninja,http://bar.com.'; 33 | Urls(getText).get(); 34 | // [ 'foobar.com', '//foo.ninja', 'http://bar.com' ] 35 | 36 | const filterText = 'My GitHub profile: https://github.com/lukechilds'; 37 | Urls(filterText).filter(url => `${url}`); 38 | // 'My GitHub profile: https://github.com/lukechilds' 39 | ``` 40 | 41 | ### Importing 42 | 43 | CommonJS 44 | 45 | ```js 46 | var Urls = require('my-name-is-url'); 47 | ``` 48 | 49 | ES6 50 | 51 | ```js 52 | import Urls from 'my-name-is-url'; 53 | ``` 54 | 55 | ### Regex 56 | 57 | If you just wanna do your own thing the regex used internally is helpfully exposed. 58 | 59 | ```js 60 | var urlRegex = require('my-name-is-url').regex; 61 | ``` 62 | 63 | or 64 | 65 | ```js 66 | import { regex as urlRegex } from 'my-name-is-url'; 67 | ``` 68 | 69 | ## API 70 | 71 | ### regex 72 | 73 | The regex used internally for matching urls. 74 | 75 | ### get() 76 | 77 | Returns an array of url matches. If there are no matches an empty array will be returned. 78 | 79 | ```js 80 | const text = 'Check out these sites: foobar.com,//foo.ninja,http://bar.com.'; 81 | 82 | Urls(text).get(); 83 | // [ 'foobar.com', '//foo.ninja', 'http://bar.com' ] 84 | ``` 85 | 86 | ### filter(cb) 87 | 88 | Runs a filter callback on each url in a string. 89 | 90 | #### cb 91 | 92 | *Required* 93 | 94 | Type: `function` 95 | 96 | ```js 97 | const text = 'My GitHub profile: https://github.com/lukechilds'; 98 | 99 | Urls(text).filter(url => `${url}`); 100 | // 'My GitHub profile: https://github.com/lukechilds' 101 | ``` 102 | 103 | > 👍 **Pro tip** 104 | > 105 | > You can get a parser instance by calling `Urls()` or `new Urls`, whichever you prefer. 106 | 107 | ## License 108 | 109 | MIT © Luke Childs 110 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "my-name-is-url", 3 | "version": "1.4.0", 4 | "description": "Intelligent URL parser", 5 | "keywords": [ 6 | "urls", 7 | "url", 8 | "uri", 9 | "get", 10 | "extract", 11 | "find", 12 | "filter", 13 | "scrape", 14 | "text", 15 | "string", 16 | "browser" 17 | ], 18 | "main": "dist/my-name-is-url.js", 19 | "dependencies": { 20 | "tlds": "^1.110.0" 21 | }, 22 | "devDependencies": { 23 | "ava": "^0.25.0", 24 | "babel-cli": "^6.7.5", 25 | "babel-plugin-add-module-exports": "^0.2.1", 26 | "babel-preset-es2015": "^6.6.0", 27 | "codecov": "^2.2.0", 28 | "coveralls": "^3.0.0", 29 | "eslint": "^4.2.0", 30 | "eslint-config-lukechilds": "^1.1.0", 31 | "nyc": "^11.0.2", 32 | "pre-commit": "^1.1.2" 33 | }, 34 | "scripts": { 35 | "prebuild": "rm -rf dist", 36 | "build": "babel -d dist src", 37 | "prebuild:map": "npm run prebuild", 38 | "build:map": "babel --source-maps=true -d dist src", 39 | "pretest": "npm run build:map", 40 | "test": "nyc ava test", 41 | "lint": "eslint src", 42 | "coverage": "nyc report --reporter=text-lcov | coveralls && nyc report --reporter=text-lcov > coverage.lcov && codecov", 43 | "prepublish": "npm run build" 44 | }, 45 | "pre-commit": [ 46 | "lint", 47 | "test" 48 | ], 49 | "babel": { 50 | "presets": [ 51 | "es2015" 52 | ], 53 | "plugins": [ 54 | "add-module-exports" 55 | ] 56 | }, 57 | "eslintConfig": { 58 | "extends": "lukechilds" 59 | }, 60 | "repository": { 61 | "type": "git", 62 | "url": "git+https://github.com/lukechilds/my-name-is-url.git" 63 | }, 64 | "author": "Luke Childs (http://lukechilds.co.uk)", 65 | "license": "MIT", 66 | "bugs": { 67 | "url": "https://github.com/lukechilds/my-name-is-url/issues" 68 | }, 69 | "homepage": "https://github.com/lukechilds/my-name-is-url" 70 | } 71 | -------------------------------------------------------------------------------- /src/my-name-is-url.js: -------------------------------------------------------------------------------- 1 | import Parser from './parser'; 2 | import regex from './regex'; 3 | 4 | // Factory function to return parser instance 5 | const Urls = (text = null) => new Parser(text); 6 | 7 | // Expose regex here for easy access 8 | Urls.regex = regex; 9 | 10 | export default Urls; 11 | -------------------------------------------------------------------------------- /src/parser.js: -------------------------------------------------------------------------------- 1 | import regex from './regex'; 2 | 3 | // Parser class 4 | export default class Parser { 5 | 6 | constructor(text = null) { 7 | this.text = text; 8 | } 9 | 10 | get() { 11 | 12 | // Make sure we have a string 13 | if(typeof this.text !== 'string') { 14 | this.text = ''; 15 | } 16 | 17 | // Always return array 18 | return this.text.match(regex) || []; 19 | } 20 | 21 | filter(cb) { 22 | 23 | // Make sure we have a string 24 | if(typeof this.text !== 'string') { 25 | this.text = ''; 26 | } 27 | 28 | // Check callback is a funciton 29 | if(typeof cb !== 'function') { 30 | throw new Error('Invalid filter callback'); 31 | } 32 | 33 | // Run filter on urls 34 | return this.text.replace(regex, cb); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/regex.js: -------------------------------------------------------------------------------- 1 | import tlds from 'tlds'; 2 | 3 | // Reusables 4 | const validTlds = tlds.concat(['local', 'dev']).join('|'); 5 | const escapeChar = `\\`; 6 | const notWhitespaceCommaDoubleQuoteOrDot = `[^${escapeChar}s,"]`; 7 | const dot = `${escapeChar}.`; 8 | const hostnameChars = `[a-z0-9]`; 9 | const number = `[0-9]`; 10 | const endingChars = `${dot}?([${escapeChar}s<>",]|$)`; 11 | 12 | // Sections 13 | const optionalScheme = `(([a-z]+:)?//)?`; 14 | const hostname = `(((${hostnameChars}-*)*${hostnameChars}+)${dot})+(${validTlds})`; 15 | const ip = `(${number}{1,3}${dot}){3}${number}{1,3}`; 16 | const optionalPortNumber = `(:${number}+)?`; 17 | const optionalSlash = `(${escapeChar}/(${notWhitespaceCommaDoubleQuoteOrDot}*)?)?`; 18 | const endsWithButDontMatch = `(?=${endingChars})`; 19 | 20 | // Build 21 | const regex = `${optionalScheme}(localhost|${hostname}|${ip})${optionalPortNumber}${optionalSlash}${endsWithButDontMatch}`; 22 | 23 | export default new RegExp(regex, 'gi'); 24 | -------------------------------------------------------------------------------- /test/fixtures/grabbable.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "Comma separated urls", 4 | "text": "Check out these sites: site1.com,site2.com,site3.com", 5 | "matches": ["site1.com", "site2.com", "site3.com"] 6 | }, 7 | { 8 | "description": "Urls at the end of a sentence", 9 | "text": "Check out this site: url.com.", 10 | "matches": ["url.com"] 11 | }, 12 | { 13 | "description": "Urls at the end of a sentence with newline", 14 | "text": "Check out this site: url.com.\nNewline", 15 | "matches": ["url.com"] 16 | }, 17 | { 18 | "description": "Urls at the end of a sentence with space", 19 | "text": "Check out this site: url.com. ", 20 | "matches": ["url.com"] 21 | }, 22 | { 23 | "description": "Urls before line break", 24 | "text": "Check out this site: url.com\n", 25 | "matches": ["url.com"] 26 | }, 27 | { 28 | "description": "Url in HTML", 29 | "text": "url.com", 30 | "matches": ["url.com"] 31 | }, 32 | { 33 | "description": "Url with scheme in HTML", 34 | "text": "http://url.com", 35 | "matches": ["http://url.com"] 36 | }, 37 | { 38 | "description": "IP in HTML", 39 | "text": "192.168.0.11", 40 | "matches": ["192.168.0.11"] 41 | }, 42 | { 43 | "description": "Url in double quotes", 44 | "text": "\"url.com\"", 45 | "matches": ["url.com"] 46 | }, 47 | { 48 | "description": "Url with scheme in double quotes", 49 | "text": "\"http://url.com\"", 50 | "matches": ["http://url.com"] 51 | }, 52 | { 53 | "description": "IP in double quotes", 54 | "text": "\"192.168.0.11\"", 55 | "matches": ["192.168.0.11"] 56 | }, 57 | { 58 | "description": "Url after double quote in hostname", 59 | "text": "double\"quote.com", 60 | "matches": ["quote.com"] 61 | }, 62 | { 63 | "description": "Url before double quote in path", 64 | "text": "url.com/double\"quote", 65 | "matches": ["url.com/double"] 66 | } 67 | ] 68 | -------------------------------------------------------------------------------- /test/fixtures/matches.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "Url starting with http://", 4 | "url": "http://url.com" 5 | }, 6 | { 7 | "description": "Url starting with https://", 8 | "url": "https://url.com" 9 | }, 10 | { 11 | "description": "Url starting with //", 12 | "url": "//url.com" 13 | }, 14 | { 15 | "description": "Url starting with custom scheme", 16 | "url": "custom://url.com" 17 | }, 18 | { 19 | "description": "Url with no scheme", 20 | "url": "url.com" 21 | }, 22 | { 23 | "description": "Url with gTLD", 24 | "url": "url.website" 25 | }, 26 | { 27 | "description": "Url with .local TLD", 28 | "url": "url.local" 29 | }, 30 | { 31 | "description": "Url with .dev TLD", 32 | "url": "url.dev" 33 | }, 34 | { 35 | "description": "Url with trailing slash", 36 | "url": "url.com/" 37 | }, 38 | { 39 | "description": "Url with port number", 40 | "url": "url.com:8080" 41 | }, 42 | { 43 | "description": "Url with port number and trailing slash", 44 | "url": "url.com:8080/" 45 | }, 46 | { 47 | "description": "IP address", 48 | "url": "192.168.0.1" 49 | }, 50 | { 51 | "description": "IP address with trailing slash", 52 | "url": "192.168.0.1/" 53 | }, 54 | { 55 | "description": "IP address starting with http://", 56 | "url": "http://192.168.0.1" 57 | }, 58 | { 59 | "description": "IP address starting with https://", 60 | "url": "https://192.168.0.1" 61 | }, 62 | { 63 | "description": "IP address starting with //", 64 | "url": "//192.168.0.1" 65 | }, 66 | { 67 | "description": "Subdomain", 68 | "url": "subdomain.url.com" 69 | }, 70 | { 71 | "description": "Subdomain with trailing slash", 72 | "url": "subdomain.url.com/" 73 | }, 74 | { 75 | "description": "Subdomain starting with http://", 76 | "url": "http://subdomain.url.com" 77 | }, 78 | { 79 | "description": "Subdomain starting with https://", 80 | "url": "https://subdomain.url.com" 81 | }, 82 | { 83 | "description": "Subdomain starting with //", 84 | "url": "//subdomain.url.com" 85 | }, 86 | { 87 | "description": "Deep subdomain", 88 | "url": "d.e.e.p.subdomain.url.com" 89 | }, 90 | { 91 | "description": "Localhost", 92 | "url": "localhost" 93 | }, 94 | { 95 | "description": "Localhost with trailing slash", 96 | "url": "localhost/" 97 | }, 98 | { 99 | "description": "Localhost starting with http://", 100 | "url": "http://localhost" 101 | }, 102 | { 103 | "description": "Localhost starting with https://", 104 | "url": "https://localhost" 105 | }, 106 | { 107 | "description": "Localhost starting with //", 108 | "url": "//localhost" 109 | }, 110 | { 111 | "description": "Hostname with hyphon", 112 | "url": "hyphon-url.com" 113 | }, 114 | { 115 | "description": "Url with slug", 116 | "url": "url.com/slug" 117 | }, 118 | { 119 | "description": "Url with hash", 120 | "url": "url.com/#hash" 121 | }, 122 | { 123 | "description": "Url with query string", 124 | "url": "url.com/?foo" 125 | }, 126 | { 127 | "description": "Url with query string with value", 128 | "url": "url.com/?foo=bar" 129 | }, 130 | { 131 | "description": "Url with query string with multiple values", 132 | "url": "url.com/?foo=bar&hello=world" 133 | }, 134 | { 135 | "description": "Url with complex query string", 136 | "url": "url.com/?foo[]=bar&foo[]=helloworld" 137 | }, 138 | { 139 | "description": "Url with url encoded string", 140 | "url": "url.com/Test+url+encoding+with+symbols+!%40£%24%25^%26*()_%2B" 141 | }, 142 | { 143 | "description": "Url with extension", 144 | "url": "url.com/foo.bar" 145 | } 146 | ] 147 | -------------------------------------------------------------------------------- /test/fixtures/non-matches.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "Dot seperated string without a valid TLD", 4 | "url": "url.notatld" 5 | }, 6 | { 7 | "description": "Url with extra chars after a valid TLD", 8 | "url": "url.comextrachars" 9 | }, 10 | { 11 | "description": "Url without dot", 12 | "url": "urlcom" 13 | }, 14 | { 15 | "description": "Url with semicolon but no port number", 16 | "url": "url.com:" 17 | }, 18 | { 19 | "description": "TLD with no hostname", 20 | "url": ".com" 21 | }, 22 | { 23 | "description": "TLD with no hostname but two dots", 24 | "url": "..com" 25 | }, 26 | { 27 | "description": "Hyphon at end of hostname", 28 | "url": "foo-.com" 29 | }, 30 | { 31 | "description": "Dot seperated strings that don't end with a TLD", 32 | "url": "url.com.notatld" 33 | }, 34 | { 35 | "description": "Backslash instead of hostname", 36 | "url": "\\.com" 37 | }, 38 | { 39 | "description": "Double backslash instead of hostname", 40 | "url": "\\\\.com" 41 | }, 42 | { 43 | "description": "Hostname ending with space", 44 | "url": "url .com" 45 | } 46 | ] 47 | -------------------------------------------------------------------------------- /test/unit.js: -------------------------------------------------------------------------------- 1 | import test from 'ava'; 2 | 3 | import Urls from '../dist/my-name-is-url'; 4 | import Parser from '../dist/parser'; 5 | import regex from '../dist/regex'; 6 | 7 | import grabbable from './fixtures/grabbable.json'; 8 | import matches from './fixtures/matches.json'; 9 | import nonMatches from './fixtures/non-matches.json'; 10 | 11 | test('Urls() should return instance of parser', t => { 12 | t.true(Urls() instanceof Parser); 13 | }); 14 | 15 | test('Urls() should expose regex as property', t => { 16 | t.is(Urls.regex, regex); 17 | }); 18 | 19 | test('.get() should always return an array', t => { 20 | t.true(Urls().get() instanceof Array); 21 | t.true(Urls('').get() instanceof Array); 22 | t.true(Urls('no url').get() instanceof Array); 23 | t.true(Urls('url.com').get() instanceof Array); 24 | }); 25 | 26 | test('.get() should match a url', t => { 27 | t.deepEqual(Urls('url.com').get(), ['url.com']); 28 | }); 29 | 30 | test('.filter() should throw error if filter callback is invalid', t => { 31 | t.throws(() => Urls().filter()); 32 | }); 33 | 34 | test('.filter() should filter matching urls', t => { 35 | const filteredUrl = Urls('hello url.com world').filter(url => `${url}`); 36 | t.is(filteredUrl, 'hello url.com world'); 37 | }); 38 | 39 | grabbable.forEach(grab => { 40 | test(`Grabbable: ${grab.description}`, t => t.deepEqual(Urls(grab.text).get(), grab.matches)); 41 | }); 42 | 43 | matches.forEach(match => { 44 | test(`Match: ${match.description}`, t => t.deepEqual(Urls(match.url).get(), [match.url])); 45 | }); 46 | 47 | nonMatches.forEach(nonMatch => { 48 | test(`Non Match: ${nonMatch.description}`, t => t.deepEqual(Urls(nonMatch.url).get(), [])); 49 | }); 50 | --------------------------------------------------------------------------------