├── .editorconfig ├── .gitattributes ├── .gitignore ├── .npmrc ├── .travis.yml ├── index.d.ts ├── index.js ├── index.test-d.ts ├── license ├── package.json ├── readme.md └── test.js /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | end_of_line = lf 6 | charset = utf-8 7 | trim_trailing_whitespace = true 8 | insert_final_newline = true 9 | 10 | [*.yml] 11 | indent_style = space 12 | indent_size = 2 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | yarn.lock 3 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '10' 4 | - '8' 5 | -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | declare namespace urlRegex { 2 | interface Options { 3 | /** 4 | Only match an exact string. Useful with `RegExp#test` to check if a string is a URL. 5 | 6 | @default false 7 | */ 8 | readonly exact?: boolean; 9 | 10 | /** 11 | Force URLs to start with a valid protocol or `www`. If set to `false` it'll match the TLD against a list of valid [TLDs](https://github.com/stephenmathieson/node-tlds). 
12 | 13 | @default true 14 | */ 15 | readonly strict?: boolean; 16 | } 17 | } 18 | 19 | /** 20 | Regular expression for matching URLs. 21 | 22 | @example 23 | ``` 24 | import urlRegex = require('url-regex'); 25 | 26 | urlRegex().test('http://github.com foo bar'); 27 | //=> true 28 | 29 | urlRegex().test('www.github.com foo bar'); 30 | //=> true 31 | 32 | urlRegex({exact: true}).test('http://github.com foo bar'); 33 | //=> false 34 | 35 | urlRegex({exact: true}).test('http://github.com'); 36 | //=> true 37 | 38 | urlRegex({strict: false}).test('github.com foo bar'); 39 | //=> true 40 | 41 | urlRegex({exact: true, strict: false}).test('github.com'); 42 | //=> true 43 | 44 | 'foo http://github.com bar //google.com'.match(urlRegex()); 45 | //=> ['http://github.com', '//google.com'] 46 | ``` 47 | */ 48 | declare function urlRegex(options?: urlRegex.Options): RegExp; 49 | 50 | export = urlRegex; 51 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | const ipRegex = require('ip-regex'); 3 | const tlds = require('tlds'); 4 | 5 | module.exports = options => { 6 | options = { 7 | strict: true, 8 | ...options 9 | }; 10 | 11 | const protocol = `(?:(?:[a-z]+:)?//)${options.strict ? '' : '?'}`; 12 | const auth = '(?:\\S+(?::\\S*)?@)?'; 13 | const ip = ipRegex.v4().source; 14 | const host = '(?:(?:[a-z\\u00a1-\\uffff0-9][-_]*)*[a-z\\u00a1-\\uffff0-9]+)'; 15 | const domain = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*'; 16 | const tld = `(?:\\.${options.strict ? '(?:[a-z\\u00a1-\\uffff]{2,})' : `(?:${tlds.sort((a, b) => b.length - a.length).join('|')})`})\\.?`; 17 | const port = '(?::\\d{2,5})?'; 18 | const path = '(?:[/?#][^\\s"]*)?'; 19 | const regex = `(?:${protocol}|www\\.)${auth}(?:localhost|${ip}|${host}${domain}${tld})${port}${path}`; 20 | 21 | return options.exact ? 
new RegExp(`(?:^${regex}$)`, 'i') : new RegExp(regex, 'ig'); 22 | }; 23 | -------------------------------------------------------------------------------- /index.test-d.ts: -------------------------------------------------------------------------------- 1 | import {expectType} from 'tsd'; 2 | import urlRegex = require('.'); 3 | 4 | expectType(urlRegex()); 5 | expectType(urlRegex({exact: true})); 6 | expectType(urlRegex({strict: false})); 7 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Kevin Mårtensson and Diego Perini 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "url-regex", 3 | "version": "5.0.0", 4 | "description": "Regular expression for matching URLs", 5 | "license": "MIT", 6 | "repository": "kevva/url-regex", 7 | "author": { 8 | "name": "Kevin Mårtensson", 9 | "email": "kevinmartensson@gmail.com", 10 | "url": "https://github.com/kevva" 11 | }, 12 | "engines": { 13 | "node": ">=8" 14 | }, 15 | "scripts": { 16 | "test": "xo && ava && tsd" 17 | }, 18 | "files": [ 19 | "index.js", 20 | "index.d.ts" 21 | ], 22 | "keywords": [ 23 | "regex", 24 | "string", 25 | "url" 26 | ], 27 | "dependencies": { 28 | "ip-regex": "^4.1.0", 29 | "tlds": "^1.203.0" 30 | }, 31 | "devDependencies": { 32 | "ava": "^1.4.1", 33 | "tsd": "^0.7.2", 34 | "xo": "^0.24.0" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # url-regex [![Build Status](http://img.shields.io/travis/kevva/url-regex.svg?style=flat)](https://travis-ci.org/kevva/url-regex) 2 | 3 | > Regular expression for matching URLs 4 | 5 | Based on this [gist](https://gist.github.com/dperini/729294) by Diego Perini. 
6 | 7 | 8 | ## Install 9 | 10 | ``` 11 | $ npm install url-regex 12 | ``` 13 | 14 | 15 | ## Usage 16 | 17 | ```js 18 | const urlRegex = require('url-regex'); 19 | 20 | urlRegex().test('http://github.com foo bar'); 21 | //=> true 22 | 23 | urlRegex().test('www.github.com foo bar'); 24 | //=> true 25 | 26 | urlRegex({exact: true}).test('http://github.com foo bar'); 27 | //=> false 28 | 29 | urlRegex({exact: true}).test('http://github.com'); 30 | //=> true 31 | 32 | urlRegex({strict: false}).test('github.com foo bar'); 33 | //=> true 34 | 35 | urlRegex({exact: true, strict: false}).test('github.com'); 36 | //=> true 37 | 38 | 'foo http://github.com bar //google.com'.match(urlRegex()); 39 | //=> ['http://github.com', '//google.com'] 40 | ``` 41 | 42 | 43 | ## API 44 | 45 | ### urlRegex([options]) 46 | 47 | Returns a `RegExp` for matching URLs. 48 | 49 | #### options 50 | 51 | ##### exact 52 | 53 | Type: `boolean`
54 | Default: `false` 55 | 56 | Only match an exact string. Useful with `RegExp#test` to check if a string is a URL. 57 | 58 | ##### strict 59 | 60 | Type: `boolean`
import test from 'ava';
import urlRegex from '.';

// Every fixture must match in full with the default (strict) pattern.
test('match exact URLs', t => {
	const fixtures = [
		'http://foo.com/blah_blah',
		'http://foo.com/blah_blah/',
		'http://foo.com/blah_blah_(wikipedia)',
		'http://foo.com/blah_blah_(wikipedia)_(again)',
		'http://www.example.com/wpstyle/?p=364',
		'https://www.example.com/foo/?bar=baz&inga=42&quux',
		'http://a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
		'http://a_b.z.com',
		'http://mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
		'http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
		'http://www.microsoft.xn--comindex-g03d.html.irongeek.com',
		'http://✪df.ws/123',
		'http://localhost/',
		'http://userid:password@example.com:8080',
		'http://userid:password@example.com:8080/',
		'http://userid@example.com',
		'http://userid@example.com/',
		'http://userid@example.com:8080',
		'http://userid@example.com:8080/',
		'http://userid:password@example.com',
		'http://userid:password@example.com/',
		'http://142.42.1.1/',
		'http://142.42.1.1:8080/',
		'http://➡.ws/䨹',
		'http://⌘.ws',
		'http://⌘.ws/',
		'http://foo.com/blah_(wikipedia)#cite-1',
		'http://foo.com/blah_(wikipedia)_blah#cite-1',
		'http://foo.com/unicode_(✪)_in_parens',
		'http://foo.com/(something)?after=parens',
		'http://☺.damowmow.com/',
		'http://code.google.com/events/#&product=browser',
		'http://j.mp',
		'ftp://foo.bar/baz',
		'http://foo.bar/?q=Test%20URL-encoded%20stuff',
		'http://مثال.إختبار',
		'http://例子.测试',
		'http://उदाहरण.परीक्षा',
		'http://-.~_!$&\'()*+\';=:%40:80%2f::::::@example.com',
		'http://1337.net',
		'http://a.b-c.de',
		'http://223.255.255.254',
		'http://example.com?foo=bar',
		'http://example.com#foo',
		'ws://localhost:8080',
		'ws://foo.ws',
		'ws://a.b-c.de',
		'ws://223.255.255.254',
		'ws://userid:password@example.com',
		'ws://➡.ws/䨹',
		'//localhost:8080',
		'//foo.ws',
		'//a.b-c.de',
		'//223.255.255.254',
		'//userid:password@example.com',
		'//➡.ws/䨹',
		'www.google.com/unicorn',
		'http://example.com.'
	];

	// The exact variant has no `g` flag, so one instance can be reused safely.
	const regex = urlRegex({exact: true});

	for (const x of fixtures) {
		// The fixture is passed as the message so a failure names the URL.
		t.true(regex.test(x), x);
	}
});

// The global variant must pull every URL out of surrounding text and markup.
test('match URLs in text', t => {
	// The two anchor tags supply the `http://example.com…` URLs expected below;
	// the path pattern stops at the closing double quote of each `href`.
	const fixture = `
		Lorem ipsum //dolor.sit
		<a href="http://example.com">example.com</a>
		<a href="http://example.com/with-path">with path</a>
		[and another](https://another.example.com) and
		Foo //bar.net/?q=Query with spaces
	`;

	t.deepEqual(fixture.match(urlRegex()), [
		'//dolor.sit',
		'http://example.com',
		'http://example.com/with-path',
		'https://another.example.com',
		'//bar.net/?q=Query'
	]);
});

// None of these may match in full with the default (strict) pattern.
test('do not match URLs', t => {
	const fixtures = [
		'http://',
		'http://.',
		'http://..',
		'http://../',
		'http://?',
		'http://??',
		'http://??/',
		'http://#',
		'http://##',
		'http://##/',
		'http://foo.bar?q=Spaces should be encoded',
		'//',
		'//a',
		'///a',
		'///',
		'http:///a',
		'foo.com',
		'rdar://1234',
		'h://test',
		'http:// shouldfail.com',
		':// should fail',
		'http://foo.bar/foo(bar)baz quux',
		'http://-error-.invalid/',
		'http://-a.b.co',
		'http://a.b-.co',
		'http://123.123.123',
		'http://3628126748',
		'http://.www.foo.bar/',
		'http://.www.foo.bar./',
		'http://go/ogle.com',
		'http://foo.bar/ /',
		'http://a.b_z.com',
		'http://ab_.z.com',
		'http://google\\.com',
		'http://www(google.com',
		'http://www.example.xn--overly-long-punycode-test-string-test-tests-123-test-test123/',
		'http://www=google.com',
		'https://www.g.com/error\n/bleh/bleh',
		'rdar://1234',
		'/foo.bar/',
		'///www.foo.bar./'
	];

	const regex = urlRegex({exact: true});

	for (const x of fixtures) {
		t.false(regex.test(x), x);
	}
});

// With `strict: false` the protocol is optional and the TLD list is used.
test('match using list of TLDs', t => {
	const fixtures = [
		'foo.com/blah_blah',
		'foo.com/blah_blah/',
		'foo.com/blah_blah_(wikipedia)',
		'foo.com/blah_blah_(wikipedia)_(again)',
		'www.example.com/wpstyle/?p=364',
		'www.example.com/foo/?bar=baz&inga=42&quux',
		'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
		'mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
		'user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
		'www.microsoft.xn--comindex-g03d.html.irongeek.com',
		'✪df.ws/123',
		'localhost/',
		'userid:password@example.com:8080',
		'userid:password@example.com:8080/',
		'userid@example.com',
		'userid@example.com/',
		'userid@example.com:8080',
		'userid@example.com:8080/',
		'userid:password@example.com',
		'userid:password@example.com/',
		'142.42.1.1/',
		'142.42.1.1:8080/',
		'➡.ws/䨹',
		'⌘.ws',
		'⌘.ws/',
		'foo.com/blah_(wikipedia)#cite-1',
		'foo.com/blah_(wikipedia)_blah#cite-1',
		'foo.com/unicode_(✪)_in_parens',
		'foo.com/(something)?after=parens',
		'☺.damowmow.com/',
		'code.google.com/events/#&product=browser',
		'j.mp',
		'foo.bar/baz',
		'foo.bar/?q=Test%20URL-encoded%20stuff',
		'-.~_!$&\'()*+\';=:%40:80%2f::::::@example.com',
		'1337.net',
		'a.b-c.de',
		'223.255.255.254',
		'example.com?foo=bar',
		'example.com#foo',
		'localhost:8080',
		'foo.ws',
		'a.b-c.de',
		'223.255.255.254',
		'userid:password@example.com',
		'➡.ws/䨹',
		'//localhost:8080',
		'//foo.ws',
		'//a.b-c.de',
		'//223.255.255.254',
		'//userid:password@example.com',
		'//➡.ws/䨹',
		'www.google.com/unicorn',
		'example.com.'
	];

	const regex = urlRegex({exact: true, strict: false});

	for (const x of fixtures) {
		t.true(regex.test(x), x);
	}
});