├── .commitlintrc.js
├── .dist.babelrc
├── .dist.eslintrc
├── .editorconfig
├── .eslintignore
├── .gitattributes
├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .huskyrc
├── .lib.babelrc
├── .lib.eslintrc
├── .lintstagedrc.js
├── .npmrc
├── .nycrc
├── .prettierrc.js
├── .remarkignore
├── .remarkrc.js
├── .xo-config.js
├── LICENSE
├── README.md
├── package.json
├── src
    └── index.js
└── test
    ├── browser.js
    └── test.js


/.commitlintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   extends: ['@commitlint/config-conventional']
3 | };
4 | 


--------------------------------------------------------------------------------
/.dist.babelrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "presets": [
 3 |     ["@babel/env", {
 4 |       "targets": {
 5 |         "browsers": [ "defaults, not ie 11" ]
 6 |       }
 7 |     }]
 8 |   ]
 9 | }
10 | 


--------------------------------------------------------------------------------
/.dist.eslintrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": ["eslint:recommended"],
 3 |   "parser": "@babel/eslint-parser",
 4 |   "parserOptions": {
 5 |     "requireConfigFile": false,
 6 |     "babelOptions": {
 7 |       "babelrc": false,
 8 |       "configFile": false,
 9 |       "presets": ["@babel/preset-env"]
10 |     }
11 |   },
12 |   "env": {
13 |     "node": false,
14 |     "browser": true,
15 |     "amd": true,
16 |     "es6": true,
17 |     "commonjs": true
18 |   },
19 |   "plugins": ["compat"],
20 |   "rules": {
21 |     "no-unused-vars": "off"
22 |   },
23 |   "globals": {
24 |   },
25 |   "settings": {
26 |     "polyfills": [
27 |     ]
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | indent_style = space
 5 | indent_size = 2
 6 | end_of_line = lf
 7 | charset = utf-8
 8 | trim_trailing_whitespace = true
 9 | insert_final_newline = true
10 | 


--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | !.*.js
2 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   - push
 4 |   - pull_request
 5 | jobs:
 6 |   build:
 7 |     runs-on: ${{ matrix.os }}
 8 |     strategy:
 9 |       matrix:
10 |         os:
11 |           - ubuntu-latest
12 |         node_version:
13 |           - 16
14 |           - 18
15 |     name: Node ${{ matrix.node_version }} on ${{ matrix.os }}
16 |     steps:
17 |       - uses: actions/checkout@v3
18 |       - name: Setup node
19 |         uses: actions/setup-node@v3
20 |         with:
21 |           node-version: ${{ matrix.node_version }}
22 |       - name: Install dependencies
23 |         run: npm install
24 |       - name: Run tests
25 |         run: npm run test
26 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | *.log
 3 | .idea
 4 | node_modules
 5 | coverage
 6 | .nyc_output
 7 | locales/
 8 | package-lock.json
 9 | yarn.lock
10 | 
11 | Thumbs.db
12 | tmp/
13 | temp/
14 | *.lcov
15 | .env
16 | lib
17 | dist
18 | 


--------------------------------------------------------------------------------
/.huskyrc:
--------------------------------------------------------------------------------
1 | {
2 |   "hooks": {
3 |     "pre-commit": "lint-staged",
4 |     "commit-msg": "commitlint -E HUSKY_GIT_PARAMS"
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/.lib.babelrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "presets": [
 3 |     ["@babel/env", {
 4 |       "targets": {
 5 |         "node": "14",
 6 |         "browsers": [ "defaults, not ie 11" ]
 7 |       }
 8 |     }]
 9 |   ],
10 |   "sourceMaps": "both"
11 | }
12 | 


--------------------------------------------------------------------------------
/.lib.eslintrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": ["eslint:recommended", "plugin:node/recommended"],
 3 |   "env": { "browser": true },
 4 |   "plugins": ["compat"],
 5 |   "rules": {
 6 |   },
 7 |   "settings": {
 8 |     "polyfills": []
 9 |   }
10 | }
11 | 


--------------------------------------------------------------------------------
/.lintstagedrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   '*.md': (filenames) => filenames.map((filename) => `remark ${filename} -qfo`),
3 |   'package.json': 'fixpack',
4 |   '*.js': 'xo --fix'
5 | };
6 | 


--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | package-lock=false
2 | 


--------------------------------------------------------------------------------
/.nycrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extension": [
 3 |     ".js"
 4 |   ],
 5 |   "report-dir": "./coverage",
 6 |   "temp-dir": "./.nyc_output",
 7 |   "check-coverage": true,
 8 |   "lines": 100,
 9 |   "functions": 100,
10 |   "branches": 100,
11 |   "reporter": ["lcov", "html", "text"]
12 | }
13 | 


--------------------------------------------------------------------------------
/.prettierrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   singleQuote: true,
3 |   bracketSpacing: true,
4 |   trailingComma: 'none'
5 | };
6 | 


--------------------------------------------------------------------------------
/.remarkignore:
--------------------------------------------------------------------------------
1 | test/snapshots/**/*.md
2 | 


--------------------------------------------------------------------------------
/.remarkrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   plugins: ['preset-github']
3 | };
4 | 


--------------------------------------------------------------------------------
/.xo-config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   prettier: true,
3 |   space: true,
4 |   extends: ['xo-lass']
5 | };
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Forward Email LLC, Kevin Mårtensson <kevinmartensson@gmail.com>, and Diego Perini
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # url-regex-safe
  2 | 
  3 | [![build status](https://github.com/spamscanner/url-regex-safe/actions/workflows/ci.yml/badge.svg)](https://github.com/spamscanner/url-regex-safe/actions/workflows/ci.yml)
  4 | [![code style](https://img.shields.io/badge/code_style-XO-5ed9c7.svg)](https://github.com/sindresorhus/xo)
  5 | [![styled with prettier](https://img.shields.io/badge/styled_with-prettier-ff69b4.svg)](https://github.com/prettier/prettier)
  6 | [![made with lass](https://img.shields.io/badge/made_with-lass-95CC28.svg)](https://lass.js.org)
  7 | [![license](https://img.shields.io/github/license/spamscanner/url-regex-safe.svg)](LICENSE)
  8 | [![npm downloads](https://img.shields.io/npm/dt/url-regex-safe.svg)](https://npm.im/url-regex-safe)
  9 | 
 10 | > Regular expression matching for URL's. Maintained, safe, and browser-friendly version of [url-regex][]. Resolves [CVE-2020-7661][cve] for Node.js servers. Works in Node v14+ and browsers.  **Maintained for [Spam Scanner][spam-scanner] and [Forward Email][forward-email]**.
 11 | 
 12 | 
 13 | ## Table of Contents
 14 | 
 15 | * [Foreword](#foreword)
 16 | * [Install](#install)
 17 | * [Usage](#usage)
 18 |   * [Node](#node)
 19 |   * [Browser](#browser)
 20 | * [Options](#options)
 21 | * [Quick tips and migration from url-regex](#quick-tips-and-migration-from-url-regex)
 22 | * [Limitations](#limitations)
 23 | * [Contributors](#contributors)
 24 | * [License](#license)
 25 | 
 26 | 
 27 | ## Foreword
 28 | 
 29 | After discovering [CVE-2020-7661][cve] and disclosing it [publicly](https://portswigger.net/daily-swig/unpatched-regex-bug-leaves-node-js-apps-open-to-redos-attacks) (through my work on [Spam Scanner][spam-scanner] and [Forward Email][forward-email]) – I used an implementation of [url-regex][] with some extra glue on top to filter out bad URL matches.
 30 | 
 31 | However after using it on [Forward Email][forward-email] in production (which processes hundreds of thousands of emails per week), I found and documented several more [core issues](https://github.com/kevva/url-regex/pull/35) with [url-regex][].
 32 | 
 33 | Realizing that [url-regex][] is no longer actively maintained, has 9 open pull requests as of this writing, and also lacked browser support – I decided to write this package for everyone and merge all the open pull requests.
 34 | 
 35 | This package should hopefully more closely resemble real-world intended usage of a URL regular expression, and also allow the user to configure it as they wish.  Please check out [Forward Email][forward-email] if this package helped you, and explore our source code on GitHub which shows how we use this package.
 36 | 
 37 | 
 38 | ## Install
 39 | 
 40 | **NOTE:** The default behavior of this package will attempt to load [re2](https://github.com/uhop/node-re2) (it is an optional peer dependency used to prevent regular expression denial of service attacks and more).  If you wish to use this behavior, you must have `re2` installed via `npm install re2` – otherwise it will fallback to using normal `RegExp` instances.  As of v4.0.0 we added an option if you wish to force this package to not even attempt to load `re2` (e.g. it's in your `node_modules` [but you don't want to use it](https://github.com/spamscanner/url-regex-safe/issues/28)) – simply pass `re2: false` as an option.
 41 | 
 42 | [npm][]:
 43 | 
 44 | ```sh
 45 | npm install url-regex-safe
 46 | ```
 47 | 
 48 | 
 49 | ## Usage
 50 | 
 51 | ### Node
 52 | 
 53 | We've resolved [CVE-2020-7661][cve] by including [RE2][] for Node.js usage.  You will not have to manually wrap your URL regular expressions with `new RE2(urlRegex())` anymore through `url-regex-safe` (we do it automatically for you).
 54 | 
 55 | ```js
 56 | const urlRegexSafe = require('url-regex-safe');
 57 | 
 58 | const str = 'some long string with url.com in it';
 59 | const matches = str.match(urlRegexSafe());
 60 | 
 61 | for (const match of matches) {
 62 |   console.log('match', match);
 63 | }
 64 | 
 65 | console.log(urlRegexSafe({ exact: true }).test('github.com'));
 66 | ```
 67 | 
 68 | ### Browser
 69 | 
 70 | Since [RE2][] is not made for the browser, it will not be used, and therefore [CVE-2020-7661][cve] is still an issue on the client-side. However it is not severe since the most it would do is crash the browser tab (as on the Node.js side it would have crashed the entire process and thrown an out of memory exception).
 71 | 
 72 | #### VanillaJS
 73 | 
 74 | This is the solution for you if you're just using `<script>` tags everywhere!
 75 | 
 76 | ```html
 77 | <script src="https://unpkg.com/url-regex-safe"></script>
 78 | <script type="text/javascript">
 79 |   (function() {
 80 |     var str = 'some long string with url.com in it';
 81 |     var matches = str.match(urlRegexSafe());
 82 | 
 83 |     for (var i=0; i<matches.length; i++) {
 84 |       console.log('match', matches[i]);
 85 |     }
 86 | 
 87 |     console.log(urlRegexSafe({ exact: true }).test('github.com'));
 88 |   })();
 89 | </script>
 90 | ```
 91 | 
 92 | #### Bundler
 93 | 
 94 | Assuming you are using [browserify][], [webpack][], [rollup][], or another bundler, you can simply follow [Node](#node) usage above.
 95 | 
 96 | #### TypeScript
 97 | 
 98 | To use this package with [TypeScript](https://www.typescriptlang.org/), you can install the [`@types/url-regex-safe`](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/master/types/url-regex-safe) package for type definitions.
 99 | 
100 | ```sh
101 | npm install --save-dev @types/url-regex-safe
102 | ```
103 | 
104 | 
105 | ## Options
106 | 
107 | | Property         | Type    | Default Value                                                | Description                                                                                                                                                                                                                                                                                                                                                    |   |
108 | | ---------------- | ------- | ------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | - |
109 | | `re2`            | Boolean | `true`                                                       | Attempt to load `re2` to use instead of `RegExp` for creating new regular expression instances.  If you pass `re2: false`, then `re2` will not even be attempted to be loaded.                                                                                                                                                                                 |   |
110 | | `exact`          | Boolean | `false`                                                      | Only match an exact String. Useful with `regex.test(str)` to check if a String is a URL. We set this to `false` by default in order to match String values such as `github.com` (as opposed to requiring a protocol or `www` subdomain).  We feel this more closely resembles real-world intended usage of this package.                                       |   |
111 | | `strict`         | Boolean | `false`                                                      | Force URL's to start with a valid protocol or `www` if set to `true`. If `true`, then it will allow any TLD as long as it is a minimum of 2 valid characters. If it is `false`, then it will match the TLD against the list of valid TLD's using [tlds](https://github.com/stephenmathieson/node-tlds#readme).                                                 |   |
112 | | `auth`           | Boolean | `false`                                                      | Match against Basic Authentication headers. We set this to `false` by default since [it was deprecated in Chromium](https://bugs.chromium.org/p/chromium/issues/detail?id=82250#c7), and otherwise it leaves the user with unwanted URL matches (more closely resembles real-world intended usage of this package by having it set to `false` by default too). |   |
113 | | `localhost`      | Boolean | `true`                                                       | Allows localhost in the URL hostname portion. See the [test/test.js](test/test.js) for more insight into the localhost test and how it will return a value which may be unwanted. A pull request would be considered to resolve the "pic.jp" vs. "pic.jpg" issue.                                                                                              |   |
114 | | `parens`         | Boolean | `false`                                                      | Match against Markdown-style trailing parenthesis. We set this to `false` because it should be up to the user to parse for Markdown URL's.                                                                                                                                                                                                                     |   |
115 | | `apostrophes`    | Boolean | `false`                                                      | Match against apostrophes. We set this to `false` because we don't want the String `background: url('http://example.com/pic.jpg');` to result in `http://example.com/pic.jpg'`.  See this [issue](https://github.com/kevva/url-regex/pull/55) for more information.                                                                                            |   |
116 | | `trailingPeriod` | Boolean | `false`                                                      | Match against trailing periods. We set this to `false` by default since real-world behavior would want `example.com` versus `example.com.` as the match (this is different than [url-regex][] where it matches the trailing period in that package).                                                                                                           |   |
117 | | `ipv4`           | Boolean | `true`                                                       | Match against IPv4 URL's.                                                                                                                                                                                                                                                                                                                                      |   |
118 | | `ipv6`           | Boolean | `true`                                                       | Match against IPv6 URL's.                                                                                                                                                                                                                                                                                                                                      |   |
119 | | `tlds`           | Array   | [tlds](https://github.com/stephenmathieson/node-tlds#readme) | Match against a specific list of tlds, or the default list provided by [tlds](https://github.com/stephenmathieson/node-tlds#readme).                                                                                                                                                                                                                           |   |
120 | | `returnString`   | Boolean | `false`                                                      | Return the RegExp as a String instead of a `RegExp` (useful for custom logic, such as we did with [Spam Scanner][spam-scanner]).                                                                                                                                                                                                                               |   |
121 | 
122 | 
123 | ## Quick tips and migration from url-regex
124 | 
125 | You must override the default and set `strict: true` if you do not wish to match `github.com` by itself (though `www.github.com` will work if `strict: true`).
126 | 
127 | Unlike the deprecated and unmaintained package [url-regex][], we do a few things differently:
128 | 
129 | * We set `strict` to `false` by default ([url-regex][] had this set to `true`)
130 | * We added an `auth` option, which is set to `false` by default ([url-regex][] matches against Basic Authentication; had this set to `true` - however this is a deprecated behavior in Chromium).
131 | * We added `parens` and `ipv6` options, which are set to `false` and `true` by default ([url-regex][] had `parens` set to `true` and `ipv6` was non-existent or set to `false` rather).
132 | * We added an `apostrophes` option, which is set to `false` by default ([url-regex][] had this set to `true`).
133 | * We added a `trailingPeriod` option, which is set to `false` by default (which means matches won't contain trailing periods, whereas [url-regex][] had this set to `true`).
134 | 
135 | 
136 | ## Limitations
137 | 
138 | **This limitation only applies if you are using `re2`**: Since we cannot use regular expression's "negative lookbehinds" functionality (due to [RE2][] limitations), we could not merge the logic from this [pull request](https://github.com/kevva/url-regex/pull/67/commits/6c31d81c35c3bb72c413c6e4af92a37b2689ead2).  This would have allowed us to make it so `example.jpeg` would match only if it was `example.jp`, however if you pass `example.jpeg` right now it will extract `example.jp` from it (since `.jp` is a TLD).  An alternative solution may exist, and we welcome community contributions regarding this issue.
139 | 
140 | 
141 | ## Contributors
142 | 
143 | | Name                  | Website                    |
144 | | --------------------- | -------------------------- |
145 | | **Forward Email LLC** | <https://forwardemail.net> |
146 | | **Kevin Mårtensson**  |                            |
147 | | **Diego Perini**      |                            |
148 | 
149 | 
150 | ## License
151 | 
152 | [MIT](LICENSE) © [Forward Email LLC](https://forwardemail.net)
153 | 
154 | 
155 | ##
156 | 
157 | [npm]: https://www.npmjs.com/
158 | 
159 | [cve]: https://nvd.nist.gov/vuln/detail/CVE-2020-7661
160 | 
161 | [re2]: https://github.com/uhop/node-re2
162 | 
163 | [browserify]: https://github.com/browserify/browserify
164 | 
165 | [webpack]: https://github.com/webpack/webpack
166 | 
167 | [rollup]: https://github.com/rollup/rollup
168 | 
169 | [url-regex]: https://github.com/kevva/url-regex
170 | 
171 | [spam-scanner]: https://spamscanner.net
172 | 
173 | [forward-email]: https://forwardemail.net
174 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "name": "url-regex-safe",
  3 |   "description": "Regular expression matching for URL's. Maintained, safe, and browser-friendly version of url-regex. Resolves CVE-2020-7661. Works in Node v14+ and browsers.",
  4 |   "version": "4.0.0",
  5 |   "author": "Forward Email LLC (https://forwardemail.net)",
  6 |   "browser": {
  7 |     "re2": false
  8 |   },
  9 |   "bugs": {
 10 |     "url": "https://github.com/spamscanner/url-regex-safe/issues"
 11 |   },
 12 |   "contributors": [
 13 |     "Forward Email LLC (https://forwardemail.net)",
 14 |     "Kevin Mårtensson <kevinmartensson@gmail.com>",
 15 |     "Diego Perini"
 16 |   ],
 17 |   "dependencies": {
 18 |     "ip-regex": "4.3.0",
 19 |     "tlds": "^1.242.0"
 20 |   },
 21 |   "devDependencies": {
 22 |     "@babel/cli": "^7.22.10",
 23 |     "@babel/core": "^7.22.10",
 24 |     "@babel/eslint-parser": "^7.22.10",
 25 |     "@babel/preset-env": "^7.22.10",
 26 |     "@commitlint/cli": "^17.7.1",
 27 |     "@commitlint/config-conventional": "^17.7.0",
 28 |     "ava": "^4.3.0",
 29 |     "babelify": "^10.0.0",
 30 |     "browserify": "^17.0.0",
 31 |     "cross-env": "^7.0.3",
 32 |     "eslint": "^8.47.0",
 33 |     "eslint-config-xo-lass": "^2.0.1",
 34 |     "eslint-plugin-compat": "^4.1.4",
 35 |     "eslint-plugin-node": "^11.1.0",
 36 |     "fixpack": "^4.0.0",
 37 |     "husky": "^8.0.3",
 38 |     "jsdom": "15",
 39 |     "lint-staged": "^14.0.0",
 40 |     "nyc": "^15.1.0",
 41 |     "re2": "^1.20.1",
 42 |     "remark-cli": "^11.0.0",
 43 |     "remark-preset-github": "^4.0.4",
 44 |     "rimraf": "^5.0.1",
 45 |     "tinyify": "^3.0.0",
 46 |     "xo": "^0.56.0"
 47 |   },
 48 |   "engines": {
 49 |     "node": ">= 14"
 50 |   },
 51 |   "files": [
 52 |     "lib",
 53 |     "dist"
 54 |   ],
 55 |   "homepage": "https://github.com/spamscanner/url-regex-safe",
 56 |   "jsdelivr": "dist/url-regex-safe.min.js",
 57 |   "keywords": [
 58 |     "2020",
 59 |     "7661",
 60 |     "CVE-2020-7661",
 61 |     "cve",
 62 |     "detect",
 63 |     "email",
 64 |     "emails",
 65 |     "expresion",
 66 |     "expression",
 67 |     "from",
 68 |     "get",
 69 |     "html",
 70 |     "mail",
 71 |     "mails",
 72 |     "maintained",
 73 |     "parse",
 74 |     "parser",
 75 |     "parsing",
 76 |     "regex",
 77 |     "regexer",
 78 |     "regexer",
 79 |     "regexes",
 80 |     "regexing",
 81 |     "regexp",
 82 |     "safe",
 83 |     "scan",
 84 |     "sniff",
 85 |     "str",
 86 |     "string",
 87 |     "text",
 88 |     "url",
 89 |     "urls"
 90 |   ],
 91 |   "license": "MIT",
 92 |   "main": "lib/index.js",
 93 |   "peerDependencies": {
 94 |     "re2": "^1.20.1"
 95 |   },
 96 |   "peerDependenciesMeta": {
 97 |     "re2": {
 98 |       "optional": true
 99 |     }
100 |   },
101 |   "repository": {
102 |     "type": "git",
103 |     "url": "https://github.com/spamscanner/url-regex-safe"
104 |   },
105 |   "scripts": {
106 |     "browserify": "browserify src/index.js -o dist/url-regex-safe.js -s urlRegexSafe -g [ babelify --configFile ./.dist.babelrc ]",
107 |     "build": "npm run build:clean && npm run build:lib && npm run build:dist",
108 |     "build:clean": "rimraf lib dist",
109 |     "build:dist": "npm run browserify && npm run minify",
110 |     "build:lib": "babel --config-file ./.lib.babelrc src --out-dir lib",
111 |     "lint": "npm run lint:js && npm run lint:md && npm run lint:pkg && npm run lint:lib && npm run lint:dist",
112 |     "lint:dist": "eslint --no-inline-config -c .dist.eslintrc dist",
113 |     "lint:js": "xo --fix",
114 |     "lint:lib": "eslint -c .lib.eslintrc lib",
115 |     "lint:md": "remark . -qfo",
116 |     "lint:pkg": "fixpack",
117 |     "minify": "cross-env NODE_ENV=production browserify src/index.js -o dist/url-regex-safe.min.js -s urlRegexSafe -g [ babelify --configFile ./.dist.babelrc ] -p tinyify",
118 |     "prepare": "husky install",
119 |     "pretest": "npm run build && npm run lint",
120 |     "test": "cross-env NODE_ENV=test nyc ava"
121 |   },
122 |   "unpkg": "dist/url-regex-safe.min.js"
123 | }
124 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
 1 | const ipRegex = require('ip-regex');
 2 | const tlds = require('tlds');
 3 | 
 4 | const ipv4 = ipRegex.v4().source;
 5 | const ipv6 = ipRegex.v6().source;
 6 | const host = '(?:(?:[a-z\\u00a1-\\uffff0-9][-_]*)*[a-z\\u00a1-\\uffff0-9]+)';
 7 | const domain = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*';
 8 | const strictTld = '(?:[a-z\\u00a1-\\uffff]{2,})';
 9 | const defaultTlds = `(?:${tlds.sort((a, b) => b.length - a.length).join('|')})`;
10 | const port = '(?::\\d{2,5})?';
11 | 
12 | let RE2;
13 | let hasRE2;
14 | 
15 | module.exports = (options) => {
16 |   options = {
17 |     //
18 |     // attempt to use re2, if set to false will use RegExp
19 |     // (we did this approach because we don't want to load in-memory re2 if users don't want it)
20 |     // <https://github.com/spamscanner/url-regex-safe/issues/28>
21 |     //
22 |     re2: true,
23 |     exact: false,
24 |     strict: false,
25 |     auth: false,
26 |     localhost: true,
27 |     parens: false,
28 |     apostrophes: false,
29 |     trailingPeriod: false,
30 |     ipv4: true,
31 |     ipv6: true,
32 |     returnString: false,
33 |     ...options
34 |   };
35 | 
36 |   /* istanbul ignore next */
37 |   const SafeRegExp =
38 |     options.re2 && hasRE2 !== false
39 |       ? (() => {
40 |           if (typeof RE2 === 'function') return RE2;
41 |           try {
42 |             RE2 = require('re2');
43 |             return typeof RE2 === 'function' ? RE2 : RegExp;
44 |           } catch {
45 |             hasRE2 = false;
46 |             return RegExp;
47 |           }
48 |         })()
49 |       : RegExp;
50 | 
51 |   const protocol = `(?:(?:[a-z]+:)?//)${options.strict ? '' : '?'}`;
52 | 
53 |   // Add option to disable matching urls with HTTP Basic Authentication
54 |   // <https://github.com/kevva/url-regex/pull/63>
55 |   const auth = options.auth ? '(?:\\S+(?::\\S*)?@)?' : '';
56 | 
57 |   // Add ability to pass custom list of tlds
58 |   // <https://github.com/kevva/url-regex/pull/66>
59 |   const tld = `(?:\\.${
60 |     options.strict
61 |       ? strictTld
62 |       : options.tlds
63 |       ? `(?:${options.tlds.sort((a, b) => b.length - a.length).join('|')})`
64 |       : defaultTlds
65 |   })${options.trailingPeriod ? '\\.?' : ''}`;
66 | 
67 |   let disallowedChars = '\\s"';
68 |   if (!options.parens) {
69 |     // Not accept closing parenthesis
70 |     // <https://github.com/kevva/url-regex/pull/35>
71 |     disallowedChars += '\\)';
72 |   }
73 | 
74 |   if (!options.apostrophes) {
75 |     // Don't allow apostrophes
76 |     // <https://github.com/kevva/url-regex/pull/55>
77 |     disallowedChars += "'";
78 |   }
79 | 
80 |   const path = options.trailingPeriod
81 |     ? `(?:[/?#][^${disallowedChars}]*)?`
82 |     : `(?:(?:[/?#][^${disallowedChars}]*[^${disallowedChars}.?!])|[/])?`;
83 | 
84 |   // Added IPv6 support
85 |   // <https://github.com/kevva/url-regex/issues/60>
86 |   let regex = `(?:${protocol}|www\\.)${auth}(?:`;
87 |   if (options.localhost) regex += 'localhost|';
88 |   if (options.ipv4) regex += `${ipv4}|`;
89 |   if (options.ipv6) regex += `${ipv6}|`;
90 |   regex += `${host}${domain}${tld})${port}${path}`;
91 | 
92 |   // Add option to return the regex string instead of a RegExp
93 |   if (options.returnString) return regex;
94 | 
95 |   return options.exact
96 |     ? new SafeRegExp(`(?:^${regex}$)`, 'i')
97 |     : new SafeRegExp(regex, 'ig');
98 | };
99 | 


--------------------------------------------------------------------------------
/test/browser.js:
--------------------------------------------------------------------------------
 1 | const path = require('node:path');
 2 | const { readFileSync } = require('node:fs');
 3 | const { Script } = require('node:vm');
 4 | const test = require('ava');
 5 | const { JSDOM, VirtualConsole } = require('jsdom');
 6 | 
 7 | const virtualConsole = new VirtualConsole();
 8 | virtualConsole.sendTo(console); // Route output from the jsdom window to the Node console.
 9 | 
10 | const script = new Script( // Wrap the minified browser bundle (../dist relative to this file) in a vm Script.
11 |   readFileSync(path.join(__dirname, '..', 'dist', 'url-regex-safe.min.js'))
12 | );
13 | 
14 | const dom = new JSDOM(``, { // Empty document that reports http://localhost:3000/ as its URL and referrer.
15 |   url: 'http://localhost:3000/',
16 |   referrer: 'http://localhost:3000/',
17 |   contentType: 'text/html',
18 |   includeNodeLocations: true,
19 |   resources: 'usable',
20 |   runScripts: 'dangerously', // Required so scripts are allowed to execute inside the window.
21 |   virtualConsole
22 | });
23 | 
24 | dom.runVMScript(script); // Run the bundle in the window; NOTE(review): presumably requires a jsdom version that still ships runVMScript — confirm.
25 | 
26 | test('should work in the browser', (t) => { // Smoke test against the bundle evaluated in the jsdom window.
27 |   t.true(typeof dom.window.urlRegexSafe === 'function'); // The bundle must expose `urlRegexSafe` on window.
28 |   t.true(dom.window.urlRegexSafe({ exact: true }).test('github.com')); // Exact mode accepts a bare domain.
29 |   t.deepEqual( // Default (global) mode extracts the URL embedded in surrounding text.
30 |     'some long string with url.com in it'.match(dom.window.urlRegexSafe()),
31 |     ['url.com']
32 |   );
33 | });
34 | 


--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
  1 | const test = require('ava');
  2 | const urlRegex = require('..');
  3 | 
  4 | const fixtures = [ // Inputs that the loop below expects to match in full.
  5 |   "http://-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
  6 |   '//223.255.255.254',
  7 |   '//a.b-c.de',
  8 |   '//foo.ws',
  9 |   '//localhost:8080',
 10 |   '//userid:password@example.com',
 11 |   '//➡.ws/䨹',
 12 |   'ftp://foo.bar/baz',
 13 |   'http://1337.net',
 14 |   'http://142.42.1.1/',
 15 |   'http://142.42.1.1:8080/',
 16 |   'http://223.255.255.254',
 17 |   'http://a.b-c.de',
 18 |   'http://a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
 19 |   'http://a_b.z.com',
 20 |   'http://code.google.com/events/#&product=browser',
 21 |   'http://example.com#foo',
 22 |   'http://example.com.',
 23 |   'http://example.com?foo=bar',
 24 |   'http://foo.bar/?q=Test%20URL-encoded%20stuff',
 25 |   'http://foo.com/(something)?after=parens',
 26 |   'http://foo.com/blah_(wikipedia)#cite-1',
 27 |   'http://foo.com/blah_(wikipedia)_blah#cite-1',
 28 |   'http://foo.com/blah_blah',
 29 |   'http://foo.com/blah_blah/',
 30 |   'http://foo.com/blah_blah_(wikipedia)',
 31 |   'http://foo.com/blah_blah_(wikipedia)_(again)',
 32 |   'http://foo.com/unicode_(✪)_in_parens',
 33 |   'http://j.mp',
 34 |   'http://localhost/',
 35 |   'http://mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
 36 |   'http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
 37 |   'http://userid:password@example.com',
 38 |   'http://userid:password@example.com/',
 39 |   'http://userid:password@example.com:8080',
 40 |   'http://userid:password@example.com:8080/',
 41 |   'http://userid@example.com',
 42 |   'http://userid@example.com/',
 43 |   'http://userid@example.com:8080',
 44 |   'http://userid@example.com:8080/',
 45 |   'http://www.example.com/wpstyle/?p=364',
 46 |   'http://www.microsoft.xn--comindex-g03d.html.irongeek.com',
 47 |   'http://⌘.ws',
 48 |   'http://⌘.ws/',
 49 |   'http://☺.damowmow.com/',
 50 |   'http://✪df.ws/123',
 51 |   'http://➡.ws/䨹',
 52 |   'https://www.example.com/foo/?bar=baz&inga=42&quux',
 53 |   'ws://223.255.255.254',
 54 |   'ws://a.b-c.de',
 55 |   'ws://foo.ws',
 56 |   'ws://localhost:8080',
 57 |   'ws://userid:password@example.com',
 58 |   'ws://➡.ws/䨹',
 59 |   'www.google.com/unicorn'
 60 | ];
 61 | for (const x of fixtures) { // One AVA test per fixture so a failure names the offending URL.
 62 |   test(`match exact URLs: ${x}`, (t) => {
 63 |     t.true(
 64 |       urlRegex({
 65 |         exact: true,
 66 |         auth: true, // Needed for the `user:pass@` fixtures.
 67 |         parens: true, // Needed for the fixtures whose paths contain `)`.
 68 |         trailingPeriod: true // Needed for 'http://example.com.'.
 69 |       }).test(x)
 70 |     );
 71 |   });
 72 | }
 73 | 
 74 | for (const x of [ // Hosts and TLDs written in non-Latin scripts.
 75 |   'http://مثال.إختبار',
 76 |   'http://उदाहरण.परीक्षा',
 77 |   'http://例子.测试'
 78 | ]) {
 79 |   test(`match exact URLs with strict set to true: ${x}`, (t) => { // One test per input.
 80 |     t.true(
 81 |       urlRegex({ exact: true, strict: true, auth: true, parens: true }).test(x)
 82 |     );
 83 |   });
 84 | }
 85 | 
 86 | test('match URLs in text', (t) => { // Extracts every URL from a multi-line text/HTML/Markdown snippet.
 87 |   const fixture = `
 88 | 		Foo //bar.net/?q=Query with spaces
 89 | 		Lorem ipsum //dolor.sit
 90 | 		<a href="http://example.com">example.com</a>
 91 | 		<a href="http://example.com/with-path">with path</a>
 92 | 		[and another](https://another.example.com) and
 93 | 	`;
 94 | 
 95 |   t.deepEqual(fixture.match(urlRegex({ strict: true })), [ // Expected matches end at whitespace, `"` or `)`; the bare `example.com` link text is not matched.
 96 |     '//bar.net/?q=Query',
 97 |     '//dolor.sit',
 98 |     'http://example.com',
 99 |     'http://example.com/with-path',
100 |     'https://another.example.com'
101 |   ]);
102 | });
103 | 
104 | for (const x of [ // Malformed or unsupported inputs that must be rejected.
105 |   'http://',
106 |   'http://.',
107 |   'http://..',
108 |   'http://../',
109 |   'http://?',
110 |   'http://??',
111 |   'http://??/',
112 |   'http://#',
113 |   'http://##',
114 |   'http://##/',
115 |   'http://foo.bar?q=Spaces should be encoded',
116 |   '//',
117 |   '//a',
118 |   '///a',
119 |   '///',
120 |   'http:///a',
121 |   'rdar://1234',
122 |   'h://test',
123 |   'http:// shouldfail.com',
124 |   ':// should fail',
125 |   'http://foo.bar/foo(bar)baz quux',
126 |   'http://-error-.invalid/',
127 |   'http://-a.b.co',
128 |   'http://a.b-.co',
129 |   'http://123.123.123',
130 |   'http://3628126748',
131 |   'http://.www.foo.bar/',
132 |   'http://.www.foo.bar./',
133 |   'http://go/ogle.com',
134 |   'http://foo.bar/ /',
135 |   'http://a.b_z.com',
136 |   'http://ab_.z.com',
137 |   'http://google\\.com',
138 |   'http://www(google.com',
139 |   'http://www.example.xn--overly-long-punycode-test-string-test-tests-123-test-test123/',
140 |   'http://www=google.com',
141 |   'https://www.g.com/error\n/bleh/bleh',
142 |   '/foo.bar/',
143 |   '///www.foo.bar./'
144 | ]) {
145 |   test(`do not match URLs: ${x}`, (t) => { // One test per input; only `exact` is set, all other options are defaults.
146 |     t.false(urlRegex({ exact: true }).test(x));
147 |   });
148 | }
149 | 
150 | test('do not match URLs: foo.com', (t) => { // A bare domain without a protocol must be rejected in strict mode.
151 |   t.false(urlRegex({ exact: true, strict: true }).test('foo.com'));
152 | });
153 | 
154 | for (const x of [ // Inputs without a scheme (bare or `//`-prefixed) that must still match in non-strict mode.
155 |   "-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
156 |   '//223.255.255.254',
157 |   '//a.b-c.de',
158 |   '//foo.ws',
159 |   '//localhost:8080',
160 |   '//userid:password@example.com',
161 |   '//➡.ws/䨹',
162 |   '1337.net',
163 |   '142.42.1.1/',
164 |   '142.42.1.1:8080/',
165 |   '223.255.255.254',
166 |   'a.b-c.de',
167 |   'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
168 |   'code.google.com/events/#&product=browser',
169 |   'example.com#foo',
170 |   'example.com',
171 |   'example.com.',
172 |   'example.com?foo=bar',
173 |   'foo.bar/?q=Test%20URL-encoded%20stuff',
174 |   'foo.bar/baz',
175 |   'foo.com/(something)?after=parens',
176 |   'foo.com/blah_(wikipedia)#cite-1',
177 |   'foo.com/blah_(wikipedia)_blah#cite-1',
178 |   'foo.com/blah_blah',
179 |   'foo.com/blah_blah/',
180 |   'foo.com/blah_blah_(wikipedia)',
181 |   'foo.com/blah_blah_(wikipedia)_(again)',
182 |   'foo.com/unicode_(✪)_in_parens',
183 |   'foo.ws',
184 |   'google.com',
185 |   'j.mp',
186 |   'localhost/',
187 |   'localhost:8080',
188 |   'mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
189 |   'user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
190 |   'userid:password@example.com',
191 |   'userid:password@example.com/',
192 |   'userid:password@example.com:8080',
193 |   'userid:password@example.com:8080/',
194 |   'userid@example.com',
195 |   'userid@example.com/',
196 |   'userid@example.com:8080',
197 |   'userid@example.com:8080/',
198 |   'www.example.com/foo/?bar=baz&inga=42&quux',
199 |   'www.example.com/wpstyle/?p=364',
200 |   'www.google.com/unicorn',
201 |   'www.microsoft.xn--comindex-g03d.html.irongeek.com',
202 |   '⌘.ws',
203 |   '⌘.ws/',
204 |   '☺.damowmow.com/',
205 |   '✪df.ws/123',
206 |   '➡.ws/䨹'
207 | ]) {
208 |   test(`match using list of TLDs: ${x}`, (t) => { // No `tlds` option is passed, so the library's default TLD pattern applies.
209 |     t.true(
210 |       urlRegex({
211 |         exact: true,
212 |         auth: true,
213 |         parens: true,
214 |         trailingPeriod: true
215 |       }).test(x)
216 |     );
217 |   });
218 | }
219 | 
220 | test('opt out of matching basic auth', (t) => {
221 |   const strictFixturesWithAuth = [
222 |     "http://-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
223 |     'http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
224 |     'http://userid:password@example.com',
225 |     'http://userid:password@example.com/with/path',
226 |     'http://userid:password@example.com:8080',
227 |     'http://userid:password@example.com:8080/path',
228 |     'http://userid@example.com',
229 |     'http://userid@example.com/with/path',
230 |     'http://userid@localhost:8080',
231 |     'http://userid@localhost:8080/path'
232 |   ];
233 | 
234 |   for (const x of strictFixturesWithAuth) {
235 |     // With protocol
236 |     t.false(urlRegex({ exact: true, strict: true, auth: false }).test(x));
237 | 
238 |     // Relative protocol
239 |     t.false(urlRegex({ exact: true, auth: false }).test(x.replace('http', '')));
240 | 
241 |     // No protocol
242 |     t.false(
243 |       urlRegex({ exact: true, auth: false }).test(x.replace('http://', ''))
244 |     );
245 |   }
246 | 
247 |   const textFixture = `
248 | 		Lorem ipsum http://userid:password@example.com:8080 dolor sit
249 | 		<a href="http://userid:password@example.com:8080/">example.com</a>
250 | 		another //userid:password@example.com one
251 | 		bites //userid:password@example.com/with/path the dust
252 | 		and http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body another one
253 | 		and <a href="http://user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body">another one</a>
254 | 		and another <a href="userid:password@example.com">one gone</a>
255 | 		and another userid@example.com one gone
256 | 		another http://userid@example.com/ one
257 | 		bites http://userid@localhost:8080 the
258 | 		dust http://userid@localhost:8080/path
259 | 	`;
260 | 
261 |   // Strict matches none because auth always breaks the url
262 |   t.is(
263 |     textFixture.match(urlRegex({ exact: false, strict: true, auth: false })),
264 |     null
265 |   );
266 | 
267 |   // Non-strict will only match domain:port/path as auth separates the protocol
268 |   const textFixtureMatches = [
269 |     'example.com:8080',
270 |     'example.com:8080/',
271 |     'example.com',
272 |     'example.com',
273 |     'example.com/with/path',
274 |     'example.com:123/one/two.three?q1=a1&q2=a2#body',
275 |     'example.com:123/one/two.three?q1=a1&q2=a2#body',
276 |     'example.com',
277 |     'example.com',
278 |     'example.com/',
279 |     'localhost:8080',
280 |     'localhost:8080/path'
281 |   ];
282 | 
283 |   // With protocol
284 |   t.deepEqual(
285 |     textFixtureMatches,
286 |     textFixture.match(urlRegex({ exact: false, auth: false }))
287 |   );
288 | 
289 |   // Relative protocol
290 |   t.deepEqual(
291 |     textFixtureMatches,
292 |     textFixture.replace('http:', '').match(urlRegex())
293 |   );
294 | 
295 |   // No protocol
296 |   t.deepEqual(
297 |     textFixtureMatches,
298 |     textFixture.replace('http://', '').match(urlRegex())
299 |   );
300 | });
301 | 
302 | test('match using explicit list of TLDs', (t) => { // Every fixture must match when a custom `tlds` array is supplied.
303 |   const fixtures = [
304 |     "-.~_!$&'()*+';=:%40:80%2f::::::@example.com",
305 |     "-.~_!$&'()*+';=:%40:80%2f::::::@example.onion",
306 |     '//223.255.255.254',
307 |     '//a.b-c.de',
308 |     '//foo.ws',
309 |     '//localhost:8080',
310 |     '//userid:password@example.com',
311 |     '//➡.onion/䨹',
312 |     '//➡.ws/䨹',
313 |     '1337.net',
314 |     '142.42.1.1/',
315 |     '142.42.1.1:8080/',
316 |     '223.255.255.254',
317 |     'a.b-c.de',
318 |     'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
319 |     'code.google.com/events/#&product=browser',
320 |     'example.com#foo',
321 |     'example.com.',
322 |     'example.com?foo=bar',
323 |     'example.onion',
324 |     'foo.bar/?q=Test%20URL-encoded%20stuff',
325 |     'foo.bar/baz',
326 |     'foo.com/(something)?after=parens',
327 |     'foo.com/blah_(wikipedia)#cite-1',
328 |     'foo.com/blah_(wikipedia)_blah#cite-1',
329 |     'foo.com/blah_blah',
330 |     'foo.com/blah_blah/',
331 |     'foo.com/blah_blah_(wikipedia)',
332 |     'foo.com/blah_blah_(wikipedia)_(again)',
333 |     'foo.com/unicode_(✪)_in_parens',
334 |     'foo.ws',
335 |     'j.mp',
336 |     'localhost/',
337 |     'localhost:8080',
338 |     'mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
339 |     'mw1.unicorn.education/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
340 |     'unicorn.education',
341 |     'user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
342 |     'userid:password@example.com',
343 |     'userid:password@example.com/',
344 |     'userid:password@example.com:8080',
345 |     'userid:password@example.com:8080/',
346 |     'userid:password@example.education',
347 |     'userid@example.com',
348 |     'userid@example.com/',
349 |     'userid@example.com:8080',
350 |     'userid@example.com:8080/',
351 |     'www.example.com/foo/?bar=baz&inga=42&quux',
352 |     'www.example.com/wpstyle/?p=364',
353 |     'www.example.onion/wpstyle/?p=364',
354 |     'www.google.com/unicorn',
355 |     'www.microsoft.xn--comindex-g03d.html.irongeek.com',
356 |     '⌘.ws',
357 |     '⌘.ws/',
358 |     '☺.damowmow.com/',
359 |     '✪df.ws/123',
360 |     '➡.ws/䨹'
361 |   ];
362 | 
363 |   for (const x of fixtures) { // All fixtures share this single test, so the first failing assertion fails it.
364 |     t.true(
365 |       urlRegex({
366 |         exact: true,
367 |         auth: true,
368 |         parens: true,
369 |         tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar', 'onion', 'education'], // Covers every TLD used by the fixtures above.
370 |         trailingPeriod: true
371 |       }).test(x)
372 |     );
373 |   }
374 | });
375 | 
376 | test('fail if not in explicit list of TLDs', (t) => { // Every fixture must be rejected with the `tlds` list passed below.
377 |   const fixtures = [
378 |     "-.~_!$&'()*+';=:%40:80%2f::::::@example.biz",
379 |     '//a.b-c.uk',
380 |     '//foo.uk',
381 |     '//userid:password@example.biz',
382 |     '//➡.cn/䨹',
383 |     '1337.biz',
384 |     'a.b-c.cn',
385 |     'a.b-c.ly',
386 |     'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.biz',
387 |     'code.google.biz/events/#&product=browser',
388 |     'example.biz#foo',
389 |     'example.biz.',
390 |     'example.biz?foo=bar',
391 |     'foo.baz/?q=Test%20URL-encoded%20stuff',
392 |     'foo.baz/baz',
393 |     'foo.baz/blah_blah',
394 |     'foo.biz/(something)?after=parens',
395 |     'foo.biz/blah_(wikipedia)#cite-1',
396 |     'foo.biz/blah_(wikipedia)_blah#cite-1',
397 |     'foo.biz/blah_blah_(wikipedia)',
398 |     'foo.biz/unicode_(✪)_in_parens',
399 |     'foo.co.uk/blah_blah/',
400 |     'foo.jp',
401 |     'foo.onion/blah_blah_(wikipedia)_(again)',
402 |     'j.onion',
403 |     'mw1.google.biz/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
404 |     'user:pass@example.biz:123/one/two.three?q1=a1&q2=a2#body',
405 |     'userid:password@example.biz',
406 |     'userid:password@example.biz/',
407 |     'userid:password@example.biz:8080',
408 |     'userid:password@example.biz:8080/',
409 |     'userid@example.biz',
410 |     'userid@example.biz/',
411 |     'userid@example.biz:8080',
412 |     'userid@example.biz:8080/',
413 |     'www.example.biz/foo/?bar=baz&inga=42&quux',
414 |     'www.example.education/wpstyle/?p=364',
415 |     'www.google.biz/unicorn',
416 |     'www.microsoft.xn--comindex-g03d.html.irongeek.biz',
417 |     '⌘.onion',
418 |     '⌘.onion/',
419 |     '☺.damowmow.biz/',
420 |     '✪df.onion/123',
421 |     '➡.onion/䨹',
422 |     '➡.uk/䨹'
423 |   ];
424 | 
425 |   for (const x of fixtures) { // All fixtures share this single test.
426 |     t.false(
427 |       urlRegex({
428 |         exact: true,
429 |         auth: true,
430 |         parens: true,
431 |         tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar'] // Unlike the previous test, `onion` and `education` are absent and `trailingPeriod` is not set.
432 |       }).test(x)
433 |     );
434 |   }
435 | });
436 | 
437 | test('do not match URLs with non-strict mode', (t) => { // A run of dotted digits ending in letters must not pass as one exact URL.
438 |   t.false(
439 |     urlRegex({ exact: true, auth: true, parens: true }).test(
440 |       '018137.113.215.4074.138.129.172220.179.206.94180.213.144.175250.45.147.1364868726sgdm6nohQ'
441 |     )
442 |   );
443 | });
444 | 
445 | test('IPv4', (t) => { // The `ipv4` option toggles matching of dotted-quad addresses.
446 |   t.true(urlRegex().test('1.1.1.1')); // Matched with default options.
447 |   t.false(urlRegex({ ipv4: false }).test('1.1.1.1')); // Rejected once ipv4 is switched off.
448 | });
449 | 
450 | test('IPv6', (t) => { // The `ipv6` option toggles matching of IPv6 addresses.
451 |   t.true(urlRegex().test('2606:4700:4700::1111')); // Matched with default options.
452 |   t.false(urlRegex({ ipv6: false }).test('2606:4700:4700::1111')); // Rejected once ipv6 is switched off.
453 | });
454 | 
455 | test('parses similar to Gmail by default', (t) => { // With default options only the domain after `@` is extracted from e-mail-like tokens.
456 |   t.deepEqual(
457 |     "foo@bar.com [foo]@bar.com foo bar @foob.com 'text@example.com, some text'".match(
458 |       urlRegex()
459 |     ),
460 |     ['bar.com', 'bar.com', 'foob.com', 'example.com']
461 |   );
462 | });
463 | 
464 | test('apostrophes', (t) => { // Shows how `apostrophes` and `parens` decide where a URL inside CSS `url('…')` ends.
465 |   t.deepEqual( // Defaults: the match stops before the closing apostrophe.
466 |     "background: url('http://example.com/pic.jpg');".match(urlRegex()),
467 |     ['http://example.com/pic.jpg']
468 |   );
469 |   t.deepEqual( // apostrophes: true — the apostrophe is included, the match still stops before `)`.
470 |     "background: url('http://example.com/pic.jpg');".match(
471 |       urlRegex({ apostrophes: true })
472 |     ),
473 |     ["http://example.com/pic.jpg'"]
474 |   );
475 |   t.deepEqual( // parens + apostrophes: the match runs to the end of the string, including `');`.
476 |     "background: url('http://example.com/pic.jpg');".match(
477 |       urlRegex({ parens: true, apostrophes: true })
478 |     ),
479 |     ["http://example.com/pic.jpg');"]
480 |   );
481 | });
482 | 
483 | test('returns string', (t) => { // With returnString set, the pattern source is returned instead of a RegExp.
484 |   t.true(typeof urlRegex({ returnString: true }) === 'string');
485 | });
486 | 
487 | test('localhost', (t) => { // The `localhost` option toggles matching of the literal host `localhost`.
488 |   t.deepEqual( // Enabled: the whole localhost URL is extracted.
489 |     "background: url('http://localhost/pic.jpg');".match(
490 |       urlRegex({ localhost: true })
491 |     ),
492 |     ['http://localhost/pic.jpg']
493 |   );
494 |   t.deepEqual( // Disabled: the only remaining hit is `pic.jp`, taken from inside the file name `pic.jpg`.
495 |     "background: url('http://localhost/pic.jpg');".match(
496 |       urlRegex({ localhost: false })
497 |     ),
498 |     ['pic.jp']
499 |   );
500 | });
501 | 
502 | for (const [source, withTrailingPeriod, withoutTrailingPeriod] of [ // Each row: [input text, matches with trailingPeriod: true, matches with trailingPeriod: false].
503 |   [
504 |     'background example.com. foobar.com',
505 |     ['example.com.', 'foobar.com'],
506 |     ['example.com', 'foobar.com']
507 |   ],
508 |   [
509 |     'https://example.com/dir.',
510 |     ['https://example.com/dir.'],
511 |     ['https://example.com/dir']
512 |   ],
513 |   [
514 |     'https://example.com/dir. ',
515 |     ['https://example.com/dir.'],
516 |     ['https://example.com/dir']
517 |   ],
518 |   [
519 |     'https://example.com/dir.\n',
520 |     ['https://example.com/dir.'],
521 |     ['https://example.com/dir']
522 |   ],
523 |   [
524 |     'https://example.com/index.html',
525 |     ['https://example.com/index.html'],
526 |     ['https://example.com/index.html']
527 |   ],
528 |   [
529 |     'https://example.com/index.html.',
530 |     ['https://example.com/index.html.'],
531 |     ['https://example.com/index.html']
532 |   ],
533 |   [
534 |     'https://example.com/dir.with.dot/.',
535 |     ['https://example.com/dir.with.dot/.'],
536 |     ['https://example.com/dir.with.dot/']
537 |   ],
538 |   // Question marks
539 |   ['Have you ever visited example.com?', ['example.com?'], ['example.com']],
540 |   ['example.com/?', ['example.com/?'], ['example.com/']],
541 |   [
542 |     'https://example.com/dir?',
543 |     ['https://example.com/dir?'],
544 |     ['https://example.com/dir']
545 |   ],
546 |   // Exclamation marks
547 |   ['You should check out example.com!', ['example.com'], ['example.com']],
548 |   ['Here is example.com/!', ['example.com/!'], ['example.com/']],
549 |   [
550 |     'https://example.com/dir/!',
551 |     ['https://example.com/dir/!'],
552 |     ['https://example.com/dir/']
553 |   ],
554 |   [
555 |     'https://example.com/dir!',
556 |     ['https://example.com/dir!'],
557 |     ['https://example.com/dir']
558 |   ]
559 | ]) {
560 |   const sourceTitle = source.replace('\n', '\\n'); // Show the newline as a literal `\n` in the test title.
561 | 
562 |   test(`trailingPeriod: true (${sourceTitle})`, (t) => { // Trailing `.`, `?` or `!` is kept where the row's second column says so.
563 |     t.deepEqual(
564 |       source.match(urlRegex({ trailingPeriod: true })),
565 |       withTrailingPeriod
566 |     );
567 |   });
568 | 
569 |   test(`trailingPeriod: false (${sourceTitle})`, (t) => { // Trailing `.`, `?` or `!` is dropped, per the row's third column.
570 |     t.deepEqual(
571 |       source.match(urlRegex({ trailingPeriod: false })),
572 |       withoutTrailingPeriod
573 |     );
574 |   });
575 | }
576 | 


--------------------------------------------------------------------------------