├── .eslintignore
├── .gitattributes
├── tests
    ├── perf
    │   ├── .eslintrc.js
    │   ├── index.html
    │   └── perf.js
    ├── vendor
    │   └── benchmark.js
    │   │   ├── nano.jar
    │   │   ├── LICENSE.txt
    │   │   └── README.md
    ├── helpers
    │   ├── .eslintrc.js
    │   ├── h.js
    │   ├── h-matchers.js
    │   └── h-unicode.js
    ├── jasmine.json
    ├── .eslintrc.js
    ├── index.html
    └── spec
    │   ├── s-addons-build.js
    │   └── s-addons-matchrecursive.js
├── types
    ├── tslint.json
    ├── tsconfig.json
    └── test.ts
├── .editorconfig
├── tools
    ├── scripts
    │   ├── .eslintrc.js
    │   ├── script-regex.js
    │   ├── property-regex.js
    │   ├── category-regex.js
    │   └── utils.js
    └── output
    │   └── properties.js
├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── .babelrc
├── src
    ├── index.js
    └── addons
    │   ├── unicode-scripts.js
    │   ├── unicode-categories.js
    │   ├── unicode-properties.js
    │   ├── build.js
    │   ├── matchrecursive.js
    │   └── unicode-base.js
├── LICENSE
├── package.json
├── docs
    ├── assets
    │   └── index.css
    ├── unicode
    │   └── index.html
    ├── index.html
    ├── syntax
    │   ├── named_capture_comparison
    │   │   └── index.html
    │   └── index.html
    └── flags
    │   └── index.html
├── .eslintrc.js
└── README.md


/.eslintignore:
--------------------------------------------------------------------------------
1 | xregexp-all.js
2 | lib
3 | tests/perf/versions
4 | tests/vendor
5 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Automatically normalize line endings for all text-based files
2 | * text=auto eol=lf
3 | 


--------------------------------------------------------------------------------
/tests/perf/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |     "globals": {
3 |         "Benchmark": true
4 |     }
5 | };
6 | 


--------------------------------------------------------------------------------
/tests/vendor/benchmark.js/nano.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/slevithan/xregexp/HEAD/tests/vendor/benchmark.js/nano.jar


--------------------------------------------------------------------------------
/types/tslint.json:
--------------------------------------------------------------------------------
1 | {
2 |     "extends": "dtslint/dtslint.json",
3 |     "rules": {
4 |         "no-inferrable-types": false
5 |     }
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/helpers/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |     "rules": {
3 |         "global-require": 0,
4 |         "no-global-assign": 0,
5 |         "no-native-reassign": 0
6 |     }
7 | };
8 | 


--------------------------------------------------------------------------------
/tests/jasmine.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "spec_dir": "tests",
 3 |   "spec_files": [
 4 |     "spec/**/*.js"
 5 |   ],
 6 |   "helpers": [
 7 |     "helpers/**/*.js"
 8 |   ],
 9 |   "stopSpecOnExpectationFailure": false,
10 |   "random": false
11 | }
12 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | indent_style = space
 6 | indent_size = 4
 7 | end_of_line = lf
 8 | insert_final_newline = true
 9 | trim_trailing_whitespace = true
10 | max_line_length = 100
11 | 
12 | [*.yml]
13 | indent_size = 2
14 | 


--------------------------------------------------------------------------------
/tools/scripts/.eslintrc.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |     "rules": {
 3 |         "global-require": "off",
 4 |         "no-console": "off",
 5 |         "no-sync": "off",
 6 |         "func-style": [
 7 |             "error",
 8 |             "declaration",
 9 |             {"allowArrowFunctions": true}
10 |         ]
11 |     }
12 | };
13 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Node.js CI
 2 | on:
 3 |   - push
 4 |   - pull_request
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |     - uses: actions/checkout@v6
10 |     - name: Use Node.js
11 |       uses: actions/setup-node@v6
12 |       with:
13 |         node-version: 20
14 |     - run: npm install
15 |     - run: npm test
16 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | lib
 2 | xregexp-all.js
 3 | .nyc_output
 4 | coverage
 5 | 
 6 | # Compiled Python files
 7 | *.pyc
 8 | 
 9 | # Installed npm modules
10 | node_modules
11 | 
12 | # Folder view configuration files
13 | .DS_Store
14 | Desktop.ini
15 | 
16 | # Thumbnail cache files
17 | ._*
18 | Thumbs.db
19 | 
20 | # Files that might appear on external disks
21 | .Spotlight-V100
22 | .Trashes
23 | 


--------------------------------------------------------------------------------
/types/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         "module": "commonjs",
 4 |         "lib": [
 5 |             "es6"
 6 |         ],
 7 |         "noImplicitAny": true,
 8 |         "noImplicitThis": true,
 9 |         "strictNullChecks": true,
10 |         "strictFunctionTypes": true,
11 |         "noEmit": true,
12 |         "forceConsistentCasingInFileNames": true,
13 |         "baseUrl": ".",
14 |         "types": [],
15 |         "paths": { "xregexp": ["."] }
16 |     }
17 | }
18 | 


--------------------------------------------------------------------------------
/.babelrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "plugins": [
 3 |     ["@babel/plugin-transform-unicode-property-regex", { "useUnicodeFlag": false }],
 4 |     [
 5 |       "@babel/plugin-transform-runtime",
 6 |       {
 7 |         "corejs": 3
 8 |       }
 9 |     ],
10 |     "add-module-exports",
11 |     "transform-xregexp",
12 |     "array-includes"
13 |   ],
14 |   "presets": [
15 |     [
16 |       "@babel/env", {
17 |         "exclude": [
18 |           "transform-literals",
19 |           "transform-sticky-regex"
20 |         ]
21 |       }
22 |     ]
23 |   ]
24 | }
25 | 


--------------------------------------------------------------------------------
/tools/scripts/script-regex.js:
--------------------------------------------------------------------------------
 1 | const {
 2 |     assemble,
 3 |     writeFile,
 4 |     unicodeVersion
 5 | } = require('./utils.js');
 6 | 
 7 | const scripts = require(`${unicodeVersion}`).Script;
 8 | 
 9 | const result = [];
10 | for (const script of scripts) {
11 |     if (script === 'Unknown') {
12 |         continue;
13 |     }
14 |     const codePoints = require(`${unicodeVersion}/Script/${script}/code-points.js`);
15 |     result.push(assemble({
16 |         name: script,
17 |         codePoints
18 |     }));
19 | }
20 | writeFile('scripts.js', result);
21 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
 1 | import XRegExp from './xregexp';
 2 | 
 3 | import build from './addons/build';
 4 | import matchRecursive from './addons/matchrecursive';
 5 | import unicodeBase from './addons/unicode-base';
 6 | import unicodeCategories from './addons/unicode-categories';
 7 | import unicodeProperties from './addons/unicode-properties';
 8 | import unicodeScripts from './addons/unicode-scripts';
 9 | 
10 | build(XRegExp);
11 | matchRecursive(XRegExp);
12 | unicodeBase(XRegExp);
13 | unicodeCategories(XRegExp);
14 | unicodeProperties(XRegExp);
15 | unicodeScripts(XRegExp);
16 | 
17 | export default XRegExp;
18 | 


--------------------------------------------------------------------------------
/src/addons/unicode-scripts.js:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * XRegExp Unicode Scripts 5.1.2
 3 |  * <xregexp.com>
 4 |  * Steven Levithan (c) 2010-present MIT License
 5 |  * Unicode data by Mathias Bynens <mathiasbynens.be>
 6 |  */
 7 | 
 8 | import scripts from '../../tools/output/scripts';
 9 | 
10 | export default (XRegExp) => {
11 | 
12 |     /**
13 |      * Adds support for all Unicode scripts. E.g., `\p{Latin}`. Token names are case insensitive,
14 |      * and any spaces, hyphens, and underscores are ignored.
15 |      *
16 |      * Uses Unicode 14.0.0.
17 |      *
18 |      * @requires XRegExp, Unicode Base
19 |      */
20 | 
21 |     if (!XRegExp.addUnicodeData) {
22 |         throw new ReferenceError('Unicode Base must be loaded before Unicode Scripts');
23 |     }
24 | 
25 |     XRegExp.addUnicodeData(scripts, 'Script');
26 | };
27 | 


--------------------------------------------------------------------------------
/tests/.eslintrc.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |     "env": {
 3 |         "jasmine": true
 4 |     },
 5 |     "globals": {
 6 |         "XRegExp": true,
 7 |         "resetFeatures": true,
 8 |         "REGEX_DATA": true,
 9 |         "hasNativeD": true,
10 |         "hasNativeS": true,
11 |         "hasNativeU": true,
12 |         "hasNativeY": true,
13 |         "hasStrictMode": true,
14 |         "testUnicodeToken": true
15 |     },
16 |     "rules": {
17 |         "brace-style": 0,
18 |         "dot-location": 0,
19 |         "key-spacing": 0,
20 |         "no-control-regex": 0,
21 |         "no-empty-function": 0,
22 |         "no-loop-func": 0,
23 |         "no-multi-assign": 0,
24 |         "no-multi-spaces": 0,
25 |         "no-template-curly-in-string": 0,
26 |         "no-useless-call": 0,
27 |         "no-warning-comments": 0,
28 |         "object-property-newline": 0
29 |     }
30 | };
31 | 


--------------------------------------------------------------------------------
/src/addons/unicode-categories.js:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * XRegExp Unicode Categories 5.1.2
 3 |  * <xregexp.com>
 4 |  * Steven Levithan (c) 2010-present MIT License
 5 |  * Unicode data by Mathias Bynens <mathiasbynens.be>
 6 |  */
 7 | 
 8 | import categories from '../../tools/output/categories';
 9 | 
10 | export default (XRegExp) => {
11 | 
12 |     /**
13 |      * Adds support for Unicode's general categories. E.g., `\p{Lu}` or `\p{Uppercase Letter}`. See
14 |      * category descriptions in UAX #44 <http://unicode.org/reports/tr44/#GC_Values_Table>. Token
15 |      * names are case insensitive, and any spaces, hyphens, and underscores are ignored.
16 |      *
17 |      * Uses Unicode 14.0.0.
18 |      *
19 |      * @requires XRegExp, Unicode Base
20 |      */
21 | 
22 |     if (!XRegExp.addUnicodeData) {
23 |         throw new ReferenceError('Unicode Base must be loaded before Unicode Categories');
24 |     }
25 | 
26 |     XRegExp.addUnicodeData(categories);
27 | };
28 | 


--------------------------------------------------------------------------------
/tools/scripts/property-regex.js:
--------------------------------------------------------------------------------
 1 | const {
 2 |     assemble,
 3 |     writeFile,
 4 |     unicodeVersion
 5 | } = require('./utils.js');
 6 | 
 7 | // This includes only the binary properties required by UTS18 RL1.2 for level 1 Unicode regex
 8 | // support, minus `Assigned` which has special handling since it is the inverse of Unicode category
 9 | // `Unassigned`. To include all binary properties, change this to:
10 | // `const properties = require(`${unicodeVersion}`).Binary_Property;`
11 | const properties = [
12 |     'ASCII',
13 |     'Alphabetic',
14 |     'Any',
15 |     'Default_Ignorable_Code_Point',
16 |     'Lowercase',
17 |     'Noncharacter_Code_Point',
18 |     'Uppercase',
19 |     'White_Space'
20 | ];
21 | 
22 | const result = [];
23 | for (const property of properties) {
24 |     const codePoints = require(`${unicodeVersion}/Binary_Property/${property}/code-points.js`);
25 |     result.push(assemble({
26 |         name: property,
27 |         codePoints
28 |     }));
29 | }
30 | writeFile('properties.js', result);
31 | 


--------------------------------------------------------------------------------
/tools/scripts/category-regex.js:
--------------------------------------------------------------------------------
 1 | const aliasesToNames = require('unicode-property-value-aliases').get('General_Category');
 2 | 
 3 | const namesToAliases = new Map();
 4 | for (const [alias, name] of aliasesToNames) {
 5 |     if (!namesToAliases.has(name) || namesToAliases.get(name).length > name) {
 6 |         namesToAliases.set(name, alias);
 7 |     }
 8 | }
 9 | 
10 | const {
11 |     assemble,
12 |     writeFile,
13 |     unicodeVersion
14 | } = require('./utils.js');
15 | 
16 | const categories = require(`${unicodeVersion}`).General_Category;
17 | 
18 | const aliases = [];
19 | for (const category of categories) {
20 |     const alias = namesToAliases.get(category);
21 |     aliases.push({
22 |         alias,
23 |         category
24 |     });
25 | }
26 | aliases.sort(function(a, b) {
27 |     return a.alias < b.alias ? -1 : 1;
28 | });
29 | 
30 | const result = [];
31 | for (const {alias, category} of aliases) {
32 |     const codePoints = require(`${unicodeVersion}/General_Category/${category}/code-points.js`);
33 |     result.push(assemble({
34 |         name: alias,
35 |         alias: category,
36 |         codePoints
37 |     }));
38 | }
39 | writeFile('categories.js', result);
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2007-present Steven Levithan <https://xregexp.com/>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/tests/helpers/h.js:
--------------------------------------------------------------------------------
 1 | if (typeof global === 'undefined') {
 2 |     global = window;
 3 | } else {
 4 |     global.XRegExp = require('../../xregexp-all');
 5 | }
 6 | 
 7 | // Ensure that all features are reset to default when each spec starts
 8 | global.resetFeatures = function() {
 9 |     XRegExp.uninstall('astral');
10 |     XRegExp.install('namespacing');
11 | };
12 | 
13 | // Property name used for extended regex instance data
14 | global.REGEX_DATA = 'xregexp';
15 | 
16 | // Check for ES2021 `d` flag support
17 | global.hasNativeD = XRegExp._hasNativeFlag('d');
18 | // Check for ES2018 `s` flag support
19 | global.hasNativeS = XRegExp._hasNativeFlag('s');
20 | // Check for ES6 `u` flag support
21 | global.hasNativeU = XRegExp._hasNativeFlag('u');
22 | // Check for ES6 `y` flag support
23 | global.hasNativeY = XRegExp._hasNativeFlag('y');
24 | // Check for strict mode support
25 | global.hasStrictMode = (function() {
26 |     'use strict';
27 | 
28 |     return !this;
29 | }());
30 | 
31 | // Naive polyfill of String.prototype.repeat
32 | if (!String.prototype.repeat) {
33 |     String.prototype.repeat = function(count) {
34 |         return count ? Array(count + 1).join(this) : '';
35 |     };
36 | }
37 | 


--------------------------------------------------------------------------------
/tests/vendor/benchmark.js/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2010-2016 Mathias Bynens <https://mathiasbynens.be/>
 2 | Based on JSLitmus.js, copyright Robert Kieffer <http://broofa.com/>
 3 | Modified by John-David Dalton <http://allyoucanleet.com/>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/tests/helpers/h-matchers.js:
--------------------------------------------------------------------------------
 1 | if (typeof global === 'undefined') {
 2 |     // No `global` object: alias it to `window`
 3 |     global = window;
 4 | }
 5 | 
 6 | // Registers the custom `toEqualMatch` matcher with Jasmine
 7 | global.addToEqualMatchMatcher = function() {
 8 |     jasmine.addMatchers({
 9 |         // Passes when `actual` and `expected` are both arrays of the same length whose elements
10 |         // are strictly equal index by index. Unlike toEqual, custom properties of the arrays
11 |         // are ignored, which is useful when comparing regex matches with array literals.
12 |         toEqualMatch: function() {
13 |             return {
14 |                 compare: function(actual, expected) {
15 |                     var isA = jasmine.isA_;
16 |                     var pass = false;
17 |                     var i;
18 | 
19 |                     if (
20 |                         isA('Array', actual) &&
21 |                         isA('Array', expected) &&
22 |                         actual.length === expected.length
23 |                     ) {
24 |                         pass = true;
25 |                         // Stop at the first mismatch; the verdict cannot change after that
26 |                         for (i = 0; pass && i < actual.length; ++i) {
27 |                             pass = actual[i] === expected[i];
28 |                         }
29 |                     }
30 | 
31 |                     return {pass: pass};
32 |                 }
33 |             };
34 |         }
35 |     });
36 | };
37 | 


--------------------------------------------------------------------------------
/tests/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 6 |     <title>XRegExp Specs</title>
 7 | 
 8 |     <!-- Jasmine -->
 9 |     <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jasmine/jasmine@5.12.0/lib/jasmine-core/jasmine.css">
10 |     <script src="https://cdn.jsdelivr.net/gh/jasmine/jasmine@5.12.0/lib/jasmine-core/jasmine.js"></script>
11 |     <script src="https://cdn.jsdelivr.net/gh/jasmine/jasmine@5.12.0/lib/jasmine-core/jasmine-html.js"></script>
12 |     <script src="https://cdn.jsdelivr.net/gh/jasmine/jasmine@5.12.0/lib/jasmine-core/boot0.js"></script>
13 |     <script>
14 |         jasmine.getEnv().configure({random: false});
15 |     </script>
16 |     <script src="https://cdn.jsdelivr.net/gh/jasmine/jasmine@5.12.0/lib/jasmine-core/boot1.js"></script>
17 | 
18 |     <!-- XRegExp -->
19 |     <script src="../xregexp-all.js"></script>
20 | 
21 |     <!-- Helpers -->
22 |     <script src="helpers/h.js"></script>
23 |     <script src="helpers/h-matchers.js"></script>
24 |     <script src="helpers/h-unicode.js"></script>
25 | 
26 |     <!-- Specs -->
27 |     <script src="spec/s-xregexp.js"></script>
28 |     <script src="spec/s-xregexp-methods.js"></script>
29 |     <script src="spec/s-addons-build.js"></script>
30 |     <script src="spec/s-addons-matchrecursive.js"></script>
31 |     <script src="spec/s-addons-unicode.js"></script>
32 | 
33 |     <style>
34 |         #header {
35 |             font-family: Monaco, "Lucida Console", monospace;
36 |             font-size: 11px;
37 |             margin-bottom: 15px;
38 |         }
39 |     </style>
40 | </head>
41 | <body>
42 |     <p id="header">Note: A variety of syntax extensions are added by the specs on this page, so this isn't an ideal place to test XRegExp via the console.</p>
43 | </body>
44 | </html>
45 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "xregexp",
 3 |   "version": "5.1.2",
 4 |   "description": "Extended regular expressions",
 5 |   "homepage": "http://xregexp.com/",
 6 |   "author": "Steven Levithan <steves_list@hotmail.com>",
 7 |   "license": "MIT",
 8 |   "repository": {
 9 |     "type": "git",
10 |     "url": "https://github.com/slevithan/xregexp.git"
11 |   },
12 |   "keywords": [
13 |     "regex",
14 |     "regexp",
15 |     "regular expression",
16 |     "unicode"
17 |   ],
18 |   "browser": "./lib/index.js",
19 |   "main": "./lib/index.js",
20 |   "module": "./src/index.js",
21 |   "files": [
22 |     "src",
23 |     "lib",
24 |     "tools/output",
25 |     "xregexp-all.js",
26 |     "types/index.d.ts"
27 |   ],
28 |   "scripts": {
29 |     "lint": "eslint .",
30 |     "babel": "babel src -d lib",
31 |     "build-unicode-data": "node tools/scripts/category-regex.js && node tools/scripts/property-regex.js && node tools/scripts/script-regex.js",
32 |     "prebuild": "npm run build-unicode-data && npm run lint && npm run babel",
33 |     "build": "browserify lib/index.js --standalone XRegExp > xregexp-all.js",
34 |     "pretest": "npm run build",
35 |     "test": "nyc --reporter=lcov --reporter=text-summary jasmine JASMINE_CONFIG_PATH=tests/jasmine.json",
36 |     "prepublish": "npm test"
37 |   },
38 |   "types": "types/index.d.ts",
39 |   "devDependencies": {
40 |     "@babel/cli": "^7.28.3",
41 |     "@babel/core": "^7.28.5",
42 |     "@babel/plugin-transform-unicode-property-regex": "^7.27.1",
43 |     "@babel/plugin-transform-runtime": "^7.28.5",
44 |     "@babel/preset-env": "^7.28.5",
45 |     "@unicode/unicode-14.0.0": "^1.6.16",
46 |     "babel-plugin-add-module-exports": "^1.0.4",
47 |     "babel-plugin-array-includes": "^2.0.3",
48 |     "babel-plugin-transform-xregexp": "^1.0.0",
49 |     "browserify": "^17.0.1",
50 |     "eslint": "^8.57.1",
51 |     "jasmine": "^5.12.0",
52 |     "jsesc": "^3.1.0",
53 |     "nyc": "^17.1.0",
54 |     "unicode-property-value-aliases": "^3.9.0"
55 |   },
56 |   "dependencies": {
57 |     "@babel/runtime-corejs3": "^7.28.4"
58 |   }
59 | }
60 | 


--------------------------------------------------------------------------------
/src/addons/unicode-properties.js:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * XRegExp Unicode Properties 5.1.2
 3 |  * <xregexp.com>
 4 |  * Steven Levithan (c) 2012-present MIT License
 5 |  * Unicode data by Mathias Bynens <mathiasbynens.be>
 6 |  */
 7 | 
 8 | import properties from '../../tools/output/properties';
 9 | 
10 | export default (XRegExp) => {
11 | 
12 |     /**
13 |      * Adds properties to meet the UTS #18 Level 1 RL1.2 requirements for Unicode regex support. See
14 |      * <http://unicode.org/reports/tr18/#RL1.2>. Following are definitions of these properties from
15 |      * UAX #44 <http://unicode.org/reports/tr44/>:
16 |      *
17 |      * - Alphabetic
18 |      *   Characters with the Alphabetic property. Generated from: Lowercase + Uppercase + Lt + Lm +
19 |      *   Lo + Nl + Other_Alphabetic.
20 |      *
21 |      * - Default_Ignorable_Code_Point
22 |      *   For programmatic determination of default ignorable code points. New characters that should
23 |      *   be ignored in rendering (unless explicitly supported) will be assigned in these ranges,
24 |      *   permitting programs to correctly handle the default rendering of such characters when not
25 |      *   otherwise supported.
26 |      *
27 |      * - Lowercase
28 |      *   Characters with the Lowercase property. Generated from: Ll + Other_Lowercase.
29 |      *
30 |      * - Noncharacter_Code_Point
31 |      *   Code points permanently reserved for internal use.
32 |      *
33 |      * - Uppercase
34 |      *   Characters with the Uppercase property. Generated from: Lu + Other_Uppercase.
35 |      *
36 |      * - White_Space
37 |      *   Spaces, separator characters and other control characters which should be treated by
38 |      *   programming languages as "white space" for the purpose of parsing elements.
39 |      *
40 |      * The properties ASCII, Any, and Assigned are also included but are not defined in UAX #44. UTS
41 |      * #18 RL1.2 additionally requires support for Unicode scripts and general categories. These are
42 |      * included in XRegExp's Unicode Categories and Unicode Scripts addons.
43 |      *
44 |      * Token names are case insensitive, and any spaces, hyphens, and underscores are ignored.
45 |      *
46 |      * Uses Unicode 14.0.0.
47 |      *
48 |      * @requires XRegExp, Unicode Base
49 |      */
50 | 
51 |     if (!XRegExp.addUnicodeData) {
52 |         throw new ReferenceError('Unicode Base must be loaded before Unicode Properties');
53 |     }
54 | 
55 |     const unicodeData = properties;
56 | 
57 |     // Add non-generated data
58 |     unicodeData.push({
59 |         name: 'Assigned',
60 |         // Since this is defined as the inverse of Unicode category Cn (Unassigned), the Unicode
61 |         // Categories addon is required to use this property
62 |         inverseOf: 'Cn'
63 |     });
64 | 
65 |     XRegExp.addUnicodeData(unicodeData);
66 | };
67 | 


--------------------------------------------------------------------------------
/tests/perf/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>XRegExp Performance Tests</title>
 6 |     <style>
 7 |         body {
 8 |             font-family: Calibri, Verdana, Sans-serif;
 9 |             padding: 30px;
10 |             margin: 0;
11 |         }
12 |         applet {
13 |             position: absolute;
14 |             left: -9999px;
15 |         }
16 |     </style>
17 | </head>
18 | <body>
19 |     <div id="log"></div>
20 | 
21 |     <script>
22 |         // Load latest XRegExp script unless known old version provided in query param
23 |         (function() {
24 |             var match = /[?&]version=([^&]+)/.exec(location.search);
25 |             var version = match ? match[1] : null;
26 |             var isValidVersion = /^\d+\.\d+\.\d+(?:-\w+)?$/.test(version);
27 | 
28 |             if (version === null || version === 'latest') {
29 |                 document.write('<script src="../../xregexp-all.js"><\/script>');
30 |             } else if (isValidVersion) {
31 |                 // Hack around ES6 incompatibility in XRegExp versions prior to 3.0.0
32 |                 if (parseInt(version, 10) < 3) {
33 |                     delete RegExp.prototype.sticky;
34 |                 }
35 |                 document.write('<script src="https://unpkg.com/xregexp@' + version + '/xregexp-all.js"><\/script>');
36 |             }
37 |         }());
38 |     </script>
39 | 
40 |     <script src="../vendor/benchmark.js/lodash.js"></script>
41 |     <script src="../vendor/benchmark.js/platform.js"></script>
42 |     <script src="../vendor/benchmark.js/benchmark.js"></script>
43 |     <script src="perf.js"></script>
44 | 
45 |     <script>
46 |         // Load an applet for higher-precision timing if needed and not manually disabled.
47 |         // Adapted from Lo-Dash <http://lodash.com/> performance tests.
48 |         (function() {
49 |             if (/[?&]nojava=true(?:&|$)/.test(location.search)) {
50 |                 return;
51 |             }
52 |             var perf = window.performance,
53 |                 begin = new Date,
54 |                 measured;
55 |             // Check whether the applet is needed
56 |             while (!(measured = new Date - begin)) {
57 |                 // No-op
58 |             }
59 |             if (measured !== 1 && !(perf && (perf.now || perf.webkitNow))) {
60 |                 document.write('<applet code="nano" archive="../vendor/benchmark.js/nano.jar"></applet>');
61 |             }
62 |         }());
63 | 
64 |         if (typeof XRegExp === 'undefined') {
65 |             document.getElementById('log').innerHTML = 'Unable to find XRegExp version provided in query at &lt;unpkg.com&gt;';
66 |         } else {
67 |             // Global run function provided by perf script
68 |             onload = run;
69 |         }
70 |     </script>
71 | </body>
72 | </html>
73 | 


--------------------------------------------------------------------------------
/tests/vendor/benchmark.js/README.md:
--------------------------------------------------------------------------------
  1 | # Benchmark.js <sup>v2.1.1</sup>
  2 | 
  3 | A [robust](https://mathiasbynens.be/notes/javascript-benchmarking "Bulletproof JavaScript benchmarks") benchmarking library that supports high-resolution timers & returns statistically significant results. As seen on [jsPerf](https://jsperf.com/).
  4 | 
  5 | ## Documentation
  6 | 
  7 | * [API Documentation](https://benchmarkjs.com/docs)
  8 | 
  9 | ## Download
 10 | 
 11 |  * [Development source](https://raw.githubusercontent.com/bestiejs/benchmark.js/2.1.1/benchmark.js)
 12 | 
 13 | ## Installation
 14 | 
 15 | Benchmark.js’ only hard dependency is [lodash](https://lodash.com/).
 16 | Include [platform.js](https://mths.be/platform) to populate [Benchmark.platform](https://benchmarkjs.com/docs#platform).
 17 | 
 18 | In a browser:
 19 | 
 20 | ```html
 21 | <script src="lodash.js"></script>
 22 | <script src="platform.js"></script>
 23 | <script src="benchmark.js"></script>
 24 | ```
 25 | 
 26 | In an AMD loader:
 27 | 
 28 | ```js
 29 | require({
 30 |   'paths': {
 31 |     'benchmark': 'path/to/benchmark',
 32 |     'lodash': 'path/to/lodash',
 33 |     'platform': 'path/to/platform'
 34 |   }
 35 | },
 36 | ['benchmark'], function(Benchmark) {/*…*/});
 37 | ```
 38 | 
 39 | Using npm:
 40 | 
 41 | ```bash
 42 | $ npm i --save benchmark
 43 | ```
 44 | 
 45 | In Node.js:
 46 | 
 47 | ```js
 48 | var Benchmark = require('benchmark');
 49 | ```
 50 | 
 51 | Optionally, use the [microtime module](https://github.com/wadey/node-microtime) by Wade Simmons:
 52 | 
 53 | ```bash
 54 | npm i --save microtime
 55 | ```
 56 | 
 57 | Usage example:
 58 | 
 59 | ```js
 60 | var suite = new Benchmark.Suite;
 61 | 
 62 | // add tests
 63 | suite.add('RegExp#test', function() {
 64 |   /o/.test('Hello World!');
 65 | })
 66 | .add('String#indexOf', function() {
 67 |   'Hello World!'.indexOf('o') > -1;
 68 | })
 69 | // add listeners
 70 | .on('cycle', function(event) {
 71 |   console.log(String(event.target));
 72 | })
 73 | .on('complete', function() {
 74 |   console.log('Fastest is ' + this.filter('fastest').map('name'));
 75 | })
 76 | // run async
 77 | .run({ 'async': true });
 78 | 
 79 | // logs:
 80 | // => RegExp#test x 4,161,532 +-0.99% (59 cycles)
 81 | // => String#indexOf x 6,139,623 +-1.00% (131 cycles)
 82 | // => Fastest is String#indexOf
 83 | ```
 84 | 
 85 | ## Developing
 86 | 
 87 | The following `npm` tasks are available to assist during development and release:
 88 | 
 89 | - `npm run server` will start `live-server` and open the base directory in your browser; then you can, for example, browse to /example/jsperf/ to run the available tests in your browser using the local benchmark.js file. 
 90 | 
 91 | - `npm run test` -- nuff said.
 92 | 
 93 | - `npm run doc` -- will regenerate the documentation from source.
 94 | 
 95 | Also note that rough support for a test *catalog* is available for the `/example/jsperf/` demo: run `./build-jsperf.sh` to update the catalog file and then the next reload of the `/example/jsperf/index.html` page will show a clickable list of all available tests near the bottom so you can browse and jump from one test file/suite to another.
 96 | 
 97 | 
 98 | ## Support
 99 | 
100 | Tested in Chrome 46-47, Firefox 42-43, IE 9-11, Edge 13, Safari 8-9, Node.js 0.10-6, & PhantomJS 1.9.8.
101 | 
102 | ## BestieJS
103 | 
104 | Benchmark.js is part of the BestieJS *“Best in Class”* module collection. This means we promote solid browser/environment support, ES5+ precedents, unit testing, & plenty of documentation.
105 | 


--------------------------------------------------------------------------------
/docs/assets/index.css:
--------------------------------------------------------------------------------
 1 | body {font-family:Calibri, Tahoma, Verdana, Arial, Helvetica, sans-serif; font-size:85%; margin:0; padding:0; background:#fff;}
 2 | a:link, a:visited {color:#296e31; text-decoration:none;}
 3 | a:hover, a:active {color:#0a3716; text-decoration:underline;}
 4 | #header {padding:15px 15px 10px; border-bottom:3px solid #e3e3e3; background:#f3f3f3;}
 5 | #logoX {color:#999;}
 6 | #body {height:100%; padding:15px;}
 7 | #navBar {height:100%; width:200px; float:left;}
 8 | #main {height:100%; margin-left:200px;}
 9 | #footer {clear:both; border-top:3px solid #e3e3e3; padding:0 15px 20px;}
10 | #footnotes {margin-top:25px;}
11 | #tocContainer {float:right; background:#fff; padding:5px 0 20px 20px;}
12 | #toc {border:1px solid #aaa; padding:0 20px 8px;}
13 | #toc h2 {margin-top:15px;}
14 | #toc ul {padding-left:15px;}
15 | .small {font-size:80%;}
16 | .plain {font-weight:normal;}
17 | .alert {color:#900; font-weight:bold;}
18 | .todo {color:#c00; font-weight:bold;}
19 | .clear {clear:both;}
20 | h1 {margin-bottom:0; font-family:Cambria, Tahoma, Verdana, Arial, Helvetica, sans-serif;}
21 | h1 a:link, h1 a:visited, h1 a:active, h1 a:hover {color:#000; text-decoration:none;}
22 | h1.subtitle {margin-top:0; font-size:1.2em; font-weight:normal; font-family:Calibri, Tahoma, Verdana, Arial, Helvetica, sans-serif;}
23 | h2 {border-bottom:1px solid #aaa; margin-top:25px; font-family:Cambria, "Times New Roman", Times, serif; font-size:145%;}
24 | h2 code {border-bottom:0;}
25 | h2 code span.plain {font-size:90%;}
26 | h3 {margin:15px 0 10px; font-family:Cambria, "Times New Roman", Times, serif; font-size:125%; font-weight:normal;}
27 | pre {background:#fafafa; white-space:pre-wrap; font-family:Monaco, Consolas, "Courier New", Courier, monospace; border:1px solid #e3e3e3; padding:5px;}
28 | code {font-family:Monaco, Consolas, "Courier New", Courier, monospace; border:1px solid #eee; background:#f3f3f3;}
29 | cite {font-style:normal;}
30 | q {font-style:italic;}
31 | q:before, q:after {content:"";}
32 | mark {background:#ffc;}
33 | li {margin-bottom:1px; line-height:130%;}
34 | table {border-collapse:collapse; border-color:#888;}
35 | table ul {padding-left:20px; margin:0;}
36 | thead {background:#333; color:#f3f3f3;}
37 | th, td {border:solid #888; border-width:0 1px 1px 0; padding:5px;}
38 | tr.alt {background:#f3f3f3;}
39 | tr.alt code {background:#fafafa;}
40 | table.api {margin-left:20px;}
41 | table.api th, table.api td {border:0;}
42 | table.api tr.alt {background:#fff;}
43 | table.api tr.alt td {border-top:1px solid #ddd;}
44 | table.api tbody th {vertical-align:top; text-align:left; border-right:1px solid #ddd;}
45 | div.aside {border:3px double #ddd; background:#f6f6f6; padding:0 15px 15px; margin-bottom:15px;}
46 | div.aside p {margin:15px 0 0;}
47 | div.aside code {border:1px solid #ddd; background:#f6f6f6; padding:0 2px;}
48 | div.right {float:right; clear:right;}
49 | div.aside.right {width:300px; margin-left:15px;}
50 | a.footnoteLink {font-size:80%; color:#999;}
51 | tr.highlight {background:#bfdcff;}
52 | tr.highlight code {border-color:#99b9df; background:#b3ceef;}
53 | 
54 | .menu {
55 | 	width:180px;
56 | }
57 | .menu ul {
58 | 	list-style-type:none;
59 | 	margin:0;
60 | 	padding:0 0 10px 0;
61 | 	border:0 solid #a0df99;
62 | 	border-width:0 1px 1px 0;
63 | }
64 | .menu li a {
65 | 	font:italic 15px Georgia, "Times New Roman", Times, serif;
66 | 	display:block;
67 | 	height:24px;
68 | 	padding:4px 0 4px 10px;
69 | 	line-height:24px;
70 | 	text-decoration:none;
71 | }
72 | .menu li a:link, .menu li a:visited {
73 | 	color:#296e31;
74 | }
75 | .menu li a:hover {
76 | 	color:#0a3716;
77 | 	text-decoration:underline;
78 | }
79 | .menu li a.selected {
80 | 	color:#333; font-weight:bold;
81 | }
82 | a img {border:0;}
83 | 


--------------------------------------------------------------------------------
/docs/unicode/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <meta charset="utf-8" />
 5 |   <title>Unicode :: XRegExp</title>
 6 |   <link href="../assets/index.css" rel="stylesheet" type="text/css" />
 7 | </head>
 8 | <body>
 9 | <div id="header">
10 |   <h1><a href="../index.html"><span id="logoX">X</span>RegExp</a></h1>
11 |   <h1 class="subtitle">The one of a kind JavaScript regular expression library</h1>
12 | </div>
13 | <div id="body">
14 |   <div id="navBar">
15 |     <div class="menu">
16 |       <ul>
17 |         <li><a href="../index.html">Home</a></li>
18 |         <li><a href="../api/index.html">API</a></li>
19 |         <li><a href="../syntax/index.html">New syntax</a></li>
20 |         <li><a href="../flags/index.html">New flags</a></li>
21 |         <li><a href="../unicode/index.html" class="selected">Unicode</a></li>
22 |       </ul>
23 |     </div>
24 |   </div>
25 |   <div id="main">
26 | 
27 | 
28 | 
29 | 
30 | 
31 |     <h1>Unicode</h1>
32 | 
33 |     <p><strong>Requires the Unicode addons</strong>, which are bundled in <a href="https://unpkg.com/xregexp/xregexp-all.js"><code>xregexp-all.js</code></a>. Alternatively, you can download the individual addon scripts from GitHub. XRegExp's <a href="https://www.npmjs.com/package/xregexp">npm package</a> uses <code>xregexp-all.js</code>.</p>
34 | 
35 |     <p>The Unicode Base script adds base support for Unicode matching via the <code>\p{&hellip;}</code> syntax. &#192; la carte token addon packages add support for Unicode categories, scripts, and other properties. All Unicode tokens can be inverted using <code>\P{&hellip;}</code> or <code>\p{^&hellip;}</code>. Token names are case insensitive, and any spaces, hyphens, and underscores are ignored. You can omit the braces for token names that are a single letter.</p>
36 | 
37 |     <h3>Example</h3>
38 | <pre class="sh_javascript">// Categories
39 | XRegExp('\\p{Sc}\\pN+'); // Sc = currency symbol, N = number
40 | // Can also use the full names \p{Currency_Symbol} and \p{Number}
41 | 
42 | // Scripts
43 | XRegExp('\\p{Cyrillic}');
44 | XRegExp('[\\p{Latin}\\p{Common}]');
45 | // Can also use the Script= prefix to match ES2018: \p{Script=Cyrillic}
46 | 
47 | // Properties
48 | XRegExp('\\p{ASCII}');
49 | XRegExp('\\p{Assigned}');
50 | 
51 | // In action...
52 | 
53 | const unicodeWord = XRegExp("^\\pL+$"); // L = letter
54 | unicodeWord.test("&#x0420;&#x0443;&#x0441;&#x0441;&#x043A;&#x0438;&#x0439;"); <span class="sh_comment">// true</span>
55 | unicodeWord.test("&#x65E5;&#x672C;&#x8A9E;"); <span class="sh_comment">// true</span>
56 | unicodeWord.test("&#x0627;&#x0644;&#x0639;&#x0631;&#x0628;&#x064A;&#x0629;"); <span class="sh_comment">// true</span>
57 | 
58 | XRegExp("^\\p{Katakana}+$").test("&#x30AB;&#x30BF;&#x30AB;&#x30CA;"); <span class="sh_comment">// true</span>
59 | </pre>
60 | 
 61 |     <p>By default, <code>\p{&hellip;}</code> and <code>\P{&hellip;}</code> support the Basic Multilingual Plane (i.e. code points up to <code>U+FFFF</code>). You can opt in to full 21-bit Unicode support (with code points up to <code>U+10FFFF</code>) on a per-regex basis by using flag <code>A</code>. In XRegExp, this is called <em>astral mode</em>. You can automatically add flag <code>A</code> for all new regexes by running <code>XRegExp.install('astral')</code>. When in astral mode, <code>\p{&hellip;}</code> and <code>\P{&hellip;}</code> always match a full code point rather than a code unit, using surrogate pairs for code points above <code>U+FFFF</code>.</p>
62 | 
63 | <pre class="sh_javascript">// Using flag A to match astral code points
64 | XRegExp('^\\pS$').test('💩'); // -> false
65 | XRegExp('^\\pS$', 'A').test('💩'); // -> true
66 | // Using surrogate pair U+D83D U+DCA9 to represent U+1F4A9 (pile of poo)
67 | XRegExp('^\\pS$', 'A').test('\uD83D\uDCA9'); // -> true
68 | 
69 | // Implicit flag A
70 | XRegExp.install('astral');
71 | XRegExp('^\\pS$').test('💩'); // -> true
72 | </pre>
73 | 
74 |     <p>Opting in to astral mode disables the use of <code>\p{&hellip;}</code> and <code>\P{&hellip;}</code> within character classes. In astral mode, use e.g. <code>(\pL|[0-9_])+</code> instead of <code>[\pL0-9_]+</code>.</p>
75 | 
76 | 
77 | 
78 | 
79 | 
80 |   </div>
81 | </div>
82 | <div id="footer">
83 |   <p>&copy; <a href="https://slev.life/">Steven Levithan</a> :: <a href="https://github.com/slevithan/xregexp">GitHub</a> :: <a href="https://xregexp.com/">XRegExp.com</a></p>
84 | </div>
85 | </body>
86 | </html>
87 | 


--------------------------------------------------------------------------------
/tests/helpers/h-unicode.js:
--------------------------------------------------------------------------------
  1 | if (typeof global === 'undefined') {
  2 |     global = window;
  3 | }
  4 | 
  5 | /*
  6 |  * Runs a series of `expect` assertions, given a Unicode token name and arrays of code points that
  7 |  * should or should not be matched.
  8 |  */
  9 | global.testUnicodeToken = function(name, options) {
 10 |     var pattern = '^\\p{' + name + '}$';
 11 |     var negated = '^\\P{' + name + '}$';
 12 |     var astralRegex = XRegExp(pattern, 'A');
 13 |     var negatedAstralRegex = XRegExp(negated, 'A');
 14 |     var bmpRegex;
 15 |     var negatedBmpRegex;
 16 |     var isBmpChar;
 17 | 
 18 |     if (options.isAstralOnly) {
 19 |         expect(function() {XRegExp(pattern);}).toThrowError(SyntaxError);
 20 |         expect(function() {XRegExp(negated);}).toThrowError(SyntaxError);
 21 |     } else {
 22 |         bmpRegex = XRegExp(pattern);
 23 |         negatedBmpRegex = XRegExp(negated);
 24 |     }
 25 | 
 26 |     if (options.valid) {
 27 |         options.valid.forEach(function(chr) {
 28 |             expect(astralRegex.test(chr)).toBe(true);
 29 |             expect(negatedAstralRegex.test(chr)).toBe(false);
 30 |             if (!options.isAstralOnly) {
 31 |                 isBmpChar = chr.length === 1; //chr.codePointAt(0) === chr.charCodeAt(0)
 32 |                 expect(bmpRegex.test(chr)).toBe(isBmpChar);
 33 |                 expect(negatedBmpRegex.test(chr)).toBe(false);
 34 |             }
 35 |         });
 36 |     }
 37 | 
 38 |     if (options.invalid) {
 39 |         options.invalid.forEach(function(chr) {
 40 |             expect(astralRegex.test(chr)).toBe(false);
 41 |             expect(negatedAstralRegex.test(chr)).toBe(true);
 42 |             if (!options.isAstralOnly) {
 43 |                 isBmpChar = chr.length === 1; //chr.codePointAt(0) === chr.charCodeAt(0)
 44 |                 expect(bmpRegex.test(chr)).toBe(false);
 45 |                 expect(negatedBmpRegex.test(chr)).toBe(isBmpChar);
 46 |             }
 47 |         });
 48 |     }
 49 | };
 50 | 
 51 | 
 52 | /*!
 53 |  * ES6 Unicode Shims 0.1
 54 |  * Steven Levithan (c) 2012 MIT License
 55 |  */
 56 | 
 57 | /**
 58 |  * Returns a string created using the specified sequence of Unicode code points. Accepts integers
 59 |  * between 0 and 0x10FFFF. Code points above 0xFFFF are converted to surrogate pairs. If a provided
 60 |  * integer is in the surrogate range, it produces an unpaired surrogate. Comes from accepted ES6
 61 |  * proposals.
 62 |  * @memberOf String
 63 |  * @param {Number} cp1, cp2... Sequence of Unicode code points.
 64 |  * @returns {String} String created from the specified code points.
 65 |  * @example
 66 |  *
 67 |  * // Basic use
 68 |  * String.fromCodePoint(0x41); // -> 'A'
 69 |  *
 70 |  * // Multiple code points; returns astral characters as surrogate pairs
 71 |  * String.fromCodePoint(0x20B20, 0x28B4E, 0x29DF6);
 72 |  * // Unlike String.fromCharCode, this correctly handles code points above 0xFFFF
 73 |  */
 74 | if (!String.fromCodePoint) {
 75 |     String.fromCodePoint = function() {
 76 |         var chars = [],
 77 |             i, offset, point, units;
 78 |         for (i = 0; i < arguments.length; ++i) {
 79 |             point = arguments[i];
 80 |             offset = point - 0x10000;
 81 |             units = point > 0xFFFF ? [0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)] : [point];
 82 |             chars.push(String.fromCharCode.apply(null, units));
 83 |         }
 84 |         return chars.join("");
 85 |     };
 86 | }
 87 | 
 88 | /**
 89 |  * Returns the numeric Unicode code point of the character at the given index. Here `pos` is the
 90 |  * code *unit* position. If it's the second surrogate of a pair or an unpaired starting surrogate,
 91 |  * the code unit of the surrogate is returned; otherwise the code point is derived from the
 92 |  * surrogate pair. Comes from accepted ES6 proposals.
 93 |  * @memberOf String.prototype
 94 |  * @param {Number} [pos=0] Code unit index in the string. Defaults to `0` if not a number.
 95 |  * @returns {Number} Code point at the specified index. `NaN` if the index is less than `0` or
 96 |  *   greater than the string length.
 97 |  * @example
 98 |  *
 99 |  * var str = String.fromCodePoint(166734);
100 |  * str.codePointAt(0); // -> 166734
101 |  * // Unlike the charCodeAt method, this correctly handles code points above 0xFFFF
102 |  */
103 | /*if (!String.prototype.codePointAt) {
104 |     String.prototype.codePointAt = function (pos) {
105 |         pos = isNaN(pos) ? 0 : pos;
106 |         var str = String(this),
107 |             code = str.charCodeAt(pos),
108 |             next = str.charCodeAt(pos + 1);
109 |         // If a surrogate pair
110 |         if (0xD800 <= code && code <= 0xDBFF && 0xDC00 <= next && next <= 0xDFFF) {
111 |             return ((code - 0xD800) * 0x400) + (next - 0xDC00) + 0x10000;
112 |         }
113 |         return code;
114 |     };
115 | }*/
116 | 


--------------------------------------------------------------------------------
/tools/scripts/utils.js:
--------------------------------------------------------------------------------
  1 | const fs = require('fs');
  2 | const jsesc = require('jsesc');
  3 | 
  4 | const pkg = require('../../package.json');
  5 | const dependencies = Object.keys(pkg.devDependencies);
  6 | const unicodeVersion = dependencies.find((name) => /^@unicode\/unicode-\d/.test(name));
  7 | 
  8 | // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  9 | const highSurrogate = (codePoint) => Math.floor((codePoint - 0x10000) / 0x400) + 0xD800;
 10 | 
 11 | const lowSurrogate = (codePoint) => ((codePoint - 0x10000) % 0x400) + 0xDC00;
 12 | 
 13 | const codePointToString = (codePoint) => {
 14 |     const string = String.fromCodePoint(codePoint);
 15 |     // Important: escape RegExp meta-characters.
 16 |     if (/[$()*+\-\./?\[\]^{|}]/.test(string)) {
 17 |         return `\\${string}`;
 18 |     }
 19 |     return string;
 20 | };
 21 | 
 22 | const createRange = (codePoints) => {
 23 |     // Does the range contain lone high surrogates?
 24 |     let isBmpLast = false;
 25 |     // Does the range contain astral code points?
 26 |     let hasAstralCodePoints = false;
 27 |     const bmp = [];
 28 |     const supplementary = new Map();
 29 |     for (const codePoint of codePoints) {
 30 |         if (codePoint >= 0xD800 && codePoint <= 0xDBFF) {
 31 |             isBmpLast = true;
 32 |             bmp.push(codePoint);
 33 |         } else if (codePoint <= 0xFFFF) {
 34 |             bmp.push(codePoint);
 35 |         } else { // It’s a supplementary code point.
 36 |             const hi = highSurrogate(codePoint);
 37 |             const lo = lowSurrogate(codePoint);
 38 |             if (supplementary.has(hi)) {
 39 |                 supplementary.get(hi).push(lo);
 40 |             } else {
 41 |                 supplementary.set(hi, [lo]);
 42 |             }
 43 |             hasAstralCodePoints = true;
 44 |         }
 45 |     }
 46 | 
 47 |     const supplementaryByLowRanges = new Map();
 48 |     for (const [hi, lo] of supplementary) {
 49 |         const key = createBmpRange(lo);
 50 |         if (supplementaryByLowRanges.has(key)) {
 51 |             supplementaryByLowRanges.get(key).push(hi);
 52 |         } else {
 53 |             supplementaryByLowRanges.set(key, [hi]);
 54 |         }
 55 |     }
 56 |     // `supplementaryDictByLowRanges` looks like this:
 57 |     // { 'low surrogate range': [list of high surrogates that have this exact low surrogate range] })
 58 | 
 59 |     const bmpRange = createBmpRange(bmp, {addBrackets: false});
 60 | 
 61 |     const buf = [];
 62 |     let astralRange = '';
 63 | 
 64 |     // [bmpRange (including orphaned high surrogates), astralRange, isBmpLast]
 65 |     if (hasAstralCodePoints) {
 66 |         for (const [lo, hi] of supplementaryByLowRanges) {
 67 |             buf.push(createBmpRange(hi) + lo);
 68 |         }
 69 |         astralRange = buf.join('|');
 70 |     }
 71 | 
 72 |     return {
 73 |         bmp: bmpRange,
 74 |         astral: astralRange,
 75 |         isBmpLast: isBmpLast && hasAstralCodePoints
 76 |     };
 77 | };
 78 | 
 79 | const createBmpRange = (r, {addBrackets} = {addBrackets: true}) => {
 80 |     if (r.length === 0) {return '';}
 81 | 
 82 |     const buf = [];
 83 |     let [start] = r;
 84 |     let [end] = r;
 85 |     let predict = start + 1;
 86 |     r = r.slice(1);
 87 | 
 88 |     let counter = 0;
 89 |     for (const code of r) {
 90 |         if (predict == code) {
 91 |             end = code;
 92 |             predict = code + 1;
 93 |             continue;
 94 |         } else {
 95 |             if (start == end) {
 96 |                 buf.push(codePointToString(start));
 97 |                 counter++;
 98 |             } else if (end == start + 1) {
 99 |                 buf.push(`${codePointToString(start)}${codePointToString(end)}`);
100 |                 counter += 2;
101 |             } else {
102 |                 buf.push(`${codePointToString(start)}-${codePointToString(end)}`);
103 |                 counter += 2;
104 |             }
105 |             start = code;
106 |             end = code;
107 |             predict = code + 1;
108 |         }
109 |     }
110 | 
111 |     if (start == end) {
112 |         buf.push(codePointToString(start));
113 |         counter++;
114 |     } else if (end == start + 1) {
115 |         buf.push(`${codePointToString(start)}${codePointToString(end)}`);
116 |         counter += 2;
117 |     } else {
118 |         buf.push(`${codePointToString(start)}-${codePointToString(end)}`);
119 |         counter += 2;
120 |     }
121 | 
122 |     const output = buf.join('');
123 |     if (!addBrackets || counter == 1) {
124 |         return output;
125 |     }
126 |     return `[${output}]`;
127 | };
128 | 
/**
 * Builds the output record for one Unicode token from its name, optional alias and code points.
 * Only truthy values are included, so empty `bmp`/`astral` strings and a false `isBmpLast` are
 * omitted. Keys are added in the order name, alias, isBmpLast, bmp, astral.
 *
 * @param {{name: string, alias?: string, codePoints: Iterable<number>}} token Token description.
 * @returns {Object} Record with `name` plus whichever of the other keys apply.
 */
const assemble = ({name, alias, codePoints}) => {
    const range = createRange(codePoints);
    return {
        name,
        ...(alias ? {alias} : {}),
        ...(range.isBmpLast ? {isBmpLast: true} : {}),
        ...(range.bmp ? {bmp: range.bmp} : {}),
        ...(range.astral ? {astral: range.astral} : {})
    };
};
146 | 
/**
 * Serializes `object` with jsesc and writes it as a CommonJS module to `../output/<name>`,
 * resolved relative to this script's directory. Logs the file name before writing.
 *
 * @param {string} name File name to create inside the output directory.
 * @param {*} object Value to serialize as the module's export.
 */
const writeFile = (name, object) => {
    console.log(`Saving ${name}…`);
    const serialized = jsesc(object, {compact: false, indent: '    '});
    const destination = `${__dirname}/../output/${name}`;
    const contents = `module.exports = ${serialized};\n`;
    fs.writeFileSync(destination, contents);
};
158 | 
159 | module.exports = {
160 |     assemble,
161 |     writeFile,
162 |     unicodeVersion
163 | };
164 | 


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 | 	<meta charset="utf-8" />
  5 | 	<title>XRegExp</title>
  6 | 	<link href="./assets/index.css" rel="stylesheet" type="text/css" />
  7 | </head>
  8 | <body>
  9 | <div id="header">
 10 | 	<h1><a href="./index.html"><span id="logoX">X</span>RegExp</a></h1>
 11 | 	<h1 class="subtitle">The one of a kind JavaScript regular expression library</h1>
 12 | </div>
 13 | <div id="body">
 14 | 	<div id="navBar">
 15 | 		<div class="menu">
 16 | 			<ul>
 17 | 				<li><a href="./index.html" class="selected">Home</a></li>
 18 | 				<li><a href="./api/index.html">API</a></li>
 19 | 				<li><a href="./syntax/index.html">New syntax</a></li>
 20 | 				<li><a href="./flags/index.html">New flags</a></li>
 21 | 				<li><a href="./unicode/index.html">Unicode</a></li>
 22 | 			</ul>
 23 | 		</div>
 24 | 	</div>
 25 | 	<div id="main">
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 |     <div id="tocContainer">
 32 |       <div id="toc">
 33 |         <ul style="margin-bottom:5px;">
 34 |           <li><a href="#what">What is it?</a></li>
 35 |           <li><a href="#features">Features</a></li>
 36 |           <li><a href="#performance">Performance</a></li>
 37 |           <li><a href="#installation">Installation and usage</a></li>
 38 |           <li><a href="#v5-breaking">v5 breaking change</a></li>
 39 |         </ul>
 40 |       </div>
 41 |     </div>
 42 | 
 43 |     <h2 id="what">What is it?</h2>
 44 | 
 45 |     <p>XRegExp provides augmented (and extensible) JavaScript regular expressions. You get modern syntax and flags beyond what browsers support natively. XRegExp is also a regex utility belt with tools to make your grepping and parsing easier, while freeing you from regex cross-browser inconsistencies and other annoyances.</p>
 46 | 
 47 |     <p>XRegExp supports all native ES6 regular expression syntax. It supports <mark>ES5+ browsers</mark> (including Internet Explorer 9+), and you can use it with <mark>Node.js</mark> or as a <mark>RequireJS</mark> module. Over the years, many of XRegExp's features have been adopted by new JavaScript standards (named capturing, Unicode properties/scripts/categories, flag <code>s</code>, sticky matching, etc.), so using XRegExp can be a way to extend these features into older browsers. It's released under the <a href="https://mit-license.org/">MIT License</a>.</p>
 48 | 
 49 |     <p><strong>XRegExp lets you write regexes like this:</strong></p>
 50 | 
 51 | <pre class="sh_javascript">// Using named capture and flag x (free-spacing and line comments)
 52 | const date = XRegExp(`(?&lt;year>  [0-9]{4} ) -?  # year
 53 |                       (?&lt;month> [0-9]{2} ) -?  # month
 54 |                       (?&lt;day>   [0-9]{2} )     # day`, 'x');
 55 | </pre>
 56 | 
 57 |     <p><strong>And do cool stuff like this:</strong></p>
 58 | 
 59 | <pre class="sh_javascript">// Using named backreferences...
 60 | XRegExp.exec('2021-02-23', date).groups.year;
 61 | // -> '2021'
 62 | XRegExp.replace('2021-02-23', date, '$&lt;month&gt;/$&lt;day&gt;/$&lt;year&gt;');
 63 | // -> '02/23/2021'
 64 | 
 65 | // Finding matches within matches, while passing forward and returning specific backreferences
 66 | const html = `&lt;a href="https://xregexp.com/api/">XRegExp&lt;/a>
 67 |               &lt;a href="https://www.google.com/">Google&lt;/a>`;
 68 | XRegExp.matchChain(html, [
 69 |   {regex: /&lt;a href="([^"]+)">/i, backref: 1},
 70 |   {regex: XRegExp('(?i)^https?://(?&lt;domain>[^/?#]+)'), backref: 'domain'}
 71 | ]);
 72 | // -> ['xregexp.com', 'www.google.com']
 73 | </pre>
 74 | 
 75 |     <p><strong>Check out more <a href="https://github.com/slevithan/xregexp/blob/master/README.md#usage-examples">usage examples on GitHub &#x21E8;</a>.</strong></p>
 76 | 
 77 |     <h2 id="features">Features</h2>
 78 | 
 79 |     <ul>
 80 |     	<li>Adds <a href="./syntax/index.html"><strong>new regex and replacement text syntax</strong></a>, including comprehensive support for <a href="./syntax/index.html#namedCapture"><strong>named capture</strong></a>.</li>
 81 |     	<li>Adds <a href="./flags/index.html"><strong>new regex flags</strong></a>: <code>s</code>, to make <a href="./flags/index.html#singleline"><strong>dot match all</strong></a> characters; <code>x</code>, for <a href="./flags/index.html#extended"><strong>free-spacing</strong></a> and line comments; <code>n</code>, for <a href="./flags/index.html#explicitCapture"><strong>explicit capture</strong></a> mode; and <code>A</code>, for <a href="./flags/index.html#astral"><strong>astral</strong></a> mode (full 21-bit Unicode matching).</li>
 82 |     	<li>Provides a <a href="./api/index.html"><strong>suite of functions</strong></a> that make complex regex processing easier.</li>
 83 |     	<li>Supports addons that add even more new regex syntax, flags, and methods. Official addons support <a href="./unicode/index.html"><strong>Unicode</strong></a>, <a href="./api/index.html#matchRecursive"><strong>recursive matching</strong></a>, and <a href="./api/index.html#build"><strong>grammatical patterns</strong></a>.</li>
 84 |     </ul>
 85 | 
 86 |     <h2 id="performance">Performance</h2>
 87 | 
 88 |     <p>XRegExp compiles to native <code>RegExp</code> objects. Therefore regexes built with XRegExp perform just as fast as native regular expressions. There is a tiny extra cost when compiling a pattern for the first time.</p>
 89 | 
 90 |     <h2 id="installation">Installation and usage</h2>
 91 | 
 92 |     <p>In browsers (bundle XRegExp with all of its addons):</p>
 93 | 
 94 | <pre class="sh_html">&lt;script src="https://unpkg.com/xregexp/xregexp-all.js">&lt;/script>
 95 | </pre>
 96 | 
 97 |     <p>Using <a href="https://www.npmjs.com/">npm</a>:</p>
 98 | 
 99 | <pre class="sh_html">npm install xregexp
100 | </pre>
101 | 
102 |     <p>In <a href="https://nodejs.org/en/">Node.js</a>:</p>
103 | 
104 | <pre class="sh_javascript">const XRegExp = require('xregexp');
105 | </pre>
106 | 
107 |     <h2 id="v5-breaking">Named Capture Breaking Change in XRegExp 5</h2>
108 | 
109 |     <p>XRegExp 5 introduced a breaking change where named backreference properties now appear on the result's <code>groups</code> object (following ES2018), rather than directly on the result. To restore the old handling so you don't need to update old code, run the following line after importing XRegExp:</p>
110 | 
111 | <pre class="sh_javascript">XRegExp.uninstall('namespacing');
112 | </pre>
113 | 
114 |     <p>XRegExp 4.1.0 and later allow introducing the new behavior without upgrading to XRegExp 5 by running <code>XRegExp.install('namespacing')</code>.</p>
115 | 
116 |     <p>Following is the most commonly needed change to update code for the new behavior:</p>
117 | 
118 | <pre class="sh_javascript">// Change this
119 | const name = XRegExp.exec(str, regexWithNamedCapture).name;
120 | 
121 | // To this
122 | const name = XRegExp.exec(str, regexWithNamedCapture).groups.name;
123 | </pre>
124 | 
125 |     <p>See <a href="https://github.com/slevithan/xregexp/blob/master/README.md#usage-examples">the README on GitHub &#x21E8;</a> for more examples of using named capture with <code>XRegExp.exec</code> and <code>XRegExp.replace</code>.</p>
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 	</div>
132 | </div>
133 | <div id="footer">
134 | 	<p>&copy; <a href="https://slev.life/">Steven Levithan</a> :: <a href="https://github.com/slevithan/xregexp">GitHub</a> :: <a href="https://xregexp.com/">XRegExp.com</a></p>
135 | </div>
136 | </body>
137 | </html>
138 | 


--------------------------------------------------------------------------------
/tests/spec/s-addons-build.js:
--------------------------------------------------------------------------------
  1 | beforeEach(function() {
  2 |     global.resetFeatures();
  3 |     global.addToEqualMatchMatcher();
  4 | });
  5 | 
  6 | describe('XRegExp.build addon:', function() {
  7 | 
  8 |     describe('XRegExp.tag()', function() {
  9 | 
 10 |         it('should escape the metacharacters of interpolated strings', function() {
 11 |             var inner = '.html';
 12 |             var re = XRegExp.tag()`^index${inner}$`;
 13 | 
 14 |             expect(re.test('index.html')).toBe(true);
 15 |             expect(re.test('index-html')).toBe(false);
 16 |         });
 17 | 
 18 |         it('should rewrite the backreferences of interpolated regexes', function() {
 19 |             var inner = /(.)\1/;
 20 |             var re = XRegExp.tag()`^${inner}${inner}$`;
 21 | 
 22 |             expect(re.test('aabb')).toBe(true);
 23 |             expect(re.test('aaba')).toBe(false);
 24 |         });
 25 | 
 26 |         it('should treat interpolated strings as atomic tokens', function() {
 27 |             var inner = 'ab';
 28 |             var re = XRegExp.tag()`^${inner}+$`;
 29 | 
 30 |             expect(re.test('abab')).toBe(true);
 31 |             expect(re.test('abb')).toBe(false);
 32 |         });
 33 | 
 34 |         it('should treat interpolated regexes as atomic tokens', function() {
 35 |             var inner = /ab/;
 36 |             var re = XRegExp.tag()`^${inner}+$`;
 37 | 
 38 |             expect(re.test('abab')).toBe(true);
 39 |             expect(re.test('abb')).toBe(false);
 40 |         });
 41 | 
 42 |         it('should support the "x" flag', function() {
 43 |             var inner = /ab/;
 44 |             var re = XRegExp.tag('x')`
 45 |                 ^
 46 |                 ${inner}
 47 |                 +
 48 |                 $
 49 |             `;
 50 | 
 51 |             expect(re.test('abab')).toBe(true);
 52 |             expect(re.test('abb')).toBe(false);
 53 |         });
 54 | 
 55 |         it('should support the "n" flag', function() {
 56 |             var inner = XRegExp('(unnamed), (?<name>named)');
 57 |             var re = XRegExp.tag('n')`${inner}`;
 58 | 
 59 |             expect(re.exec('unnamed, named')[1]).toBe('named');
 60 |         });
 61 | 
 62 |         it('should support the "g" flag', function() {
 63 |             var inner = 'a';
 64 |             var re = XRegExp.tag('g')`${inner}`;
 65 | 
 66 |             expect('aaa'.match(re)).toEqual(['a', 'a', 'a']);
 67 |         });
 68 | 
 69 |         it('should allow `false` to be interpolated', function() {
 70 |             var inner = false;
 71 |             var re = XRegExp.tag()`^${inner}$`;
 72 | 
 73 |             expect(re.test('false')).toBe(true);
 74 |         });
 75 | 
 76 |         it('should allow unescaped character classes', function() {
 77 |             var re = XRegExp.tag()`\d`;
 78 | 
 79 |             expect(re.test('1')).toBe(true);
 80 |         });
 81 | 
 82 |         it('should work as described in the comment @example', function() {
 83 |             var h12 = /1[0-2]|0?[1-9]/;
 84 |             var h24 = /2[0-3]|[01][0-9]/;
 85 |             var hours = XRegExp.tag('x')`${h12} : | ${h24}`;
 86 |             var minutes = /^[0-5][0-9]$/;
 87 |             var time = XRegExp.tag('x')`^ ${hours} (?<minutes>${minutes}) $`;
 88 | 
 89 |             expect(time.test('10:59')).toBe(true);
 90 |             expect(XRegExp.exec('10:59', time).groups.minutes).toEqual('59');
 91 |         });
 92 | 
 93 |     });
 94 | 
 95 |     describe('XRegExp.build()', function() {
 96 | 
 97 |         it('should apply a mode modifier in the outer pattern to the full regex with interpolated values', function() {
 98 |             expect(XRegExp.build('(?x){{a}}', {a: /1 2/}).test('12')).toBe(true);
 99 |             // IE 7 and 8 (not 6 or 9) throw an Error rather than SyntaxError
100 |             expect(function() {XRegExp.build('(?x)({{a}})', {a: /#/});}).toThrow();
101 |         });
102 | 
103 |         it('should ignore newlines when using flag x', function() {
104 |             expect(XRegExp.build('(?x)\n', {}).test('')).toBe(true);
105 |             expect(XRegExp.build('\n', {}, 'x').test('')).toBe(true);
106 |             expect(XRegExp.build('{{sub}}', {sub: '\n'}, 'x').test('')).toBe(true);
107 |         });
108 | 
109 |         it('should apply a mode modifier with a native flag in the outer pattern to the final result', function() {
110 |             expect(XRegExp.build('(?m){{a}}', {a: /a/}).multiline).toBe(true);
111 |             expect(XRegExp.build('(?i){{a}}', {a: /a/}).ignoreCase).toBe(true);
112 |         });
113 | 
114 |         it('should throw an exception when a mode modifier with g or y is used in the outer pattern', function() {
115 |             expect(function() {XRegExp.build('(?g){{a}}', {a: /a/});}).toThrowError(SyntaxError);
116 |             expect(function() {XRegExp.build('(?y){{a}}', {a: /a/});}).toThrowError(SyntaxError);
117 |             expect(function() {XRegExp.build('(?migs){{a}}', {a: /a/});}).toThrowError(SyntaxError);
118 |         });
119 | 
120 |         it('should not interpolate named subpatterns within character classes', function() {
121 |             expect(XRegExp.build('^[{{a}}]$', {a: 'x'}).test('x')).toBe(false);
122 |             expect(XRegExp.build('^{{a}}[{{a}}]$', {a: 'x'}).test('x{')).toBe(true);
123 |         });
124 | 
125 |         it('should strip a leading ^ and trailing unescaped $ in subpatterns, when both are present', function() {
126 |             expect(XRegExp.build('{{x}}', {x: /^123$/}).test('01234')).toBe(true);
127 |             expect(XRegExp.build('{{x}}', {x: '^123$'}).test('01234')).toBe(true);
128 |             expect(
129 |                 XRegExp.build(
130 |                     ' (?#comment) {{sub}} ',
131 |                     {sub: XRegExp(' (?#comment) ^123$ ', 'x')},
132 |                     'x'
133 |                 ).test('01234')
134 |             ).toBe(true);
135 |         });
136 | 
137 |         it('should not strip a leading ^ and trailing unescaped $ in subpatterns, when both are not present', function() {
138 |             expect(XRegExp.build('{{x}}', {x: '^123'}).test('123')).toBe(true);
139 |             expect(XRegExp.build('{{x}}', {x: '^123'}).test('01234')).toBe(false);
140 |             expect(XRegExp.build('{{x}}', {x: '123$'}).test('123')).toBe(true);
141 |             expect(XRegExp.build('{{x}}', {x: '123$'}).test('01234')).toBe(false);
142 |         });
143 | 
144 |         it('should not strip a leading ^ and trailing unescaped $ in subpatterns, when both are present but not leading/trailing', function() {
145 |             expect(XRegExp.build('{{x}}', {x: '^1$'}).test('11')).toBe(true);
146 |             expect(XRegExp.build('{{x}}', {x: '^1$\\b'}).test('11')).toBe(false);
147 |         });
148 | 
149 |         it('should not strip a trailing escaped $ in subpatterns', function() {
150 |             expect(XRegExp.build('{{x}}', {x: '^123\\$'}).test('123$')).toBe(true);
151 |             expect(XRegExp.build('{{x}}', {x: '^123\\$'}).test('0123$4')).toBe(false);
152 |         });
153 | 
154 |         it('should support flag n with mixed named and unnamed groups', function() {
155 |             expect(function() {XRegExp.build('()(?<n>)\\k<n>', {}, 'n');}).not.toThrow();
156 |             expect(function() {XRegExp.build('{{a}}', {a: '()(?<n>)\\k<n>'}, 'n');}).not.toThrow();
157 |             expect(function() {XRegExp.build('()(?<x>)\\k<x>{{a}}', {a: '()(?<n>)\\k<n>'}, 'n');}).not.toThrow();
158 |         });
159 | 
160 |         // TODO: Add complete specs
161 | 
162 |         it('should pass the readme example', function() {
163 |             var time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', {
164 |                 hours: XRegExp.build('{{h12}} : | {{h24}}', {
165 |                     h12: /1[0-2]|0?[1-9]/,
166 |                     h24: /2[0-3]|[01][0-9]/
167 |                 }),
168 |                 minutes: /^[0-5][0-9]$/
169 |             });
170 | 
171 |             expect(time.test('10:59')).toBe(true);
172 |             expect(XRegExp.exec('10:59', time).groups.minutes).toBe('59');
173 |         });
174 | 
175 |         it('should pass a series of complex backreference rewrites', function() {
176 |             // Equivalent to: XRegExp('(?<n1>(?<yo>a)\\2)\\1(?<nX>(?<yo2>b)\\4)\\3()\\5\\1\\3\\k<nX>')
177 |             var built = XRegExp.build('({{n1}})\\1(?<nX>{{n2}})\\2()\\3\\1\\2\\k<nX>', {
178 |                 n1: XRegExp('(?<yo>a)\\1'),
179 |                 n2: XRegExp('(?<yo2>b)\\1')
180 |             });
181 |             var match = XRegExp.exec('aaaabbbbaabbbb', built);
182 | 
183 |             expect(match).toBeTruthy();
184 |             expect(match.groups.n1).toBe('aa');
185 |             expect(match.groups.n2).toBeUndefined();
186 |             expect(match.groups.nX).toBe('bb');
187 |             expect(match.groups.yo).toBe('a');
188 |             expect(match.groups.yo2).toBe('b');
189 |         });
190 | 
191 |     });
192 | 
193 | });
194 | 


--------------------------------------------------------------------------------
/tests/spec/s-addons-matchrecursive.js:
--------------------------------------------------------------------------------
  1 | beforeEach(function() {
  2 |     global.resetFeatures();
  3 |     global.addToEqualMatchMatcher();
  4 | });
  5 | 
  6 | describe('XRegExp.matchRecursive addon:', function() {
  7 | 
  8 |     describe('XRegExp.matchRecursive()', function() {
  9 | 
 10 |         it('should pass the readme example for basic usage', function() {
 11 |             const str = '(t((e))s)t()(ing)';
 12 |             expect(XRegExp.matchRecursive(str, '\\(', '\\)', 'g')).toEqual(['t((e))s', '', 'ing']);
 13 |         });
 14 | 
 15 |         it('should pass the readme example for extended information mode with valueNames', function() {
 16 |             const str = 'Here is <div> <div>an</div></div> example';
 17 |             expect(
 18 |                 XRegExp.matchRecursive(str, '<div\\s*>', '</div>', 'gi', {
 19 |                     valueNames: ['between', 'left', 'match', 'right']
 20 |                 }))
 21 |                 .toEqual([
 22 |                     {name: 'between', value: 'Here is ',       start: 0,  end: 8},
 23 |                     {name: 'left',    value: '<div>',          start: 8,  end: 13},
 24 |                     {name: 'match',   value: ' <div>an</div>', start: 13, end: 27},
 25 |                     {name: 'right',   value: '</div>',         start: 27, end: 33},
 26 |                     {name: 'between', value: ' example',       start: 33, end: 41}
 27 |                 ]);
 28 |         });
 29 | 
 30 |         it('should pass the readme example for omitting unneeded parts with null valueNames and using escapeChar', function() {
 31 |             const str = '...{1}.\\{{function(x,y){return {y:x}}}';
 32 |             expect(
 33 |                 XRegExp.matchRecursive(str, '{', '}', 'g', {
 34 |                     valueNames: ['literal', null, 'value', null],
 35 |                     escapeChar: '\\'
 36 |                 }))
 37 |                 .toEqual([
 38 |                     {name: 'literal', value: '...',  start: 0, end: 3},
 39 |                     {name: 'value',   value: '1',    start: 4, end: 5},
 40 |                     {name: 'literal', value: '.\\{', start: 6, end: 9},
 41 |                     {name: 'value',   value: 'function(x,y){return {y:x}}', start: 10, end: 37}
 42 |                 ]);
 43 |         });
 44 | 
 45 |         it('should pass the readme example for sticky mode via flag y', function() {
 46 |             const str = '<1><<<2>>><3>4<5>';
 47 |             expect(XRegExp.matchRecursive(str, '<', '>', 'gy')).toEqual(['1', '<<2>>', '3']);
 48 |         });
 49 | 
 50 |         it('should pass the readme example for unbalanced delimiters', function() {
 51 |             const str = 'Here is <div> <div>an</div> unbalanced example';
 52 |             expect(XRegExp.matchRecursive(str, '<div\\s*>', '</div>', 'gi', {
 53 |                 unbalanced: 'skip'
 54 |             })).toEqual(['an']);
 55 |         });
 56 | 
 57 |         it('should throw for unbalanced left delimiter in first match without flag g', function() {
 58 |             expect(function() {XRegExp.matchRecursive('<', '<', '>');}).toThrow();
 59 |             expect(function() {XRegExp.matchRecursive('<<>', '<', '>');}).toThrow();
 60 |         });
 61 | 
 62 |         it('should not throw for unbalanced left delimiter after first match without flag g', function() {
 63 |             expect(function() {XRegExp.matchRecursive('<><', '<', '>');}).not.toThrow();
 64 |         });
 65 | 
 66 |         it('should throw for unbalanced left delimiter anywhere in string with flag g', function() {
 67 |             expect(function() {XRegExp.matchRecursive('<', '<', '>', 'g');}).toThrow();
 68 |             expect(function() {XRegExp.matchRecursive('<<>', '<', '>', 'g');}).toThrow();
 69 |             expect(function() {XRegExp.matchRecursive('<><', '<', '>', 'g');}).toThrow();
 70 |             expect(function() {XRegExp.matchRecursive('.<.<>><', '<', '>', 'g');}).toThrow();
 71 |         });
 72 | 
 73 |         it('should throw for unbalanced right delimiter in first match without flag g', function() {
 74 |             expect(function() {XRegExp.matchRecursive('>', '<', '>');}).toThrow();
 75 |         });
 76 | 
 77 |         it('should not throw for unbalanced right delimiter after first match without flag g', function() {
 78 |             expect(function() {XRegExp.matchRecursive('<>>', '<', '>');}).not.toThrow();
 79 |         });
 80 | 
 81 |         it('should throw for unbalanced right delimiter anywhere in string with flag g', function() {
 82 |             expect(function() {XRegExp.matchRecursive('>', '<', '>', 'g');}).toThrow();
 83 |             expect(function() {XRegExp.matchRecursive('<>>', '<', '>', 'g');}).toThrow();
 84 |             expect(function() {XRegExp.matchRecursive('.<.<>>>', '<', '>', 'g');}).toThrow();
 85 |         });
 86 | 
 87 |         it('should handle unbalanced left delimiter with option unbalanced set to skip', function() {
 88 |             const matches = XRegExp.matchRecursive('<><<.>', '<', '>', 'g', {unbalanced: 'skip'});
 89 |             expect(matches).toEqual(['', '.']);
 90 |             const vnMatches = XRegExp.matchRecursive('<><<.>', '<', '>', 'g', {unbalanced: 'skip', valueNames: ['between', 'left', 'match', 'right']});
 91 |             expect(vnMatches).toEqual([
 92 |                 {name: 'left',    value: '<', start: 0, end: 1},
 93 |                 {name: 'match',   value: '',  start: 1, end: 1},
 94 |                 {name: 'right',   value: '>', start: 1, end: 2},
 95 |                 {name: 'between', value: '<', start: 2, end: 3},
 96 |                 {name: 'left',    value: '<', start: 3, end: 4},
 97 |                 {name: 'match',   value: '.', start: 4, end: 5},
 98 |                 {name: 'right',   value: '>', start: 5, end: 6}
 99 |             ]);
100 |         });
101 | 
102 |         it('should handle unbalanced right delimiter with option unbalanced set to skip', function() {
103 |             const matches = XRegExp.matchRecursive('.<>>', '<', '>', 'g', {unbalanced: 'skip'});
104 |             expect(matches).toEqual(['']);
105 |             const vnMatches = XRegExp.matchRecursive('.<>>', '<', '>', 'g', {unbalanced: 'skip', valueNames: ['between', 'left', 'match', 'right']});
106 |             expect(vnMatches).toEqual([
107 |                 {name: 'between', value: '.', start: 0, end: 1},
108 |                 {name: 'left',    value: '<', start: 1, end: 2},
109 |                 {name: 'match',   value: '',  start: 2, end: 2},
110 |                 {name: 'right',   value: '>', start: 2, end: 3},
111 |                 {name: 'between', value: '>', start: 3, end: 4}
112 |             ]);
113 |         });
114 | 
115 |         it('should handle unbalanced overlapping multichar left delimiter with option unbalanced set to skip', function() {
116 |             const matches = XRegExp.matchRecursive('<<<<.>>', '<<', '>>', 'g', {
117 |                 unbalanced: 'skip',
118 |                 valueNames: ['between', 'left', 'match', 'right']
119 |             });
120 |             expect(matches).toEqual([
121 |                 {name: 'between', value: '<<', start: 0, end: 2},
122 |                 {name: 'left',    value: '<<', start: 2, end: 4},
123 |                 {name: 'match',   value: '.',  start: 4, end: 5},
124 |                 {name: 'right',   value: '>>', start: 5, end: 7}
125 |             ]);
126 |         });
127 | 
128 |         it('should handle unbalanced overlapping multichar left delimiter with option unbalanced set to skip-lazy', function() {
129 |             const matches = XRegExp.matchRecursive('<<<<.>>', '<<', '>>', 'g', {
130 |                 unbalanced: 'skip-lazy',
131 |                 valueNames: ['between', 'left', 'match', 'right']
132 |             });
133 |             expect(matches).toEqual([
134 |                 {name: 'between', value: '<',  start: 0, end: 1},
135 |                 {name: 'left',    value: '<<', start: 1, end: 3},
136 |                 {name: 'match',   value: '<.', start: 3, end: 5},
137 |                 {name: 'right',   value: '>>', start: 5, end: 7}
138 |             ]);
139 |         });
140 | 
141 |         it('should handle zero-length delimiters', function() {
142 |             expect(XRegExp.matchRecursive('<>', '(?=<)', '$')).toEqual(['<>']);
143 |         });
144 | 
145 |         it('should handle unbalanced zero-length delimiters', function() {
146 |             expect(function() {XRegExp.matchRecursive('<>', '(?=.)', '(?:)');}).toThrow();
147 |             expect(XRegExp.matchRecursive('<>', '(?=.)', '(?:)', '', {unbalanced: 'skip'})).toEqual(['>']);
148 |         });
149 | 
150 |         it('should return an empty array if no matches', function() {
151 |             expect(XRegExp.matchRecursive('.', '<', '>')).toEqual([]);
152 |             expect(XRegExp.matchRecursive('.', '<', '>', 'g')).toEqual([]);
153 |             expect(
154 |                 XRegExp.matchRecursive('.', '<', '>', '', {
155 |                     valueNames: ['between', 'left', 'match', 'right']
156 |                 })
157 |             ).toEqual([]);
158 |             expect(
159 |                 XRegExp.matchRecursive('.', '<', '>', 'g', {
160 |                     valueNames: ['between', 'left', 'match', 'right']
161 |                 })
162 |             ).toEqual([]);
163 |         });
164 | 
165 |     });
166 | 
167 | });
168 | 


--------------------------------------------------------------------------------
/types/test.ts:
--------------------------------------------------------------------------------
  1 | import XRegExp = require('xregexp');
  2 | 
  3 | // ======================================================
  4 | // constructor: string and RegExp patterns, with and without flags; every result must be assignable to RegExp
  5 | // ======================================================
  6 | let regex1: RegExp = /a/gi;
  7 | regex1 = XRegExp('/a/');
  8 | regex1 = XRegExp('/a/', 'gi');
  9 | regex1 = XRegExp(/a/gi);
 10 | regex1 = XRegExp(regex1, undefined);
 11 | 
 12 | // ======================================================
 13 | // XRegExp namespace: assignability checks for the exported types, interfaces, constants and methods
 14 | // ======================================================
 15 | 
 16 | //#region types
 17 | 
 18 | // TokenScopeOption: accepts each of the literals 'default', 'class' and 'all'
 19 | let ts1: XRegExp.TokenScopeOption = 'default';
 20 | ts1 = 'class';
 21 | ts1 = 'all';
 22 | 
 23 | // MatchScope: accepts 'one' and 'all'
 24 | let ms: XRegExp.MatchScope = 'one';
 25 | ms = 'all';
 26 | 
 27 | // TokenFlag: one sample each of an uppercase letter, lowercase letter, digit, underscore and dollar sign
 28 | let tf: XRegExp.TokenFlag = 'A';
 29 | tf = 'a';
 30 | tf = '0';
 31 | tf = '_';
 32 | tf = '$';
 33 | 
 34 | // FeatureOptions: a space-separated feature-name string, or an object of boolean feature flags
 35 | let fo: XRegExp.FeatureOptions = 'astral';
 36 | fo = 'namespacing';
 37 | fo = 'astral namespacing';
 38 | fo = 'namespacing astral';
 39 | fo = {};
 40 | fo = { astral: true };
 41 | fo = { namespacing: true };
 42 | fo = { astral: true, namespacing: true };
 43 | 
 44 | // Pattern: a string or a RegExp
 45 | let pat: XRegExp.Pattern = '/a/';
 46 | pat = /a/gi;
 47 | 
 48 | // NamedGroupsArray: an empty object literal is accepted
 49 | let ng: XRegExp.NamedGroupsArray = {};
 50 | 
 51 | // MatchChainArray: starts empty; a bare RegExp is a valid element
 52 | const mca: XRegExp.MatchChainArray = [];
 53 | mca[0] = /a/gi;
 54 | 
 55 | // ReplacementValue: either a callback or a plain string
 56 | let rv: XRegExp.ReplacementValue = (s, args) => 'a';
 57 | rv = 'a';
 58 | 
 59 | // UnicodeCharacterRange: `name` combined with one of `astral`, `bmp` or `inverseOf`
 60 | let ucr: XRegExp.UnicodeCharacterRange = { name: 'a', astral: 'a-z' };
 61 | ucr = { name: 'b', bmp: 'a-z' };
 62 | ucr = { name: 'b', inverseOf: 'a-z' };
 63 | 
 64 | //#endregion
 65 | 
 66 | //#region interfaces
 67 | 
 68 | // TokenOptions: an empty object literal is accepted, then each member is assigned individually
 69 | const to: XRegExp.TokenOptions = {};
 70 | to.scope = ts1;
 71 | to.flag = tf;
 72 | to.optionalFlags = 'gi';
 73 | to.reparse = false;
 74 | to.leadChar = '_';
 75 | 
 76 | // NamedGroupsArray: arbitrary keys are accepted and read back through index access
 77 | ng = { name: 'string1', val: 'string2' };
 78 | const ng_str1: string = ng['name'] + ng['val'];
 79 | 
 80 | // MatchArray: a string-array subclass implementing the interface with optional groups/input/index members
 81 | class XRegExpMatchArrayImpl extends Array<string> implements XRegExp.MatchArray {
 82 |     constructor(...items: string[]) {
 83 |         super(...items);
 84 |         Object.setPrototypeOf(this, Object.create(XRegExpMatchArrayImpl.prototype)); // NOTE(review): presumably the usual prototype fix-up for subclassing Array under ES5 targets; confirm
 85 |     }
 86 |     groups?: XRegExp.NamedGroupsArray;
 87 |     input?: string;
 88 |     index?: number;
 89 | }
 90 | let ma: XRegExp.MatchArray = new XRegExpMatchArrayImpl('asdf', 'qwerty'); // `let`: reassigned later from matchChain()
 91 | ma.index = 0;
 92 | ma.input = 'a';
 93 | ma.groups = ng;
 94 | ma['namedMatch'] = 'b'; // arbitrary string keys can be written and read back
 95 | const ma_str1: string | undefined = ma['namedMatch'] as string;
 96 | 
 97 | // ExecArray: a string-array subclass implementing the interface; `input` and `index` are declared with initial values
 98 | class XRegExpExecArrayImpl extends Array<string> implements XRegExp.ExecArray {
 99 |     constructor(...items: string[]) {
100 |         super(...items);
101 |         Object.setPrototypeOf(this, Object.create(XRegExpExecArrayImpl.prototype)); // NOTE(review): presumably the usual prototype fix-up for subclassing Array under ES5 targets; confirm
102 |     }
103 |     groups?: XRegExp.NamedGroupsArray;
104 |     input = '';
105 |     index = 0;
106 | }
107 | const ea: XRegExp.ExecArray = new XRegExpExecArrayImpl('asdf', 'qwerty');
108 | ea.groups = ng;
109 | ea.index = 0; // exercise the ExecArray members on `ea` itself
110 | ea.input = 'a';
111 | ea['namedMatch'] = 'b'; // arbitrary string keys can be written and read back
112 | const ea_str1: string | undefined = ea['namedMatch'] as string;
113 | 
114 | // ChainArrayElement: object form of a chain entry, with `backref` given as a number or as a string
115 | mca[1] = { regex: /a/gi, backref: 1 };
116 | mca[2] = { regex: /a/gi, backref: 'asdf' };
117 | 
118 | // MatchSubString: a String subclass implementing the interface with an optional `groups` member
119 | class XRegExpMatchSubstringImpl extends String implements XRegExp.MatchSubString {
120 |     constructor(value?: any) {
121 |         super(value);
122 |         Object.setPrototypeOf(this, Object.create(XRegExpMatchSubstringImpl.prototype)); // NOTE(review): presumably the usual prototype fix-up for subclassing String under ES5 targets; confirm
123 |     }
124 |     groups?: XRegExp.NamedGroupsArray;
125 | }
126 | const mss: XRegExp.MatchSubString = new XRegExpMatchSubstringImpl('asdf');
127 | mss.groups = ng;
128 | mss['namedMatch'] = 'b'; // arbitrary string keys can be written and read back
129 | const mss_str1: string | undefined = mss['namedMatch'] as string;
130 | 
131 | // ReplacementDetail: [pattern, replacement] tuple with an optional scope (null or a MatchScope); a further trailing element is also accepted
132 | let rd: XRegExp.ReplacementDetail = [/a/gi, rv];
133 | rd = [/a/gi, rv, null];
134 | rd = [/a/gi, rv, ms];
135 | rd = [/a/gi, rv, ms, 'undefined indexes will be ignored'];
136 | 
137 | // UnionOptions: `conjunction` accepts null, 'or' and 'none'
138 | const uo: XRegExp.UnionOptions = {};
139 | uo.conjunction = null;
140 | uo.conjunction = 'or';
141 | uo.conjunction = 'none';
142 | 
143 | // MatchRecursiveOptions: `escapeChar` accepts null or a string; `valueNames` accepts null or a MatchRecursiveValueNames
144 | const mro: XRegExp.MatchRecursiveOptions = {};
145 | mro.escapeChar = null;
146 | mro.escapeChar = '\\';
147 | mro.valueNames = null;
148 | 
149 | // MatchRecursiveValueNames: initialised with four nulls plus one extra trailing element, then slots 0-3 are set to strings
150 | const mrvn: XRegExp.MatchRecursiveValueNames = [null, null, null, null, 'undefined indexes will be ignored'];
151 | mrvn[0] = 'pre';
152 | mrvn[1] = 'left';
153 | mrvn[2] = 'inside';
154 | mrvn[3] = 'right';
155 | mro.valueNames = mrvn;
156 | 
157 | // MatchRecursiveValueNameMatch: object with name, value, start and end
158 | const mrvnm: XRegExp.MatchRecursiveValueNameMatch = { name: 'a', value: 'a', start: 0, end: 1 };
159 | 
160 | // UnicodeCharacterRangeBase: `alias` and `isBmpLast` are settable on a UnicodeCharacterRange value
161 | ucr.alias = 'asdf';
162 | ucr.isBmpLast = true;
163 | 
164 | //#endregion
165 | 
166 | //#region constants
167 | // XRegExp.version must be assignable to string
168 | const version: string =  XRegExp.version;
169 | 
170 | //#endregion
171 | 
172 | //#region methods
173 | 
174 | // addToken: regex plus a three-parameter handler, with and without a TokenOptions argument
175 | XRegExp.addToken(/a/gi, (m, s, f) => 'a');
176 | XRegExp.addToken(/b/gi, (m, s, f) => 'b', to);
177 | 
178 | // addUnicodeData: takes an array of UnicodeCharacterRange values
179 | XRegExp.addUnicodeData([ ucr ]);
180 | 
181 | // build: subpattern values may be RegExp objects or strings; flags are optional
182 | regex1 = XRegExp.build('(?x)^ {{v1}}:{{v2}} $', { v1: /a/gi, v2: regex1 });
183 | regex1 = XRegExp.build('(?x)^ {{v1}}:{{v2}} $', { v1: /a/gi, v2: '/a/' }, 'gi');
184 | 
185 | // cache: result must be assignable to RegExp
186 | regex1 = XRegExp.cache('/a/', 'gi');
187 | 
188 | // escape: result must be assignable to string
189 | const escape_str: string = XRegExp.escape('?<.abcde> asdf');
190 | 
191 | // exec: result is ExecArray | null; optional position, and a sticky argument given as boolean or 'sticky'
192 | let ean: XRegExp.ExecArray | null = XRegExp.exec('abcdefghijklm', /a/gi);
193 | ean = XRegExp.exec('abcdefghijklm', /a/gi, 0);
194 | ean = XRegExp.exec('abcdefghijklm', /a/gi, 0, true);
195 | ean = XRegExp.exec('abcdefghijklm', /a/gi, 0, 'sticky');
196 | 
197 | // forEach: callback is called with four parameters
198 | XRegExp.forEach('ab_ab_ab', /ab/gi, (m, i, s, r) => { /* do action */ });
199 | 
200 | // globalize: result must be assignable to RegExp
201 | regex1 = XRegExp.globalize(/a/gi);
202 | 
203 | // install: accepts feature-name strings and feature-flag objects
204 | XRegExp.install('astral');
205 | XRegExp.install('astral namespacing');
206 | XRegExp.install('namespacing');
207 | XRegExp.install('namespacing astral');
208 | XRegExp.install({});
209 | XRegExp.install({ astral: true });
210 | XRegExp.install({ namespacing: true });
211 | XRegExp.install({ astral: true, namespacing: true });
212 | 
213 | // isInstalled: the result must be assignable to boolean for each feature name
214 | let ii_bool: boolean = XRegExp.isInstalled('astral');
215 | ii_bool = XRegExp.isInstalled('namespacing');
216 | 
217 | // isRegExp: accepts a value of any kind and yields a boolean
218 | let ire_bool: boolean = XRegExp.isRegExp(/a/gi);
219 | ire_bool = XRegExp.isRegExp(null);
220 | ire_bool = XRegExp.isRegExp(undefined);
221 | ire_bool = XRegExp.isRegExp('a');
222 | ire_bool = XRegExp.isRegExp(0);
223 | ire_bool = XRegExp.isRegExp([]);
224 | ire_bool = XRegExp.isRegExp({});
225 | 
226 | // match: scope 'one' must be assignable to string|null, 'all' to string[], and no scope to the union of both
227 | const m_str: string|null = XRegExp.match('asdf', /a/gi, 'one');
228 | const m_strarr: string[] = XRegExp.match('asdf', /a/gi, 'all');
229 | const m_any: string|null|string[] = XRegExp.match('asdf', /a/gi);
230 | 
231 | // matchChain: result must be assignable to MatchArray
232 | ma = XRegExp.matchChain('asdf', mca);
233 | 
234 | // matchRecursive: string[] without options; MatchRecursiveValueNameMatch[] when valueNames is supplied
235 | let mr1: string[]
236 |     = XRegExp.matchRecursive('asdf', 'a', 'f');
237 | mr1 = XRegExp.matchRecursive('asdf', 'a', 'f', 'gi');
238 | let mr2: XRegExp.MatchRecursiveValueNameMatch[]
239 |     = XRegExp.matchRecursive('asdf', 'a', 'f', null, { valueNames: [ 'a', 'b', 'c', 'd' ] });
240 | mr2 = XRegExp.matchRecursive('asdf', 'a', 'f', 'gi', { valueNames: [ 'a', 'b', 'c', 'd' ] });
241 | 
242 | // replace: string or callback replacement, with an optional 'all' / 'one' scope
243 | let r_str: string = XRegExp.replace('asdf', '/a/', 'b');
244 | r_str = XRegExp.replace('asdf', /a/gi, (s, args) => 'a', 'all');
245 | r_str = XRegExp.replace('asdf', /a/gi, (s, args) => 'a', 'one');
246 | 
247 | // replaceEach: takes an array of ReplacementDetail tuples
248 | const re_str: string = XRegExp.replaceEach('asdf', [ rd ]);
249 | 
250 | // split: string or RegExp separator, with an optional numeric third argument
251 | let s_strarr: string[] = XRegExp.split('asdf', '/a/');
252 | s_strarr = XRegExp.split('asdf', /a/gi, 2);
253 | 
254 | // tag: tag(flags) is used as a template tag whose result must be assignable to RegExp
255 | let tag_re: RegExp = /a/g;
256 | tag_re = XRegExp.tag('i')`(asdf|${tag_re}|qwerty)`;
257 | 
258 | // test: boolean result; optional position, and a sticky argument given as boolean or 'sticky'
259 | let t_bool: boolean = XRegExp.test('asdf', '/a/');
260 | t_bool = XRegExp.test('asdf', /a/gi, 3);
261 | t_bool = XRegExp.test('asdf', '/a/', undefined, true);
262 | t_bool = XRegExp.test('asdf', /a/gi, 1, 'sticky');
263 | 
264 | // uninstall: accepts feature-name strings and feature-flag objects
265 | XRegExp.uninstall('astral');
266 | XRegExp.uninstall('astral namespacing');
267 | XRegExp.uninstall('namespacing');
268 | XRegExp.uninstall('namespacing astral');
269 | XRegExp.uninstall({});
270 | XRegExp.uninstall({ astral: true });
271 | XRegExp.uninstall({ namespacing: true });
272 | XRegExp.uninstall({ astral: true, namespacing: true });
273 | 
274 | // union: mixed string/RegExp pattern list; optional flags (string or null) and a UnionOptions object
275 | let u_re: RegExp = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ]);
276 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], null);
277 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi');
278 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { });
279 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { conjunction: null });
280 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { conjunction: 'or' });
281 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { conjunction: 'none' });
282 | 
283 | //#endregion
284 | 


--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
  1 | module.exports = {
  2 |     "env": {
  3 |         "browser": true,
  4 |         "commonjs": true,
  5 |         "es6": true,
  6 |         "node": true
  7 |     },
  8 |     "extends": "eslint:recommended",
  9 |     "parserOptions": {
 10 |         "ecmaVersion": 9,
 11 |         "sourceType": "module"
 12 |     },
 13 |     "rules": {
 14 |         "accessor-pairs": "error",
 15 |         "array-bracket-spacing": [
 16 |             "error",
 17 |             "never"
 18 |         ],
 19 |         "array-callback-return": "error",
 20 |         "arrow-body-style": "error",
 21 |         "arrow-parens": "error",
 22 |         "arrow-spacing": "error",
 23 |         "block-scoped-var": "error",
 24 |         "block-spacing": [
 25 |             "error",
 26 |             "never"
 27 |         ],
 28 |         "brace-style": [
 29 |             "error",
 30 |             "1tbs",
 31 |             {
 32 |                 "allowSingleLine": true
 33 |             }
 34 |         ],
 35 |         "callback-return": "off",
 36 |         "camelcase": "error",
 37 |         "capitalized-comments": "off",
 38 |         "class-methods-use-this": "error",
 39 |         "comma-dangle": "error",
 40 |         "comma-spacing": [
 41 |             "error",
 42 |             {
 43 |                 "after": true,
 44 |                 "before": false
 45 |             }
 46 |         ],
 47 |         "comma-style": [
 48 |             "error",
 49 |             "last"
 50 |         ],
 51 |         "complexity": "off",
 52 |         "computed-property-spacing": [
 53 |             "error",
 54 |             "never"
 55 |         ],
 56 |         "consistent-return": "error",
 57 |         "consistent-this": "error",
 58 |         "curly": "error",
 59 |         "default-case": "error",
 60 |         "dot-location": "error",
 61 |         "dot-notation": "error",
 62 |         "eol-last": "error",
 63 |         "eqeqeq": "off",
 64 |         "func-call-spacing": "error",
 65 |         "func-name-matching": "error",
 66 |         "func-names": [
 67 |             "error",
 68 |             "never"
 69 |         ],
 70 |         "func-style": [
 71 |             "error",
 72 |             "declaration"
 73 |         ],
 74 |         "generator-star-spacing": "error",
 75 |         "global-require": "error",
 76 |         "guard-for-in": "off",
 77 |         "handle-callback-err": "error",
 78 |         "id-blacklist": "error",
 79 |         "id-length": "off",
 80 |         "id-match": "error",
 81 |         "indent": "error",
 82 |         "init-declarations": "off",
 83 |         "jsx-quotes": "error",
 84 |         "key-spacing": "error",
 85 |         "keyword-spacing": [
 86 |             "error",
 87 |             {
 88 |                 "after": true,
 89 |                 "before": true
 90 |             }
 91 |         ],
 92 |         "line-comment-position": "off",
 93 |         "linebreak-style": [
 94 |             "error",
 95 |             "unix"
 96 |         ],
 97 |         "lines-around-comment": "error",
 98 |         "lines-around-directive": "error",
 99 |         "max-depth": "error",
100 |         "max-len": "off",
101 |         "max-lines": "off",
102 |         "max-nested-callbacks": "error",
103 |         "max-params": "off",
104 |         "max-statements": "off",
105 |         "max-statements-per-line": "off",
106 |         "multiline-ternary": "off",
107 |         "new-parens": "error",
108 |         "newline-after-var": "off",
109 |         "newline-before-return": "off",
110 |         "newline-per-chained-call": "off",
111 |         "no-alert": "error",
112 |         "no-array-constructor": "error",
113 |         "no-await-in-loop": "error",
114 |         "no-bitwise": "off",
115 |         "no-caller": "error",
116 |         "no-catch-shadow": "error",
117 |         "no-compare-neg-zero": "error",
118 |         "no-cond-assign": [
119 |             "error",
120 |             "except-parens"
121 |         ],
122 |         "no-confusing-arrow": "error",
123 |         "no-constant-condition": [
124 |             "error",
125 |             {
126 |                 "checkLoops": false
127 |             }
128 |         ],
129 |         "no-continue": "off",
130 |         "no-div-regex": "error",
131 |         "no-duplicate-imports": "error",
132 |         "no-else-return": "error",
133 |         "no-empty-function": "error",
134 |         "no-eq-null": "off",
135 |         "no-eval": "error",
136 |         "no-extend-native": "off",
137 |         "no-extra-bind": "error",
138 |         "no-extra-label": "error",
139 |         "no-extra-parens": "off",
140 |         "no-floating-decimal": "error",
141 |         "no-implicit-coercion": [
142 |             "error",
143 |             {
144 |                 "boolean": false,
145 |                 "number": false,
146 |                 "string": false
147 |             }
148 |         ],
149 |         "no-implicit-globals": "error",
150 |         "no-implied-eval": "error",
151 |         "no-inline-comments": "off",
152 |         "no-inner-declarations": [
153 |             "error",
154 |             "functions"
155 |         ],
156 |         "no-invalid-this": "off",
157 |         "no-iterator": "error",
158 |         "no-label-var": "error",
159 |         "no-labels": "error",
160 |         "no-lone-blocks": "error",
161 |         "no-lonely-if": "error",
162 |         "no-loop-func": "error",
163 |         "no-magic-numbers": "off",
164 |         "no-mixed-operators": "error",
165 |         "no-mixed-requires": "error",
166 |         "no-multi-assign": "error",
167 |         "no-multi-spaces": "error",
168 |         "no-multi-str": "error",
169 |         "no-multiple-empty-lines": "error",
170 |         "no-native-reassign": "error",
171 |         "no-negated-condition": "error",
172 |         "no-negated-in-lhs": "error",
173 |         "no-nested-ternary": "off",
174 |         "no-new": "off",
175 |         "no-new-func": "error",
176 |         "no-new-object": "error",
177 |         "no-new-require": "error",
178 |         "no-new-wrappers": "off",
179 |         "no-octal-escape": "error",
180 |         "no-param-reassign": "off",
181 |         "no-path-concat": "error",
182 |         "no-plusplus": "off",
183 |         "no-process-env": "error",
184 |         "no-process-exit": "error",
185 |         "no-proto": "off",
186 |         "no-prototype-builtins": "off",
187 |         "no-restricted-globals": "error",
188 |         "no-restricted-imports": "error",
189 |         "no-restricted-modules": "error",
190 |         "no-restricted-properties": "error",
191 |         "no-restricted-syntax": "error",
192 |         "no-return-assign": [
193 |             "error",
194 |             "except-parens"
195 |         ],
196 |         "no-return-await": "error",
197 |         "no-script-url": "error",
198 |         "no-self-compare": "error",
199 |         "no-sequences": "error",
200 |         "no-shadow": "off",
201 |         "no-shadow-restricted-names": "error",
202 |         "no-spaced-func": "error",
203 |         "no-sync": "error",
204 |         "no-tabs": "error",
205 |         "no-template-curly-in-string": "error",
206 |         "no-ternary": "off",
207 |         "no-throw-literal": "error",
208 |         "no-trailing-spaces": "error",
209 |         "no-undef-init": "error",
210 |         "no-undefined": "off",
211 |         "no-underscore-dangle": "off",
212 |         "no-unmodified-loop-condition": "error",
213 |         "no-unneeded-ternary": "error",
214 |         "no-unused-expressions": "error",
215 |         "no-use-before-define": "off",
216 |         "no-useless-call": "error",
217 |         "no-useless-computed-key": "error",
218 |         "no-useless-concat": "off",
219 |         "no-useless-constructor": "error",
220 |         "no-useless-escape": "off",
221 |         "no-useless-rename": "error",
222 |         "no-useless-return": "error",
223 |         "no-var": "off",
224 |         "no-void": "error",
225 |         "no-warning-comments": "error",
226 |         "no-whitespace-before-property": "error",
227 |         "no-with": "error",
228 |         "nonblock-statement-body-position": "error",
229 |         "object-curly-newline": "off",
230 |         "object-curly-spacing": [
231 |             "error",
232 |             "never"
233 |         ],
234 |         "object-property-newline": "error",
235 |         "object-shorthand": "off",
236 |         "one-var": "off",
237 |         "one-var-declaration-per-line": "error",
238 |         "operator-assignment": [
239 |             "error",
240 |             "always"
241 |         ],
242 |         "operator-linebreak": [
243 |             "error",
244 |             "after"
245 |         ],
246 |         "padded-blocks": "off",
247 |         "prefer-arrow-callback": "off",
248 |         "prefer-const": "error",
249 |         "prefer-destructuring": [
250 |             "error",
251 |             {
252 |                 "array": true,
253 |                 "object": true
254 |             }
255 |         ],
256 |         "prefer-numeric-literals": "error",
257 |         "prefer-promise-reject-errors": "error",
258 |         "prefer-reflect": "off",
259 |         "prefer-rest-params": "off",
260 |         "prefer-spread": "off",
261 |         "prefer-template": "off",
262 |         "quote-props": "off",
263 |         "quotes": "off",
264 |         "radix": [
265 |             "error",
266 |             "always"
267 |         ],
268 |         "require-await": "error",
269 |         "require-jsdoc": "off",
270 |         "rest-spread-spacing": "error",
271 |         "semi": "error",
272 |         "semi-spacing": [
273 |             "error",
274 |             {
275 |                 "after": true,
276 |                 "before": false
277 |             }
278 |         ],
279 |         "sort-imports": "error",
280 |         "sort-keys": "off",
281 |         "sort-vars": "error",
282 |         "space-before-blocks": "error",
283 |         "space-before-function-paren": [
284 |             "error",
285 |             "never"
286 |         ],
287 |         "space-in-parens": [
288 |             "error",
289 |             "never"
290 |         ],
291 |         "space-infix-ops": "error",
292 |         "space-unary-ops": "error",
293 |         "spaced-comment": "off",
294 |         "strict": "off",
295 |         "symbol-description": "error",
296 |         "template-curly-spacing": "error",
297 |         "template-tag-spacing": "error",
298 |         "unicode-bom": [
299 |             "error",
300 |             "never"
301 |         ],
302 |         "valid-jsdoc": "off",
303 |         "vars-on-top": "off",
304 |         "wrap-iife": "error",
305 |         "wrap-regex": "off",
306 |         "yield-star-spacing": "error",
307 |         "yoda": [
308 |             "error",
309 |             "never"
310 |         ]
311 |     }
312 | };
313 | 


--------------------------------------------------------------------------------
/src/addons/build.js:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * XRegExp.build 5.1.2
  3 |  * <xregexp.com>
  4 |  * Steven Levithan (c) 2012-present MIT License
  5 |  */
  6 | 
  7 | export default (XRegExp) => {
    // Name of the property on a regex object from which `captureNames` is read (see `asXRegExp`
    // and `XRegExp.build` below)
    const REGEX_DATA = 'xregexp';
    // Tokenizer for regex source text. Alternatives, in order:
    // 1. `(` not followed by `?` (captured in group 1)
    // 2. A numbered backreference like `\1` (digits captured in group 2)
    // 3. Any other backslash-escaped character
    // 4. A complete character class `[...]`
    // Alternatives 3 and 4 have no capturing groups; replace callbacks return them unchanged
    const subParts = /(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*\]/g;
    // Tokenizer for the outer pattern: `({{name}})` (name in group 1) or `{{name}}` (name in
    // group 2), followed by the alternatives of `subParts` combined via `XRegExp.union`
    const parts = XRegExp.union([/\({{([\w$]+)}}\)|{{([\w$]+)}}/, subParts], 'g', {
        conjunction: 'or'
    });
 13 | 
 14 |     /**
 15 |      * Strips a leading `^` and trailing unescaped `$`, if both are present.
 16 |      *
 17 |      * @private
 18 |      * @param {String} pattern Pattern to process.
 19 |      * @returns {String} Pattern with edge anchors removed.
 20 |      */
 21 |     function deanchor(pattern) {
 22 |         // Allow any number of empty noncapturing groups before/after anchors, because regexes
 23 |         // built/generated by XRegExp sometimes include them
 24 |         const leadingAnchor = /^(?:\(\?:\))*\^/;
 25 |         const trailingAnchor = /\$(?:\(\?:\))*$/;
 26 | 
 27 |         if (
 28 |             leadingAnchor.test(pattern) &&
 29 |             trailingAnchor.test(pattern) &&
 30 |             // Ensure that the trailing `$` isn't escaped
 31 |             trailingAnchor.test(pattern.replace(/\\[\s\S]/g, ''))
 32 |         ) {
 33 |             return pattern.replace(leadingAnchor, '').replace(trailingAnchor, '');
 34 |         }
 35 | 
 36 |         return pattern;
 37 |     }
 38 | 
 39 |     /**
 40 |      * Converts the provided value to an XRegExp. Native RegExp flags are not preserved.
 41 |      *
 42 |      * @private
 43 |      * @param {String|RegExp} value Value to convert.
 44 |      * @param {Boolean} [addFlagX] Whether to apply the `x` flag in cases when `value` is not
 45 |      *   already a regex generated by XRegExp
 46 |      * @returns {RegExp} XRegExp object with XRegExp syntax applied.
 47 |      */
 48 |     function asXRegExp(value, addFlagX) {
 49 |         const flags = addFlagX ? 'x' : '';
 50 |         return XRegExp.isRegExp(value) ?
 51 |             (value[REGEX_DATA] && value[REGEX_DATA].captureNames ?
 52 |                 // Don't recompile, to preserve capture names
 53 |                 value :
 54 |                 // Recompile as XRegExp
 55 |                 XRegExp(value.source, flags)
 56 |             ) :
 57 |             // Compile string as XRegExp
 58 |             XRegExp(value, flags);
 59 |     }
 60 | 
 61 |     function interpolate(substitution) {
 62 |         return substitution instanceof RegExp ? substitution : XRegExp.escape(substitution);
 63 |     }
 64 | 
 65 |     function reduceToSubpatternsObject(subpatterns, interpolated, subpatternIndex) {
 66 |         subpatterns[`subpattern${subpatternIndex}`] = interpolated;
 67 |         return subpatterns;
 68 |     }
 69 | 
 70 |     function embedSubpatternAfter(raw, subpatternIndex, rawLiterals) {
 71 |         const hasSubpattern = subpatternIndex < rawLiterals.length - 1;
 72 |         return raw + (hasSubpattern ? `{{subpattern${subpatternIndex}}}` : '');
 73 |     }
 74 | 
 75 |     /**
 76 |      * Provides tagged template literals that create regexes with XRegExp syntax and flags. The
 77 |      * provided pattern is handled as a raw string, so backslashes don't need to be escaped.
 78 |      *
 79 |      * Interpolation of strings and regexes shares the features of `XRegExp.build`. Interpolated
 80 |      * patterns are treated as atomic units when quantified, interpolated strings have their special
 81 |      * characters escaped, a leading `^` and trailing unescaped `$` are stripped from interpolated
 82 |      * regexes if both are present, and any backreferences within an interpolated regex are
 83 |      * rewritten to work within the overall pattern.
 84 |      *
 85 |      * @memberOf XRegExp
 86 |      * @param {String} [flags] Any combination of XRegExp flags.
 87 |      * @returns {Function} Handler for template literals that construct regexes with XRegExp syntax.
 88 |      * @example
 89 |      *
 90 |      * XRegExp.tag()`\b\w+\b`.test('word'); // -> true
 91 |      *
 92 |      * const hours = /1[0-2]|0?[1-9]/;
 93 |      * const minutes = /(?<minutes>[0-5][0-9])/;
 94 |      * const time = XRegExp.tag('x')`\b ${hours} : ${minutes} \b`;
 95 |      * time.test('10:59'); // -> true
 96 |      * XRegExp.exec('10:59', time).groups.minutes; // -> '59'
 97 |      *
 98 |      * const backref1 = /(a)\1/;
 99 |      * const backref2 = /(b)\1/;
100 |      * XRegExp.tag()`${backref1}${backref2}`.test('aabb'); // -> true
101 |      */
102 |     XRegExp.tag = (flags) => (literals, ...substitutions) => {
103 |         const subpatterns = substitutions.map(interpolate).reduce(reduceToSubpatternsObject, {});
104 |         const pattern = literals.raw.map(embedSubpatternAfter).join('');
105 |         return XRegExp.build(pattern, subpatterns, flags);
106 |     };
107 | 
108 |     /**
109 |      * Builds regexes using named subpatterns, for readability and pattern reuse. Backreferences in
110 |      * the outer pattern and provided subpatterns are automatically renumbered to work correctly.
111 |      * Native flags used by provided subpatterns are ignored in favor of the `flags` argument.
112 |      *
113 |      * @memberOf XRegExp
114 |      * @param {String} pattern XRegExp pattern using `{{name}}` for embedded subpatterns. Allows
115 |      *   `({{name}})` as shorthand for `(?<name>{{name}})`. Patterns cannot be embedded within
116 |      *   character classes.
117 |      * @param {Object} subs Lookup object for named subpatterns. Values can be strings or regexes. A
118 |      *   leading `^` and trailing unescaped `$` are stripped from subpatterns, if both are present.
119 |      * @param {String} [flags] Any combination of XRegExp flags.
120 |      * @returns {RegExp} Regex with interpolated subpatterns.
121 |      * @example
122 |      *
123 |      * const time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', {
124 |      *   hours: XRegExp.build('{{h12}} : | {{h24}}', {
125 |      *     h12: /1[0-2]|0?[1-9]/,
126 |      *     h24: /2[0-3]|[01][0-9]/
127 |      *   }, 'x'),
128 |      *   minutes: /^[0-5][0-9]$/
129 |      * });
130 |      * time.test('10:59'); // -> true
131 |      * XRegExp.exec('10:59', time).groups.minutes; // -> '59'
132 |      */
133 |     XRegExp.build = (pattern, subs, flags) => {
134 |         flags = flags || '';
135 |         // Used with `asXRegExp` calls for `pattern` and subpatterns in `subs`, to work around how
136 |         // some browsers convert `RegExp('\n')` to a regex that contains the literal characters `\`
137 |         // and `n`. See more details at <https://github.com/slevithan/xregexp/pull/163>.
138 |         const addFlagX = flags.includes('x');
139 |         const inlineFlags = /^\(\?([\w$]+)\)/.exec(pattern);
140 |         // Add flags within a leading mode modifier to the overall pattern's flags
141 |         if (inlineFlags) {
142 |             flags = XRegExp._clipDuplicates(flags + inlineFlags[1]);
143 |         }
144 | 
145 |         const data = {};
146 |         for (const p in subs) {
147 |             if (subs.hasOwnProperty(p)) {
148 |                 // Passing to XRegExp enables extended syntax and ensures independent validity,
149 |                 // lest an unescaped `(`, `)`, `[`, or trailing `\` breaks the `(?:)` wrapper. For
150 |                 // subpatterns provided as native regexes, it dies on octals and adds the property
151 |                 // used to hold extended regex instance data, for simplicity.
152 |                 const sub = asXRegExp(subs[p], addFlagX);
153 |                 data[p] = {
154 |                     // Deanchoring allows embedding independently useful anchored regexes. If you
155 |                     // really need to keep your anchors, double them (i.e., `^^...$$`).
156 |                     pattern: deanchor(sub.source),
157 |                     names: sub[REGEX_DATA].captureNames || []
158 |                 };
159 |             }
160 |         }
161 | 
162 |         // Passing to XRegExp dies on octals and ensures the outer pattern is independently valid;
163 |         // helps keep this simple. Named captures will be put back.
164 |         const patternAsRegex = asXRegExp(pattern, addFlagX);
165 | 
166 |         // 'Caps' is short for 'captures'
167 |         let numCaps = 0;
168 |         let numPriorCaps;
169 |         let numOuterCaps = 0;
170 |         const outerCapsMap = [0];
171 |         const outerCapNames = patternAsRegex[REGEX_DATA].captureNames || [];
172 |         const output = patternAsRegex.source.replace(parts, ($0, $1, $2, $3, $4) => {
173 |             const subName = $1 || $2;
174 |             let capName;
175 |             let intro;
176 |             let localCapIndex;
177 |             // Named subpattern
178 |             if (subName) {
179 |                 if (!data.hasOwnProperty(subName)) {
180 |                     throw new ReferenceError(`Undefined property ${$0}`);
181 |                 }
182 |                 // Named subpattern was wrapped in a capturing group
183 |                 if ($1) {
184 |                     capName = outerCapNames[numOuterCaps];
185 |                     outerCapsMap[++numOuterCaps] = ++numCaps;
186 |                     // If it's a named group, preserve the name. Otherwise, use the subpattern name
187 |                     // as the capture name
188 |                     intro = `(?<${capName || subName}>`;
189 |                 } else {
190 |                     intro = '(?:';
191 |                 }
192 |                 numPriorCaps = numCaps;
193 |                 const rewrittenSubpattern = data[subName].pattern.replace(subParts, (match, paren, backref) => {
194 |                     // Capturing group
195 |                     if (paren) {
196 |                         capName = data[subName].names[numCaps - numPriorCaps];
197 |                         ++numCaps;
198 |                         // If the current capture has a name, preserve the name
199 |                         if (capName) {
200 |                             return `(?<${capName}>`;
201 |                         }
202 |                     // Backreference
203 |                     } else if (backref) {
204 |                         localCapIndex = +backref - 1;
205 |                         // Rewrite the backreference
206 |                         return data[subName].names[localCapIndex] ?
207 |                             // Need to preserve the backreference name in case using flag `n`
208 |                             `\\k<${data[subName].names[localCapIndex]}>` :
209 |                             `\\${+backref + numPriorCaps}`;
210 |                     }
211 |                     return match;
212 |                 });
213 |                 return `${intro}${rewrittenSubpattern})`;
214 |             }
215 |             // Capturing group
216 |             if ($3) {
217 |                 capName = outerCapNames[numOuterCaps];
218 |                 outerCapsMap[++numOuterCaps] = ++numCaps;
219 |                 // If the current capture has a name, preserve the name
220 |                 if (capName) {
221 |                     return `(?<${capName}>`;
222 |                 }
223 |             // Backreference
224 |             } else if ($4) {
225 |                 localCapIndex = +$4 - 1;
226 |                 // Rewrite the backreference
227 |                 return outerCapNames[localCapIndex] ?
228 |                     // Need to preserve the backreference name in case using flag `n`
229 |                     `\\k<${outerCapNames[localCapIndex]}>` :
230 |                     `\\${outerCapsMap[+$4]}`;
231 |             }
232 |             return $0;
233 |         });
234 | 
235 |         return XRegExp(output, flags);
236 |     };
237 | };
238 | 


--------------------------------------------------------------------------------
/src/addons/matchrecursive.js:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * XRegExp.matchRecursive 5.1.2
  3 |  * <xregexp.com>
  4 |  * Steven Levithan (c) 2009-present MIT License
  5 |  */
  6 | 
  7 | export default (XRegExp) => {
  8 | 
  9 |     /**
 10 |      * Returns a match detail object composed of the provided values.
 11 |      *
 12 |      * @private
 13 |      */
 14 |     function row(name, value, start, end) {
 15 |         return {
 16 |             name,
 17 |             value,
 18 |             start,
 19 |             end
 20 |         };
 21 |     }
 22 | 
 23 |     /**
 24 |      * Returns an array of match strings between outermost left and right delimiters, or an array of
 25 |      * objects with detailed match parts and position data. By default, an error is thrown if
 26 |      * delimiters are unbalanced within the subject string.
 27 |      *
 28 |      * @memberOf XRegExp
 29 |      * @param {String} str String to search.
 30 |      * @param {String} left Left delimiter as an XRegExp pattern.
 31 |      * @param {String} right Right delimiter as an XRegExp pattern.
 32 |      * @param {String} [flags] Any combination of XRegExp flags, used for the left and right delimiters.
 33 |      * @param {Object} [options] Options object with optional properties:
 34 |      *   - `valueNames` {Array} Providing `valueNames` changes the return value from an array of
 35 |      *     matched strings to an array of objects that provide the value and start/end positions
 36 |      *     for the matched strings as well as the matched delimiters and unmatched string segments.
 37 |      *     To use this extended information mode, provide an array of 4 strings that name the parts
 38 |      *     to be returned:
 39 |      *     1. String segments outside of (before, between, and after) matches.
 40 |      *     2. Matched outermost left delimiters.
 41 |      *     3. Matched text between the outermost left and right delimiters.
 42 |      *     4. Matched outermost right delimiters.
 43 |      *     Taken together, these parts include the entire subject string if used with flag g.
 44 |      *     Use `null` for any of these values to omit unneeded parts from the returned results.
 45 |      *   - `escapeChar` {String} Single char used to escape delimiters within the subject string.
 46 |      *   - `unbalanced` {String} Handling mode for unbalanced delimiters. Options are:
 47 |      *     - 'error' - throw (default)
 48 |      *     - 'skip' - unbalanced delimiters are treated as part of the text between delimiters, and
 49 |      *       searches continue at the end of the unbalanced delimiter.
 50 |      *     - 'skip-lazy' - unbalanced delimiters are treated as part of the text between delimiters,
 51 |      *       and searches continue one character after the start of the unbalanced delimiter.
 52 |      * @returns {Array} Array of matches, or an empty array.
 53 |      * @example
 54 |      *
 55 |      * // Basic usage
 56 |      * const str1 = '(t((e))s)t()(ing)';
 57 |      * XRegExp.matchRecursive(str1, '\\(', '\\)', 'g');
 58 |      * // -> ['t((e))s', '', 'ing']
 59 |      *
 60 |      * // Extended information mode with valueNames
 61 |      * const str2 = 'Here is <div> <div>an</div></div> example';
 62 |      * XRegExp.matchRecursive(str2, '<div\\s*>', '</div>', 'gi', {
 63 |      *   valueNames: ['between', 'left', 'match', 'right']
 64 |      * });
 65 |      * // -> [
 66 |      * // {name: 'between', value: 'Here is ',       start: 0,  end: 8},
 67 |      * // {name: 'left',    value: '<div>',          start: 8,  end: 13},
 68 |      * // {name: 'match',   value: ' <div>an</div>', start: 13, end: 27},
 69 |      * // {name: 'right',   value: '</div>',         start: 27, end: 33},
 70 |      * // {name: 'between', value: ' example',       start: 33, end: 41}
 71 |      * // ]
 72 |      *
 73 |      * // Omitting unneeded parts with null valueNames, and using escapeChar
 74 |      * const str3 = '...{1}.\\{{function(x,y){return {y:x}}}';
 75 |      * XRegExp.matchRecursive(str3, '{', '}', 'g', {
 76 |      *   valueNames: ['literal', null, 'value', null],
 77 |      *   escapeChar: '\\'
 78 |      * });
 79 |      * // -> [
 80 |      * // {name: 'literal', value: '...',  start: 0, end: 3},
 81 |      * // {name: 'value',   value: '1',    start: 4, end: 5},
 82 |      * // {name: 'literal', value: '.\\{', start: 6, end: 9},
 83 |      * // {name: 'value',   value: 'function(x,y){return {y:x}}', start: 10, end: 37}
 84 |      * // ]
 85 |      *
 86 |      * // Sticky mode via flag y
 87 |      * const str4 = '<1><<<2>>><3>4<5>';
 88 |      * XRegExp.matchRecursive(str4, '<', '>', 'gy');
 89 |      * // -> ['1', '<<2>>', '3']
 90 |      *
 91 |      * // Skipping unbalanced delimiters instead of erroring
 92 |      * const str5 = 'Here is <div> <div>an</div> unbalanced example';
 93 |      * XRegExp.matchRecursive(str5, '<div\\s*>', '</div>', 'gi', {
 94 |      *     unbalanced: 'skip'
 95 |      * });
 96 |      * // -> ['an']
 97 |      */
    XRegExp.matchRecursive = (str, left, right, flags, options) => {
        flags = flags || '';
        options = options || {};
        const global = flags.includes('g');
        const sticky = flags.includes('y');
        // Flag `y` is handled manually
        const basicFlags = flags.replace(/y/g, '');
        // From here on, `left` and `right` are regexes rather than pattern strings
        left = XRegExp(left, basicFlags);
        right = XRegExp(right, basicFlags);

        // Regex that matches a run of escaped characters and non-delimiter text; only built when
        // an escape character is provided
        let esc;
        let {escapeChar} = options;
        if (escapeChar) {
            if (escapeChar.length > 1) {
                throw new Error('Cannot use more than one escape character');
            }
            escapeChar = XRegExp.escape(escapeChar);
            // Example of concatenated `esc` regex:
            // `escapeChar`: '%'
            // `left`: '<'
            // `right`: '>'
            // Regex is: /(?:%[\S\s]|(?:(?!<|>)[^%])+)+/
            esc = new RegExp(
                `(?:${escapeChar}[\\S\\s]|(?:(?!${
                    // Using `XRegExp.union` safely rewrites backreferences in `left` and `right`.
                    // Intentionally not passing `basicFlags` to `XRegExp.union` since any syntax
                    // transformation resulting from those flags was already applied to `left` and
                    // `right` when they were passed through the XRegExp constructor above.
                    XRegExp.union([left, right], '', {conjunction: 'or'}).source
                })[^${escapeChar}])+)+`,
                // Flags `dgy` not needed here
                flags.replace(XRegExp._hasNativeFlag('s') ? /[^imsu]/g : /[^imu]/g, '')
            );
        }

        // Number of left delimiters that are currently open (nesting depth)
        let openTokens = 0;
        // Start and end positions of the most recently matched delimiter. `delimEnd` is also the
        // position from which the next search starts
        let delimStart = 0;
        let delimEnd = 0;
        // End position of the last completed outermost match
        let lastOuterEnd = 0;
        // Start of the current outermost left delimiter, and the position just after it
        let outerStart;
        let innerStart;
        let leftMatch;
        let rightMatch;
        // `valueNames`, if provided, switches the output to extended information mode
        const vN = options.valueNames;
        const output = [];

        while (true) {
            // If using an escape character, advance to the delimiter's next starting position,
            // skipping any escaped characters in between
            if (escapeChar) {
                // If `esc` doesn't match at `delimEnd`, the `['']` fallback advances by zero
                delimEnd += (XRegExp.exec(str, esc, delimEnd, 'sticky') || [''])[0].length;
            }

            leftMatch = XRegExp.exec(str, left, delimEnd);
            rightMatch = XRegExp.exec(str, right, delimEnd);
            // Keep the leftmost match only
            if (leftMatch && rightMatch) {
                // On a tie, the left delimiter wins
                if (leftMatch.index <= rightMatch.index) {
                    rightMatch = null;
                } else {
                    leftMatch = null;
                }
            }

            // Paths (LM: leftMatch, RM: rightMatch, OT: openTokens):
            // LM | RM | OT | Result
            // 1  | 0  | 1  | loop
            // 1  | 0  | 0  | loop
            // 0  | 1  | 1  | loop
            // 0  | 1  | 0  | throw
            // 0  | 0  | 1  | throw
            // 0  | 0  | 0  | break
            // The paths above don't include the sticky mode special case. The loop ends after the
            // first completed match if not `global`.
            if (leftMatch || rightMatch) {
                delimStart = (leftMatch || rightMatch).index;
                delimEnd = delimStart + (leftMatch || rightMatch)[0].length;
            } else if (!openTokens) {
                break;
            }
            // Sticky mode: stop when a delimiter found outside of any open match doesn't start
            // exactly where the previous outermost match ended (position 0 for the first match)
            if (sticky && !openTokens && delimStart > lastOuterEnd) {
                break;
            }
            if (leftMatch) {
                // Only the outermost left delimiter sets the match boundaries
                if (!openTokens) {
                    outerStart = delimStart;
                    innerStart = delimEnd;
                }
                openTokens += 1;
            } else if (rightMatch && openTokens) {
                openTokens -= 1;
                // The outermost left delimiter was closed, so a match is complete
                if (!openTokens) {
                    if (vN) {
                        // Text between the previous match (or string start) and this match
                        if (vN[0] && outerStart > lastOuterEnd) {
                            output.push(row(vN[0], str.slice(lastOuterEnd, outerStart), lastOuterEnd, outerStart));
                        }
                        // Outermost left delimiter
                        if (vN[1]) {
                            output.push(row(vN[1], str.slice(outerStart, innerStart), outerStart, innerStart));
                        }
                        // Text between the outermost delimiters
                        if (vN[2]) {
                            output.push(row(vN[2], str.slice(innerStart, delimStart), innerStart, delimStart));
                        }
                        // Outermost right delimiter
                        if (vN[3]) {
                            output.push(row(vN[3], str.slice(delimStart, delimEnd), delimStart, delimEnd));
                        }
                    } else {
                        output.push(str.slice(innerStart, delimStart));
                    }
                    lastOuterEnd = delimEnd;
                    if (!global) {
                        break;
                    }
                }
            // Found unbalanced delimiter
            } else {
                const unbalanced = options.unbalanced || 'error';
                if (unbalanced === 'skip' || unbalanced === 'skip-lazy') {
                    // Unbalanced right delimiter: ignore it. The search continues after it, since
                    // `delimEnd` was already moved past the delimiter above
                    if (rightMatch) {
                        rightMatch = null;
                    // No `leftMatch` for unbalanced left delimiter because we've reached the string end
                    } else {
                        // Restart the search after the unbalanced outermost left delimiter
                        // ('skip'), or one character after its start ('skip-lazy')
                        if (unbalanced === 'skip') {
                            const outerStartDelimLength = XRegExp.exec(str, left, outerStart, 'sticky')[0].length;
                            delimEnd = outerStart + (outerStartDelimLength || 1);
                        } else {
                            delimEnd = outerStart + 1;
                        }
                        openTokens = 0;
                    }
                } else if (unbalanced === 'error') {
                    const delimSide = rightMatch ? 'right' : 'left';
                    const errorPos = rightMatch ? delimStart : outerStart;
                    throw new Error(`Unbalanced ${delimSide} delimiter found in string at position ${errorPos}`);
                } else {
                    throw new Error(`Unsupported value for unbalanced: ${unbalanced}`);
                }
            }

            // If the delimiter matched an empty string, avoid an infinite loop
            if (delimStart === delimEnd) {
                delimEnd += 1;
            }
        }

        // In extended information mode with flag `g` (and without `y`), include the text that
        // follows the last match, provided that at least one result was collected
        if (global && output.length > 0 && !sticky && vN && vN[0] && str.length > lastOuterEnd) {
            output.push(row(vN[0], str.slice(lastOuterEnd), lastOuterEnd, str.length));
        }

        return output;
    };
248 | };
249 | 


--------------------------------------------------------------------------------
/src/addons/unicode-base.js:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * XRegExp Unicode Base 5.1.2
  3 |  * <xregexp.com>
  4 |  * Steven Levithan (c) 2008-present MIT License
  5 |  */
  6 | 
  7 | export default (XRegExp) => {
  8 | 
  9 |     /**
 10 |      * Adds base support for Unicode matching:
 11 |      * - Adds syntax `\p{..}` for matching Unicode tokens. Tokens can be inverted using `\P{..}` or
 12 |      *   `\p{^..}`. Token names ignore case, spaces, hyphens, and underscores. You can omit the
     *   braces for token names that are a single letter (e.g. `\pL` or `\PL`).
 14 |      * - Adds flag A (astral), which enables 21-bit Unicode support.
 15 |      * - Adds the `XRegExp.addUnicodeData` method used by other addons to provide character data.
 16 |      *
 17 |      * Unicode Base relies on externally provided Unicode character data. Official addons are
 18 |      * available to provide data for Unicode categories, scripts, and properties.
 19 |      *
 20 |      * @requires XRegExp
 21 |      */
 22 | 
 23 |     // ==--------------------------==
 24 |     // Private stuff
 25 |     // ==--------------------------==
 26 | 
 27 |     // Storage for Unicode data
 28 |     const unicode = {};
 29 |     const unicodeTypes = {};
 30 | 
 31 |     // Reuse utils
 32 |     const dec = XRegExp._dec;
 33 |     const hex = XRegExp._hex;
 34 |     const pad4 = XRegExp._pad4;
 35 | 
 36 |     // Generates a token lookup name: lowercase, with hyphens, spaces, and underscores removed
 37 |     function normalize(name) {
 38 |         return name.replace(/[- _]+/g, '').toLowerCase();
 39 |     }
 40 | 
 41 |     // Gets the decimal code of a literal code unit, \xHH, \uHHHH, or a backslash-escaped literal
 42 |     function charCode(chr) {
 43 |         const esc = /^\\[xu](.+)/.exec(chr);
 44 |         return esc ?
 45 |             dec(esc[1]) :
 46 |             chr.charCodeAt(chr[0] === '\\' ? 1 : 0);
 47 |     }
 48 | 
 49 |     // Inverts a list of ordered BMP characters and ranges
 50 |     function invertBmp(range) {
 51 |         let output = '';
 52 |         let lastEnd = -1;
 53 | 
 54 |         XRegExp.forEach(
 55 |             range,
 56 |             /(\\x..|\\u....|\\?[\s\S])(?:-(\\x..|\\u....|\\?[\s\S]))?/,
 57 |             (m) => {
 58 |                 const start = charCode(m[1]);
 59 |                 if (start > (lastEnd + 1)) {
 60 |                     output += `\\u${pad4(hex(lastEnd + 1))}`;
 61 |                     if (start > (lastEnd + 2)) {
 62 |                         output += `-\\u${pad4(hex(start - 1))}`;
 63 |                     }
 64 |                 }
 65 |                 lastEnd = charCode(m[2] || m[1]);
 66 |             }
 67 |         );
 68 | 
 69 |         if (lastEnd < 0xFFFF) {
 70 |             output += `\\u${pad4(hex(lastEnd + 1))}`;
 71 |             if (lastEnd < 0xFFFE) {
 72 |                 output += '-\\uFFFF';
 73 |             }
 74 |         }
 75 | 
 76 |         return output;
 77 |     }
 78 | 
 79 |     // Generates an inverted BMP range on first use
 80 |     function cacheInvertedBmp(slug) {
 81 |         const prop = 'b!';
 82 |         return (
 83 |             unicode[slug][prop] ||
 84 |             (unicode[slug][prop] = invertBmp(unicode[slug].bmp))
 85 |         );
 86 |     }
 87 | 
 88 |     // Combines and optionally negates BMP and astral data
 89 |     function buildAstral(slug, isNegated) {
 90 |         const item = unicode[slug];
 91 |         let combined = '';
 92 | 
 93 |         if (item.bmp && !item.isBmpLast) {
 94 |             combined = `[${item.bmp}]${item.astral ? '|' : ''}`;
 95 |         }
 96 |         if (item.astral) {
 97 |             combined += item.astral;
 98 |         }
 99 |         if (item.isBmpLast && item.bmp) {
100 |             combined += `${item.astral ? '|' : ''}[${item.bmp}]`;
101 |         }
102 | 
103 |         // Astral Unicode tokens always match a code point, never a code unit
104 |         return isNegated ?
105 |             `(?:(?!${combined})(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[\0-\uFFFF]))` :
106 |             `(?:${combined})`;
107 |     }
108 | 
109 |     // Builds a complete astral pattern on first use
110 |     function cacheAstral(slug, isNegated) {
111 |         const prop = isNegated ? 'a!' : 'a=';
112 |         return (
113 |             unicode[slug][prop] ||
114 |             (unicode[slug][prop] = buildAstral(slug, isNegated))
115 |         );
116 |     }
117 | 
118 |     // ==--------------------------==
119 |     // Core functionality
120 |     // ==--------------------------==
121 | 
122 |     /*
123 |      * Add astral mode (flag A) and Unicode token syntax: `\p{..}`, `\P{..}`, `\p{^..}`, `\pC`.
124 |      */
125 |     XRegExp.addToken(
126 |         // Use `*` instead of `+` to avoid capturing `^` as the token name in `\p{^}`
127 |         /\\([pP])(?:{(\^?)(?:(\w+)=)?([^}]*)}|([A-Za-z]))/,
128 |         (match, scope, flags) => {
129 |             const ERR_DOUBLE_NEG = 'Invalid double negation ';
130 |             const ERR_UNKNOWN_NAME = 'Unknown Unicode token ';
131 |             const ERR_UNKNOWN_REF = 'Unicode token missing data ';
132 |             const ERR_ASTRAL_ONLY = 'Astral mode required for Unicode token ';
133 |             const ERR_ASTRAL_IN_CLASS = 'Astral mode does not support Unicode tokens within character classes';
134 |             const [
135 |                 fullToken,
136 |                 pPrefix,
137 |                 caretNegation,
138 |                 typePrefix,
139 |                 tokenName,
140 |                 tokenSingleCharName
141 |             ] = match;
142 |             // Negated via \P{..} or \p{^..}
143 |             let isNegated = pPrefix === 'P' || !!caretNegation;
144 |             // Switch from BMP (0-FFFF) to astral (0-10FFFF) mode via flag A
145 |             const isAstralMode = flags.includes('A');
146 |             // Token lookup name. Check `tokenSingleCharName` first to avoid passing `undefined`
147 |             // via `\p{}`
148 |             let slug = normalize(tokenSingleCharName || tokenName);
149 |             // Token data object
150 |             let item = unicode[slug];
151 | 
152 |             if (pPrefix === 'P' && caretNegation) {
153 |                 throw new SyntaxError(ERR_DOUBLE_NEG + fullToken);
154 |             }
155 |             if (!unicode.hasOwnProperty(slug)) {
156 |                 throw new SyntaxError(ERR_UNKNOWN_NAME + fullToken);
157 |             }
158 | 
159 |             if (typePrefix) {
160 |                 if (!(unicodeTypes[typePrefix] && unicodeTypes[typePrefix][slug])) {
161 |                     throw new SyntaxError(ERR_UNKNOWN_NAME + fullToken);
162 |                 }
163 |             }
164 | 
165 |             // Switch to the negated form of the referenced Unicode token
166 |             if (item.inverseOf) {
167 |                 slug = normalize(item.inverseOf);
168 |                 if (!unicode.hasOwnProperty(slug)) {
169 |                     throw new ReferenceError(`${ERR_UNKNOWN_REF + fullToken} -> ${item.inverseOf}`);
170 |                 }
171 |                 item = unicode[slug];
172 |                 isNegated = !isNegated;
173 |             }
174 | 
175 |             if (!(item.bmp || isAstralMode)) {
176 |                 throw new SyntaxError(ERR_ASTRAL_ONLY + fullToken);
177 |             }
178 |             if (isAstralMode) {
179 |                 if (scope === 'class') {
180 |                     throw new SyntaxError(ERR_ASTRAL_IN_CLASS);
181 |                 }
182 | 
183 |                 return cacheAstral(slug, isNegated);
184 |             }
185 | 
186 |             return scope === 'class' ?
187 |                 (isNegated ? cacheInvertedBmp(slug) : item.bmp) :
188 |                 `${(isNegated ? '[^' : '[') + item.bmp}]`;
189 |         },
190 |         {
191 |             scope: 'all',
192 |             optionalFlags: 'A',
193 |             leadChar: '\\'
194 |         }
195 |     );
196 | 
197 |     /**
198 |      * Adds to the list of Unicode tokens that XRegExp regexes can match via `\p` or `\P`.
199 |      *
200 |      * @memberOf XRegExp
201 |      * @param {Array} data Objects with named character ranges. Each object may have properties
202 |      *   `name`, `alias`, `isBmpLast`, `inverseOf`, `bmp`, and `astral`. All but `name` are
203 |      *   optional, although one of `bmp` or `astral` is required (unless `inverseOf` is set). If
204 |      *   `astral` is absent, the `bmp` data is used for BMP and astral modes. If `bmp` is absent,
205 |      *   the name errors in BMP mode but works in astral mode. If both `bmp` and `astral` are
206 |      *   provided, the `bmp` data only is used in BMP mode, and the combination of `bmp` and
207 |      *   `astral` data is used in astral mode. `isBmpLast` is needed when a token matches orphan
208 |      *   high surrogates *and* uses surrogate pairs to match astral code points. The `bmp` and
209 |      *   `astral` data should be a combination of literal characters and `\xHH` or `\uHHHH` escape
210 |      *   sequences, with hyphens to create ranges. Any regex metacharacters in the data should be
211 |      *   escaped, apart from range-creating hyphens. The `astral` data can additionally use
212 |      *   character classes and alternation, and should use surrogate pairs to represent astral code
213 |      *   points. `inverseOf` can be used to avoid duplicating character data if a Unicode token is
214 |      *   defined as the exact inverse of another token.
215 |      * @param {String} [typePrefix] Enables optionally using this type as a prefix for all of the
216 |      *   provided Unicode tokens, e.g. if given `'Type'`, then `\p{TokenName}` can also be written
217 |      *   as `\p{Type=TokenName}`.
218 |      * @example
219 |      *
220 |      * // Basic use
221 |      * XRegExp.addUnicodeData([{
222 |      *   name: 'XDigit',
223 |      *   alias: 'Hexadecimal',
224 |      *   bmp: '0-9A-Fa-f'
225 |      * }]);
226 |      * XRegExp('\\p{XDigit}:\\p{Hexadecimal}+').test('0:3D'); // -> true
227 |      */
228 |     XRegExp.addUnicodeData = (data, typePrefix) => {
229 |         const ERR_NO_NAME = 'Unicode token requires name';
230 |         const ERR_NO_DATA = 'Unicode token has no character data ';
231 | 
232 |         if (typePrefix) {
233 |             // Case sensitive to match ES2018
234 |             unicodeTypes[typePrefix] = {};
235 |         }
236 | 
237 |         for (const item of data) {
238 |             if (!item.name) {
239 |                 throw new Error(ERR_NO_NAME);
240 |             }
241 |             if (!(item.inverseOf || item.bmp || item.astral)) {
242 |                 throw new Error(ERR_NO_DATA + item.name);
243 |             }
244 | 
245 |             const normalizedName = normalize(item.name);
246 |             unicode[normalizedName] = item;
247 |             if (typePrefix) {
248 |                 unicodeTypes[typePrefix][normalizedName] = true;
249 |             }
250 | 
251 |             if (item.alias) {
252 |                 const normalizedAlias = normalize(item.alias);
253 |                 unicode[normalizedAlias] = item;
254 |                 if (typePrefix) {
255 |                     unicodeTypes[typePrefix][normalizedAlias] = true;
256 |                 }
257 |             }
258 |         }
259 | 
260 |         // Reset the pattern cache used by the `XRegExp` constructor, since the same pattern and
261 |         // flags might now produce different results
262 |         XRegExp.cache.flush('patterns');
263 |     };
264 | 
265 |     /**
266 |      * @ignore
267 |      *
268 |      * Return a reference to the internal Unicode definition structure for the given Unicode
269 |      * Property if the given name is a legal Unicode Property for use in XRegExp `\p` or `\P` regex
270 |      * constructs.
271 |      *
272 |      * @memberOf XRegExp
273 |      * @param {String} name Name by which the Unicode Property may be recognized (case-insensitive),
274 |      *   e.g. `'N'` or `'Number'`. The given name is matched against all registered Unicode
275 |      *   Properties and Property Aliases.
276 |      * @returns {Object} Reference to definition structure when the name matches a Unicode Property.
277 |      *
278 |      * @note
279 |      * For more info on Unicode Properties, see also http://unicode.org/reports/tr18/#Categories.
280 |      *
281 |      * @note
282 |      * This method is *not* part of the officially documented API and may change or be removed in
283 |      * the future. It is meant for userland code that wishes to reuse the (large) internal Unicode
284 |      * structures set up by XRegExp.
285 |      */
286 |     XRegExp._getUnicodeProperty = (name) => {
287 |         const slug = normalize(name);
288 |         return unicode[slug];
289 |     };
290 | };
291 | 


--------------------------------------------------------------------------------
/docs/syntax/named_capture_comparison/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 |   <meta charset="utf-8" />
  5 |   <title>Named capture comparison :: XRegExp</title>
  6 |   <link href="../../assets/index.css" rel="stylesheet" type="text/css" />
  7 | </head>
  8 | <body>
  9 | <div id="header">
 10 |   <h1><a href="../../index.html"><span id="logoX">X</span>RegExp</a></h1>
 11 |   <h1 class="subtitle">The one of a kind JavaScript regular expression library</h1>
 12 | </div>
 13 | <div id="body">
 14 |   <div id="navBar">
 15 |     <div class="menu">
 16 |       <ul>
 17 |         <li><a href="../../index.html">Home</a></li>
 18 |         <li><a href="../../api/index.html">API</a></li>
 19 |         <li><a href="../../syntax/index.html" class="selected">New syntax</a></li>
 20 |         <li><a href="../../flags/index.html">New flags</a></li>
 21 |         <li><a href="../../unicode/index.html">Unicode</a></li>
 22 |       </ul>
 23 |     </div>
 24 |   </div>
 25 |   <div id="main">
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 |     <h1><a href="../index.html">New syntax</a> &raquo; Named capture comparison</h1>
 32 | 
 33 |     <p>There are several different syntaxes used for named capture. Although Python was the first to implement the feature, most libraries have adopted .NET's alternative syntax.</p>
 34 | 
 35 |     <table summary="Named capture in various libraries" cellspacing="0" border="1">
 36 |       <thead>
 37 |         <tr>
 38 |           <th>Library</th>
 39 |           <th style="width:120px;">Capture</th>
 40 |           <th style="width:100px;">Backref in regex</th>
 41 |           <th>Backref in replacement</th>
 42 |           <th>Stored at</th>
 43 |           <th>Backref numbering</th>
 44 |           <th>Multiple groups with same name</th>
 45 |         </tr>
 46 |       </thead>
 47 |       <tbody>
 48 |         <tr class="highlight">
 49 |           <td>XRegExp <!--0.5--></td>
 50 |           <td>
 51 |             <ul>
 52 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
 53 |               <li><code>(?P&lt;<em>name</em>&gt;&hellip;)</code><sup>1</sup></li>
 54 |             </ul>
 55 |           </td>
 56 |           <td>
 57 |             <ul>
 58 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
 59 |             </ul>
 60 |           </td>
 61 |           <td>
 62 |             <ul>
 63 |               <li><code>$&lt;<em>name</em>&gt;</code><sup>2</sup></li>
 64 |               <li><code>${<em>name</em>}</code></li>
 65 |             </ul>
 66 |           </td>
 67 |           <td>
 68 |             <code><em>result</em>.groups.<em>name</em></code><sup>3</sup>
 69 |           </td>
 70 |           <td>Sequential</td>
 71 |           <td>Error<sup>4</sup></td>
 72 |         </tr>
 73 |         <tr>
 74 |           <td>ECMAScript 2018</td>
 75 |           <td>
 76 |             <ul>
 77 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
 78 |             </ul>
 79 |           </td>
 80 |           <td>
 81 |             <ul>
 82 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
 83 |             </ul>
 84 |           </td>
 85 |           <td>
 86 |             <ul>
 87 |               <li><code>$&lt;<em>name</em>&gt;</code></li>
 88 |             </ul>
 89 |           </td>
 90 |           <td>
 91 |             <code><em>result</em>.groups.<em>name</em></code>
 92 |           </td>
 93 |           <td>Sequential</td>
 94 |           <td>Error</td>
 95 |         </tr>
 96 |         <tr class="alt">
 97 |           <td>.NET</td>
 98 |           <td>
 99 |             <ul>
100 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
101 |               <li><code>(?'<em>name</em>'&hellip;)</code></li>
102 |             </ul>
103 |           </td>
104 |           <td>
105 |             <ul>
106 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
107 |               <li><code>\k'<em>name</em>'</code></li>
108 |             </ul>
109 |           </td>
110 |           <td>
111 |             <ul>
112 |               <li><code>${<em>name</em>}</code></li>
113 |             </ul>
114 |           </td>
115 |           <td><code><em>matcher</em>.Groups('<em>name</em>')</code></td>
116 |           <td>Unnamed first, then named</td>
117 |           <td>Backref to last executed participating group</td>
118 |         </tr>
119 |         <tr>
120 |           <td>Perl 5.10</td>
121 |           <td>
122 |             <ul>
123 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
124 |               <li><code>(?'<em>name</em>'&hellip;)</code></li>
125 |               <li><code>(?P&lt;<em>name</em>&gt;&hellip;)</code></li>
126 |             </ul>
127 |           </td>
128 |           <td>
129 |             <ul>
130 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
131 |               <li><code>\k'<em>name</em>'</code></li>
132 |               <li><code>\k{<em>name</em>}</code></li>
133 |               <li><code>\g{<em>name</em>}</code></li>
134 |               <li><code>(?P=<em>name</em>)</code></li>
135 |             </ul>
136 |           </td>
137 |           <td>
138 |             <ul>
139 |               <li><code>$+{<em>name</em>}</code></li>
140 |             </ul>
141 |           </td>
142 |           <td><code>$+{<em>name</em>}</code></td>
143 |           <td>Sequential</td>
144 |           <td>Backref to leftmost participating group</td>
145 |         </tr>
146 |         <tr class="alt">
147 |           <td>PCRE 7</td>
148 |           <td>
149 |             <ul>
150 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
151 |               <li><code>(?'<em>name</em>'&hellip;)</code></li>
152 |               <li><code>(?P&lt;<em>name</em>&gt;&hellip;)</code></li>
153 |             </ul>
154 |           </td>
155 |           <td>
156 |             <ul>
157 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
158 |               <li><code>\k'<em>name</em>'</code></li>
159 |               <li><code>\k{<em>name</em>}</code><sup>5</sup></li>
160 |               <li><code>\g{<em>name</em>}</code><sup>5</sup></li>
161 |               <li><code>(?P=<em>name</em>)</code></li>
162 |             </ul>
163 |           </td>
164 |           <td colspan="2">N/A</td>
165 |           <td>Sequential</td>
166 |           <td>Error</td>
167 |         </tr>
168 |         <tr>
169 |           <td>PCRE 4</td>
170 |           <td>
171 |             <ul>
172 |               <li><code>(?P&lt;<em>name</em>&gt;&hellip;)</code></li>
173 |             </ul>
174 |           </td>
175 |           <td>
176 |             <ul>
177 |               <li><code>(?P=<em>name</em>)</code></li>
178 |             </ul>
179 |           </td>
180 |           <td colspan="2">N/A</td>
181 |           <td>Sequential</td>
182 |           <td>Error</td>
183 |         </tr>
184 |         <tr class="alt">
185 |           <td>Python</td>
186 |           <td>
187 |             <ul>
188 |               <li><code>(?P&lt;<em>name</em>&gt;&hellip;)</code></li>
189 |             </ul>
190 |           </td>
191 |           <td>
192 |             <ul>
193 |               <li><code>(?P=<em>name</em>)</code></li>
194 |             </ul>
195 |           </td>
196 |           <td>
197 |             <ul>
198 |               <li><code>\g&lt;<em>name</em>&gt;</code></li>
199 |             </ul>
200 |           </td>
201 |           <td><code><em>result</em>.group('<em>name</em>')</code></td>
202 |           <td>Sequential</td>
203 |           <td>Error</td>
204 |         </tr>
205 |         <tr>
206 |           <td>Oniguruma <!--1.8.4--></td>
207 |           <td>
208 |             <ul>
209 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
210 |               <li><code>(?'<em>name</em>'&hellip;)</code></li>
211 |             </ul>
212 |           </td>
213 |           <td>
214 |             <ul>
215 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
216 |               <li><code>\k'<em>name</em>'</code></li>
217 |             </ul>
218 |           </td>
219 |           <td>
220 |             <ul>
221 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
222 |               <li><code>\k'<em>name</em>'</code></li>
223 |             </ul>
224 |           </td>
225 |           <td>N/A</td>
226 |           <td>Unnamed groups default to noncapturing when mixed with named groups</td>
227 |           <td>Backref to rightmost participating group. Backrefs within a regex work as alternation of matches of all preceding groups with the same name, in reverse order.</td>
228 |         </tr>
229 |         <tr class="alt">
230 |           <td>Java 7</td>
231 |           <td>
232 |             <ul>
233 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
234 |             </ul>
235 |           </td>
236 |           <td>
237 |             <ul>
238 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
239 |             </ul>
240 |           </td>
241 |           <td>
242 |             <ul>
243 |               <li><code>${<em>name</em>}</code></li>
244 |             </ul>
245 |           </td>
246 |           <td><code><em>matcher</em>.group('<em>name</em>')</code></td>
247 |           <td>Sequential</td>
248 |           <td>Error</td>
249 |         </tr>
250 |         <tr>
251 |           <td>JGsoft</td>
252 |           <td>
253 |             <ul>
254 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
255 |               <li><code>(?'<em>name</em>'&hellip;)</code></li>
256 |               <li><code>(?P&lt;<em>name</em>&gt;&hellip;)</code></li>
257 |             </ul>
258 |           </td>
259 |           <td>
260 |             <ul>
261 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
262 |               <li><code>\k'<em>name</em>'</code></li>
263 |               <li><code>(?P=<em>name</em>)</code></li>
264 |             </ul>
265 |           </td>
266 |           <td>
267 |             <ul>
268 |               <li><code>${<em>name</em>}</code></li>
269 |               <li><code>\g&lt;<em>name</em>&gt;</code></li>
270 |             </ul>
271 |           </td>
272 |           <td>N/A</td>
273 |           <td>.NET and Python styles, depending on capture syntax</td>
274 |           <td>Same as .NET</td>
275 |         </tr>
276 |         <tr class="alt">
277 |           <td>Boost.Regex</td>
278 |           <td>
279 |             <ul>
280 |               <li><code>(?&lt;<em>name</em>&gt;&hellip;)</code></li>
281 |               <li><code>(?'<em>name</em>'&hellip;)</code></li>
282 |             </ul>
283 |           </td>
284 |           <td>
285 |             <ul>
286 |               <li><code>\k&lt;<em>name</em>&gt;</code></li>
287 |               <li><code>\g{<em>name</em>}</code></li>
288 |             </ul>
289 |           </td>
290 |           <td>?</td>
291 |           <td>?</td>
292 |           <td>?</td>
293 |           <td>?</td>
294 |         </tr>
295 |         <tr>
296 |           <td>RE2</td>
297 |           <td>
298 |             <ul>
299 |               <li><code>(?P&lt;<em>name</em>&gt;&hellip;)</code></li>
300 |             </ul>
301 |           </td>
302 |           <td>N/A</td>
303 |           <td>?</td>
304 |           <td>?</td>
305 |           <td>?</td>
306 |           <td>?</td>
307 |         </tr>
308 |         <tr class="alt">
309 |           <td>JRegex</td>
310 |           <td>
311 |             <ul>
312 |               <li><code>({<em>name</em>}&hellip;)</code></li>
313 |             </ul>
314 |           </td>
315 |           <td>
316 |             <ul>
317 |               <li><code>{\<em>name</em>}</code></li>
318 |             </ul>
319 |           </td>
320 |           <td>
321 |             <ul>
322 |               <li><code>${<em>name</em>}</code></li>
323 |             </ul>
324 |           </td>
325 |           <td><code><em>matcher</em>.group('<em>name</em>')</code></td>
326 |           <td>?</td>
327 |           <td>?</td>
328 |         </tr>
329 |       </tbody>
330 |     </table>
331 | 
332 |     <p><sup>1</sup> As of XRegExp 2. Not recommended for use, because support for the <code>(?P&lt;<em>name</em>&gt;&hellip;)</code> syntax may be removed in future versions of XRegExp. It is currently supported only to avoid an octal escape versus backreference issue in old Opera. Opera supported the Python named capture syntax natively, but did not provide full named capture functionality.</p>
333 | 
334 |     <p><sup>2</sup> As of XRegExp 4.</p>
335 | 
336 |     <p><sup>3</sup> As of XRegExp 4.1, when the <code>namespacing</code> option is on (it's on by default in XRegExp 5). Stored at <code><em>result</em>.<em>name</em></code> when <code>namespacing</code> is off.<br>
337 |     Note: Within <code><em>string</em>.replace</code> callbacks, stored at: <code>arguments[arguments.length - 1].<em>name</em></code> (with <code>namespacing</code> on) or <code>arguments[0].<em>name</em></code> (with <code>namespacing</code> off).</p>
338 | 
339 |     <p><sup>4</sup> As of XRegExp 3.</p>
340 | 
341 |     <p><sup>5</sup> As of PCRE 7.2.</p>
342 | 
343 |     <p>TODO: Add a column comparing the use of capture names in regex conditionals (not supported by XRegExp).</p>
344 | 
345 | 
346 | 
347 | 
348 | 
349 |   </div>
350 | </div>
351 | <div id="footer">
352 |   <p>&copy; <a href="https://slev.life/">Steven Levithan</a> :: <a href="https://github.com/slevithan/xregexp">GitHub</a> :: <a href="https://xregexp.com/">XRegExp.com</a></p>
353 | </div>
354 | </body>
355 | </html>
356 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # XRegExp
  2 | 
  3 | [![npm version][npm-version-src]][npm-version-href]
  4 | [![npm downloads][npm-downloads-src]][npm-downloads-href]
  5 | 
  6 | > [!TIP]
  7 | > XRegExp was indispensable for heavy regex users in its time, but many of its best features have been adopted into modern JavaScript. Check out [Regex+](https://github.com/slevithan/regex), the lightweight spiritual successor to XRegExp that once again takes JavaScript regexes to the next level.
  8 | 
  9 | XRegExp provides augmented (and extensible) JavaScript regular expressions. You get modern syntax and flags beyond what browsers support natively. XRegExp is also a regex utility belt with tools to make your grepping and parsing easier, while freeing you from regex cross-browser inconsistencies and other annoyances.
 10 | 
 11 | XRegExp supports ES5+ browsers, and you can use it with Node.js or as a RequireJS module. Over the years, many of XRegExp's features have been adopted by new JavaScript standards (named capturing, Unicode properties/scripts/categories, flag `s`, sticky matching, etc.), so using XRegExp can be a way to extend these features into older browsers.
 12 | 
 13 | ## Performance
 14 | 
 15 | XRegExp compiles to native `RegExp` objects. Therefore regexes built with XRegExp perform just as fast as native regular expressions. There is a tiny extra cost when compiling a pattern for the first time.
 16 | 
 17 | ## Named capture breaking change in XRegExp 5
 18 | 
 19 | XRegExp 5 introduced a breaking change where named backreference properties now appear on the result's `groups` object (following ES2018), rather than directly on the result. To restore the old handling so you don't need to update old code, run the following line after importing XRegExp:
 20 | 
 21 | ```js
 22 | XRegExp.uninstall('namespacing');
 23 | ```
 24 | 
 25 | XRegExp 4.1.0 and later allow introducing the new behavior without upgrading to XRegExp 5 by running `XRegExp.install('namespacing')`.
 26 | 
 27 | Following is the most commonly needed change to update code for the new behavior:
 28 | 
 29 | ```js
 30 | // Change this
 31 | const name = XRegExp.exec(str, regexWithNamedCapture).name;
 32 | 
 33 | // To this
 34 | const name = XRegExp.exec(str, regexWithNamedCapture).groups.name;
 35 | ```
 36 | 
 37 | See below for more examples of using named capture with `XRegExp.exec` and `XRegExp.replace`.
 38 | 
 39 | ## Usage examples
 40 | 
 41 | ```js
 42 | // Using named capture and flag x for free-spacing and line comments
 43 | const date = XRegExp(
 44 |     `(?<year>  [0-9]{4} ) -?  # year
 45 |      (?<month> [0-9]{2} ) -?  # month
 46 |      (?<day>   [0-9]{2} )     # day`, 'x');
 47 | 
 48 | // XRegExp.exec provides named backreferences on the result's groups property
 49 | let match = XRegExp.exec('2021-02-22', date);
 50 | match.groups.year; // -> '2021'
 51 | 
 52 | // It also includes optional pos and sticky arguments
 53 | let pos = 3;
 54 | const result = [];
 55 | while (match = XRegExp.exec('<1><2><3>4<5>', /<(\d+)>/, pos, 'sticky')) {
 56 |     result.push(match[1]);
 57 |     pos = match.index + match[0].length;
 58 | }
 59 | // result -> ['2', '3']
 60 | 
 61 | // XRegExp.replace allows named backreferences in replacements
 62 | XRegExp.replace('2021-02-22', date, '$<month>/$<day>/$<year>');
 63 | // -> '02/22/2021'
 64 | XRegExp.replace('2021-02-22', date, (...args) => {
 65 |     // Named backreferences are on the last argument
 66 |     const {day, month, year} = args.at(-1);
 67 |     return `${month}/${day}/${year}`;
 68 | });
 69 | // -> '02/22/2021'
 70 | 
 71 | // XRegExps compile to RegExps and work with native methods
 72 | date.test('2021-02-22');
 73 | // -> true
 74 | // However, named captures must be referenced using numbered backreferences
 75 | // if used with native methods
 76 | '2021-02-22'.replace(date, '$2/$3/$1');
 77 | // -> '02/22/2021'
 78 | 
 79 | // Use XRegExp.forEach to extract every other digit from a string
 80 | const evens = [];
 81 | XRegExp.forEach('1a2345', /\d/, (match, i) => {
 82 |     if (i % 2) evens.push(+match[0]);
 83 | });
 84 | // evens -> [2, 4]
 85 | 
 86 | // Use XRegExp.matchChain to get numbers within <b> tags
 87 | XRegExp.matchChain('1 <b>2</b> 3 <B>4 \n 56</B>', [
 88 |     XRegExp('<b>.*?</b>', 'is'),
 89 |     /\d+/
 90 | ]);
 91 | // -> ['2', '4', '56']
 92 | 
 93 | // You can also pass forward and return specific backreferences
 94 | const html =
 95 |     `<a href="https://xregexp.com/">XRegExp</a>
 96 |      <a href="https://www.google.com/">Google</a>`;
 97 | XRegExp.matchChain(html, [
 98 |     {regex: /<a href="([^"]+)">/i, backref: 1},
 99 |     {regex: XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'}
100 | ]);
101 | // -> ['xregexp.com', 'www.google.com']
102 | 
103 | // Merge strings and regexes, with updated backreferences
104 | XRegExp.union(['m+a*n', /(bear)\1/, /(pig)\1/], 'i', {conjunction: 'or'});
105 | // -> /m\+a\*n|(bear)\1|(pig)\2/i
106 | ```
107 | 
108 | These examples give the flavor of what's possible, but XRegExp has more syntax, flags, methods, options, and browser fixes that aren't shown here. You can also augment XRegExp's regular expression syntax with addons (see below) or write your own. See [xregexp.com](https://xregexp.com/) for details.
109 | 
110 | ## Addons
111 | 
112 | You can either load addons individually, or bundle all addons with XRegExp by loading `xregexp-all.js` from https://unpkg.com/xregexp/xregexp-all.js.
113 | 
114 | ### Unicode
115 | 
116 | If not using `xregexp-all.js`, first include the Unicode Base script and then one or more of the addons for Unicode categories, properties, or scripts.
117 | 
118 | Then you can do this:
119 | 
120 | ```js
121 | // Test some Unicode scripts
122 | // Can also use the Script= prefix to match ES2018: \p{Script=Hiragana}
123 | XRegExp('^\\p{Hiragana}+$').test('ひらがな'); // -> true
124 | XRegExp('^[\\p{Latin}\\p{Common}]+$').test('Über Café.'); // -> true
125 | 
126 | // Test the Unicode categories Letter and Mark
127 | // Can also use the short names \p{L} and \p{M}
128 | const unicodeWord = XRegExp.tag()`^\p{Letter}[\p{Letter}\p{Mark}]*$`;
129 | unicodeWord.test('Русский'); // -> true
130 | unicodeWord.test('日本語'); // -> true
131 | unicodeWord.test('العربية'); // -> true
132 | ```
133 | 
134 | By default, `\p{…}` and `\P{…}` support the Basic Multilingual Plane (i.e. code points up to `U+FFFF`). You can opt in to full 21-bit Unicode support (with code points up to `U+10FFFF`) on a per-regex basis by using flag `A`. This is called *astral mode*. You can automatically add flag `A` for all new regexes by running `XRegExp.install('astral')`. When in astral mode, `\p{…}` and `\P{…}` always match a full code point rather than a code unit, using surrogate pairs for code points above `U+FFFF`.
135 | 
136 | ```js
137 | // Using flag A to match astral code points
138 | XRegExp('^\\p{S}$').test('💩'); // -> false
139 | XRegExp('^\\p{S}$', 'A').test('💩'); // -> true
140 | // Using surrogate pair U+D83D U+DCA9 to represent U+1F4A9 (pile of poo)
141 | XRegExp('^\\p{S}$', 'A').test('\uD83D\uDCA9'); // -> true
142 | 
143 | // Implicit flag A
144 | XRegExp.install('astral');
145 | XRegExp('^\\p{S}$').test('💩'); // -> true
146 | ```
147 | 
148 | Opting in to astral mode disables the use of `\p{…}` and `\P{…}` within character classes. In astral mode, use e.g. `(\pL|[0-9_])+` instead of `[\pL0-9_]+`.
149 | 
150 | XRegExp uses Unicode 14.0.0.
151 | 
152 | ### XRegExp.build
153 | 
154 | Build regular expressions using named subpatterns, for readability and pattern reuse:
155 | 
156 | ```js
157 | const time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', {
158 |     hours: XRegExp.build('{{h12}} : | {{h24}}', {
159 |         h12: /1[0-2]|0?[1-9]/,
160 |         h24: /2[0-3]|[01][0-9]/
161 |     }),
162 |     minutes: /^[0-5][0-9]$/
163 | });
164 | 
165 | time.test('10:59'); // -> true
166 | XRegExp.exec('10:59', time).groups.minutes; // -> '59'
167 | ```
168 | 
169 | Named subpatterns can be provided as strings or regex objects. A leading `^` and trailing unescaped `$` are stripped from subpatterns if both are present, which allows embedding independently-useful anchored patterns. `{{…}}` tokens can be quantified as a single unit. Any backreferences in the outer pattern or provided subpatterns are automatically renumbered to work correctly within the larger combined pattern. The syntax `({{name}})` works as shorthand for named capture via `(?<name>{{name}})`. Named subpatterns cannot be embedded within character classes.
170 | 
171 | #### XRegExp.tag (included with XRegExp.build)
172 | 
173 | Provides tagged template literals that create regexes with XRegExp syntax and flags:
174 | 
175 | ```js
176 | XRegExp.tag()`\b\w+\b`.test('word'); // -> true
177 | 
178 | const hours = /1[0-2]|0?[1-9]/;
179 | const minutes = /(?<minutes>[0-5][0-9])/;
180 | const time = XRegExp.tag('x')`\b ${hours} : ${minutes} \b`;
181 | time.test('10:59'); // -> true
182 | XRegExp.exec('10:59', time).groups.minutes; // -> '59'
183 | 
184 | const backref1 = /(a)\1/;
185 | const backref2 = /(b)\1/;
186 | XRegExp.tag()`${backref1}${backref2}`.test('aabb'); // -> true
187 | ```
188 | 
189 | `XRegExp.tag` does more than just interpolation. You get all the XRegExp syntax and flags, and since it reads patterns as raw strings, you no longer need to escape all your backslashes. `XRegExp.tag` also uses `XRegExp.build` under the hood, so you get all of its extras for free. Leading `^` and trailing unescaped `$` are stripped from interpolated patterns if both are present (to allow embedding independently useful anchored regexes), interpolating into a character class is an error (to avoid unintended meaning in edge cases), interpolated patterns are treated as atomic units when quantified, interpolated strings have their special characters escaped, and any backreferences within an interpolated regex are rewritten to work within the overall pattern.
190 | 
191 | ### XRegExp.matchRecursive
192 | 
193 | A robust and flexible API for matching recursive constructs using XRegExp pattern strings as left and right delimiters:
194 | 
195 | ```js
196 | const str1 = '(t((e))s)t()(ing)';
197 | XRegExp.matchRecursive(str1, '\\(', '\\)', 'g');
198 | // -> ['t((e))s', '', 'ing']
199 | 
200 | // Extended information mode with valueNames
201 | const str2 = 'Here is <div> <div>an</div></div> example';
202 | XRegExp.matchRecursive(str2, '<div\\s*>', '</div>', 'gi', {
203 |     valueNames: ['between', 'left', 'match', 'right']
204 | });
205 | /* -> [
206 | {name: 'between', value: 'Here is ',       start: 0,  end: 8},
207 | {name: 'left',    value: '<div>',          start: 8,  end: 13},
208 | {name: 'match',   value: ' <div>an</div>', start: 13, end: 27},
209 | {name: 'right',   value: '</div>',         start: 27, end: 33},
210 | {name: 'between', value: ' example',       start: 33, end: 41}
211 | ] */
212 | 
213 | // Omitting unneeded parts with null valueNames, and using escapeChar
214 | const str3 = '...{1}.\\{{function(x,y){return {y:x}}}';
215 | XRegExp.matchRecursive(str3, '{', '}', 'g', {
216 |     valueNames: ['literal', null, 'value', null],
217 |     escapeChar: '\\'
218 | });
219 | /* -> [
220 | {name: 'literal', value: '...',  start: 0, end: 3},
221 | {name: 'value',   value: '1',    start: 4, end: 5},
222 | {name: 'literal', value: '.\\{', start: 6, end: 9},
223 | {name: 'value',   value: 'function(x,y){return {y:x}}', start: 10, end: 37}
224 | ] */
225 | 
226 | // Sticky mode via flag y
227 | const str4 = '<1><<<2>>><3>4<5>';
228 | XRegExp.matchRecursive(str4, '<', '>', 'gy');
229 | // -> ['1', '<<2>>', '3']
230 | 
231 | // Skipping unbalanced delimiters instead of erroring
232 | const str5 = 'Here is <div> <div>an</div> unbalanced example';
233 | XRegExp.matchRecursive(str5, '<div\\s*>', '</div>', 'gi', {
234 |     unbalanced: 'skip'
235 | });
236 | // -> ['an']
237 | ```
238 | 
239 | By default, `XRegExp.matchRecursive` throws an error if it scans past an unbalanced delimiter in the target string. Multiple alternative options are available for handling unbalanced delimiters.
240 | 
241 | ## Installation and usage
242 | 
243 | In browsers (bundle XRegExp with all of its addons):
244 | 
245 | ```html
246 | <script src="https://unpkg.com/xregexp/xregexp-all.js"></script>
247 | ```
248 | 
249 | Using [npm](https://www.npmjs.com/):
250 | 
251 | ```bash
252 | npm install xregexp
253 | ```
254 | 
255 | In [Node.js](https://nodejs.org/en/):
256 | 
257 | ```js
258 | const XRegExp = require('xregexp');
259 | ```
260 | 
261 | <!--
262 | ## Contribution guide
263 | 
264 | 1. Fork the repository and clone the forked version locally.
265 | 2. Ensure you have the `typescript` module installed globally.
266 | 3. Run `npm install`.
267 | 4. Ensure all tests pass with `npm test`.
268 | 5. Add tests for the new functionality, or tests that fail because of the bug being fixed.
269 | 6. Implement functionality or bug fix to pass the test.
270 | -->
271 | 
272 | ## Credits
273 | 
274 | XRegExp project collaborators are:
275 | 
276 | - [Steven Levithan](https://blog.stevenlevithan.com/)
277 | - [Joseph Frazier](https://github.com/josephfrazier)
278 | - [Mathias Bynens](https://mathiasbynens.be/)
279 | 
280 | Thanks to all contributors and others who have submitted code, provided feedback, reported bugs, and inspired new features.
281 | 
282 | XRegExp is released under the [MIT License](https://mit-license.org/). Learn more at [xregexp.com](https://xregexp.com/).
283 | 
284 | <!-- Badges -->
285 | 
286 | [npm-version-src]: https://img.shields.io/npm/v/xregexp?color=78C372
287 | [npm-version-href]: https://npmjs.com/package/xregexp
288 | [npm-downloads-src]: https://img.shields.io/npm/dm/xregexp?color=78C372
289 | [npm-downloads-href]: https://npmjs.com/package/xregexp
290 | 


--------------------------------------------------------------------------------
/tests/perf/perf.js:
--------------------------------------------------------------------------------
  1 | (function() {
  2 |     var outputBox = document.getElementById('log');
  3 |     var suites = [];
  4 | 
  5 |     // Used to skip 21-bit Unicode tests when running older XRegExp versions
  6 |     var hasAstralSupport = parseInt(XRegExp.version, 10) >= 3;
  7 |     // The `cache.flush` method was added in v3
  8 |     XRegExp.cache.flush = XRegExp.cache.flush || function() {};
  9 |     // The `install` and `uninstall` methods were added in v2
 10 |     XRegExp.install = XRegExp.install || function() {};
 11 |     XRegExp.uninstall = XRegExp.uninstall || function() {};
 12 |     // The `exec` method was renamed from `execAt` in v2
 13 |     XRegExp.exec = XRegExp.exec || XRegExp.execAt;
 14 | 
 15 |     function log(msg) {
 16 |         outputBox.insertAdjacentHTML('beforeend', msg.replace(/\n/g, '<br>'));
 17 |     }
 18 |     function scrollToEnd() {
 19 |         window.scroll(0, document.body.scrollHeight);
 20 |     }
 21 | 
 22 |     var suiteOptions = {
 23 |         onStart: function() {
 24 |             log('\n' + this.name + ':');
 25 |         },
 26 | 
 27 |         onCycle: function(event) {
 28 |             log('\n' + String(event.target));
 29 |             scrollToEnd();
 30 |         },
 31 | 
 32 |         onComplete: function() {
 33 |             log('\nFastest is ' + this.filter('fastest').map('name') + '\n');
 34 |             // Remove current suite from queue
 35 |             suites.shift();
 36 |             if (suites.length) {
 37 |                 // Run next suite
 38 |                 suites[0].run();
 39 |             } else {
 40 |                 log('\nFinished. &#x263A;');
 41 |             }
 42 |             scrollToEnd();
 43 |         }
 44 |     };
 45 | 
 46 |     // run async
 47 |     var benchmarkOptions = {
 48 |         async: true
 49 |     };
 50 | 
 51 |     // Expose as global
 52 |     window.run = function() {
 53 |         log('Testing XRegExp ' + XRegExp.version + '.\n');
 54 |         log('Sit back and relax. This might take a while.\n');
 55 |         suites[0].run();
 56 |     };
 57 | 
 58 |     /*--------------------------------------
 59 |      *  Start of perf suites
 60 |      *------------------------------------*/
 61 | 
 62 |     (function() {
 63 |         var configs = [
 64 |             {
 65 |                 name: 'Constructor with short pattern',
 66 |                 pattern: '^([.])\\1+$'
 67 |             },
 68 |             {
 69 |                 name: 'Constructor with medium pattern',
 70 |                 pattern: '^([.])\\1+$ this is a test of a somewhat longer pattern'
 71 |             },
 72 |             {
 73 |                 name: 'Constructor with long pattern',
 74 |                 pattern: XRegExp('\\p{L}').source
 75 |             },
 76 |             {
 77 |                 name: 'Constructor with x flag, whitespace, and comments',
 78 |                 pattern: '\n                       # comment\n                       # comment\n',
 79 |                 flags: 'x'
 80 |             }
 81 |         ];
 82 | 
 83 |         configs.forEach(function(config) {
 84 |             var flags = config.flags || '';
 85 |             var allFlagsNative = /^[gimuy]*$/.test(flags);
 86 | 
 87 |             var suite = new Benchmark.Suite(config.name, suiteOptions)
 88 |                 .add('XRegExp with pattern cache flush', function() {
 89 |                     XRegExp(config.pattern, flags);
 90 |                     XRegExp.cache.flush('patterns');
 91 |                 }, benchmarkOptions)
 92 |                 .add('XRegExp', function() {
 93 |                     XRegExp(config.pattern, flags);
 94 |                 }, benchmarkOptions)
 95 |                 .add('XRegExp.cache', function() {
 96 |                     XRegExp.cache(config.pattern, flags);
 97 |                 }, benchmarkOptions);
 98 |             if (allFlagsNative) {
 99 |                 suite.add('RegExp', function() {
100 |                     new RegExp(config.pattern, flags);
101 |                 }, benchmarkOptions);
102 |             }
103 | 
104 |             suites.push(suite);
105 |         });
106 |     }());
107 | 
108 |     (function() {
109 |         var regexG = /(((?=x).)\2)+/g;
110 |         var str = Array(30 + 1).join('hello world x ') + 'xx!';
111 |         var pos = 5;
112 | 
113 |         suites.push(new Benchmark.Suite('exec', suiteOptions)
114 |             .add('Native exec', function() {
115 |                 regexG.lastIndex = pos;
116 |                 regexG.exec(str);
117 |             }, benchmarkOptions)
118 |             .add('XRegExp.exec', function() {
119 |                 XRegExp.exec(str, regexG, pos);
120 |             }, benchmarkOptions)
121 |         );
122 | 
123 |         var numStrs = 2e5;
124 |         var strs = [];
125 |         var i;
126 | 
127 |         // Use lots of different strings to remove the benefit of Opera's regex/string match cache
128 |         for (i = 0; i < numStrs; ++i) {
129 |             strs.push(str + i);
130 |         }
131 | 
132 |         suites.push(new Benchmark.Suite('exec with ' + numStrs + ' different strings', suiteOptions)
133 |             .add('Native exec', function() {
134 |                 regexG.lastIndex = pos;
135 |                 regexG.exec(strs[++i] || strs[i = 0]);
136 |             }, benchmarkOptions)
137 |             .add('XRegExp.exec', function() {
138 |                 XRegExp.exec(strs[++i] || strs[i = 0], regexG, pos);
139 |             }, benchmarkOptions)
140 |         );
141 | 
142 |         suites.push(new Benchmark.Suite('Sticky exec with ' + numStrs + ' different strings', suiteOptions)
143 |             .add('Native exec', function() {
144 |                 regexG.lastIndex = pos;
145 |                 var match = regexG.exec(strs[++i] || strs[i = 0]);
146 |                 if (match && match.index !== pos) {
147 |                     match = null;
148 |                 }
149 |             }, benchmarkOptions)
150 |             .add('XRegExp.exec', function() {
151 |                 var match = XRegExp.exec(strs[++i] || strs[i = 0], regexG, pos, 'sticky'); // eslint-disable-line no-unused-vars
152 |             }, benchmarkOptions)
153 |         );
154 |     }());
155 | 
156 |     (function() {
157 |         var str = Array(30 + 1).join('hello xx world ');
158 | 
159 |         suites.push(Benchmark.Suite('Iteration with a nonglobal regex', suiteOptions)
160 |             .add('replace with callback', function() {
161 |                 var r = /^|(((?=x).)\2)+/;
162 |                 var matches = [];
163 |                 if (!r.global) {
164 |                     // globalize
165 |                     r = new RegExp(
166 |                         r.source,
167 |                         'g' +
168 |                             (r.ignoreCase ? 'i' : '') +
169 |                             (r.multiline ? 'm' : '') +
170 |                             (r.unicode ? 'u' : '') +
171 |                             (r.sticky ? 'y' : '')
172 |                     );
173 |                 }
174 |                 str.replace(r, function(match) {
175 |                     matches.push(match);
176 |                 });
177 |             }, benchmarkOptions)
178 |             .add('while/exec', function() {
179 |                 var r = /^|(((?=x).)\2)+/;
180 |                 var matches = [];
181 |                 var match;
182 |                 if (r.global) {
183 |                     r.lastIndex = 0;
184 |                 } else {
185 |                     // globalize
186 |                     r = new RegExp(
187 |                         r.source,
188 |                         'g' +
189 |                             (r.ignoreCase ? 'i' : '') +
190 |                             (r.multiline ? 'm' : '') +
191 |                             (r.unicode ? 'u' : '') +
192 |                             (r.sticky ? 'y' : '')
193 |                     );
194 |                 }
195 |                 while (match = r.exec(str)) { // eslint-disable-line no-cond-assign
196 |                     matches.push(match[0]);
197 |                     if (r.lastIndex === match.index) {
198 |                         ++r.lastIndex;
199 |                     }
200 |                 }
201 |             }, benchmarkOptions)
202 |             .add('while/XRegExp.exec', function() {
203 |                 var r = /^|(((?=x).)\2)+/;
204 |                 var matches = [];
205 |                 var match;
206 |                 var pos = 0;
207 |                 while (match = XRegExp.exec(str, r, pos)) { // eslint-disable-line no-cond-assign
208 |                     matches.push(match[0]);
209 |                     pos = match.index + (match[0].length || 1);
210 |                 }
211 |             }, benchmarkOptions)
212 |             .add('XRegExp.forEach', function() {
213 |                 var r = /^|(((?=x).)\2)+/;
214 |                 var matches = [];
215 |                 XRegExp.forEach(str, r, function(match) {
216 |                     matches.push(match[0]);
217 |                 });
218 |             }, benchmarkOptions)
219 |         );
220 |     }());
221 | 
222 |     (function() {
223 |         var str = Array(30 + 1).join('hello world ') + 'http://xregexp.com/path/to/file?q=1';
224 |         var pattern = '\\b([^:/?\\s]+)://([^/?\\s]+)([^?\\s]*)\\??([^\\s]*)';
225 |         var regexp = new RegExp(pattern);
226 |         var xregexp = XRegExp(pattern);
227 | 
228 |         suites.push(new Benchmark.Suite('Regex object type', suiteOptions)
229 |             .add('RegExp object', function() {
230 |                 regexp.exec(str);
231 |             }, benchmarkOptions)
232 |             .add('XRegExp object', function() {
233 |                 xregexp.exec(str);
234 |             }, benchmarkOptions)
235 |         );
236 | 
237 |         var xregexpNamed4 =
238 |             XRegExp('\\b(?<scheme> [^:/?\\s]+ ) ://   # aka protocol   \n' +
239 |                     '   (?<host>   [^/?\\s]+  )       # domain name/IP \n' +
240 |                     '   (?<path>   [^?\\s]*   ) \\??  # optional path  \n' +
241 |                     '   (?<query>  [^\\s]*    )       # optional query', 'x');
242 |         var xregexpNamed1 =
243 |             XRegExp('\\b(?<scheme> [^:/?\\s]+ ) ://   # aka protocol   \n' +
244 |                     '   (          [^/?\\s]+  )       # domain name/IP \n' +
245 |                     '   (          [^?\\s]*   ) \\??  # optional path  \n' +
246 |                     '   (          [^\\s]*    )       # optional query', 'x');
247 |         var xregexpNumbered =
248 |             XRegExp('\\b(          [^:/?\\s]+ ) ://   # aka protocol   \n' +
249 |                     '   (          [^/?\\s]+  )       # domain name/IP \n' +
250 |                     '   (          [^?\\s]*   ) \\??  # optional path  \n' +
251 |                     '   (          [^\\s]*    )       # optional query', 'x');
252 | 
253 |         suites.push(new Benchmark.Suite('Capturing', suiteOptions)
254 |             .add('Numbered capture', function() {
255 |                 XRegExp.exec(str, xregexpNumbered);
256 |             }, benchmarkOptions)
257 |             .add('Named capture (one name)', function() {
258 |                 XRegExp.exec(str, xregexpNamed1);
259 |             }, benchmarkOptions)
260 |             .add('Named capture (four names)', function() {
261 |                 XRegExp.exec(str, xregexpNamed4);
262 |             }, benchmarkOptions)
263 |         );
264 |     }());
265 | 
266 |     suites.push(new Benchmark.Suite('Unicode letter construction', suiteOptions)
267 |         .add('Incomplete set: /[a-z]/i', function() {
268 |             XRegExp('(?i)[a-z]');
269 |             XRegExp.cache.flush('patterns');
270 |         }, benchmarkOptions)
271 |         .add('BMP only: /\\p{L}/', function() {
272 |             XRegExp('\\p{L}');
273 |             XRegExp.cache.flush('patterns');
274 |         }, benchmarkOptions)
275 |         .add('Full Unicode: /\\p{L}/A', (hasAstralSupport ?
276 |             function() {
277 |                 XRegExp('(?A)\\p{L}');
278 |                 XRegExp.cache.flush('patterns');
279 |             } :
280 |             function() {
281 |                 throw new Error('Astral mode unsupported');
282 |             }
283 |         ), benchmarkOptions)
284 |     );
285 | 
286 |     (function() {
287 |         var asciiText = 'Now is the time for all good men to come to the aid of the party!';
288 |         var mixedText = 'We are looking for a letter/word followed by an exclamation mark, ☃ ☃ ☃ ☃ ☃ and δοκεῖ δέ μοι καὶ Καρχηδόνα μὴ εἶναι!';
289 |         var unicodeText = 'Зоммерфельд получил ряд важных результатов в рамках «старой квантовой теории», предшествовавшей появлению современной квантовой механики!';
290 |         var unicodeText2 = 'როგორც სამედიცინო ფაკულტეტის ახალგაზრდა სტუდენტი, გევარა მთელს ლათინურ ამერიკაში მოგზაურობდა და იგი სწრაფად!';
291 | 
292 |         function test(regex) {
293 |             regex.test(asciiText);
294 |             regex.test(mixedText);
295 |             regex.test(unicodeText);
296 |             regex.test(unicodeText2);
297 |         }
298 | 
299 |         var azCaselessChar = XRegExp('(?i)[a-z]!');
300 |         var bmpLetterChar = XRegExp('\\p{L}!');
301 |         var astralLetterChar = hasAstralSupport ? XRegExp('(?A)\\p{L}!') : null;
302 | 
303 |         suites.push(new Benchmark.Suite('Unicode letter matching', suiteOptions)
304 |             .add('a-z caseless', function() {
305 |                 test(azCaselessChar);
306 |             }, benchmarkOptions)
307 |             .add('\\p{L}', function() {
308 |                 test(bmpLetterChar);
309 |             }, benchmarkOptions)
310 |             .add('\\p{L} astral', (hasAstralSupport ?
311 |                 function() {
312 |                     test(astralLetterChar);
313 |                 } :
314 |                 function() {
315 |                     throw new Error('Astral mode unsupported');
316 |                 }), benchmarkOptions
317 |             )
318 |         );
319 | 
320 |         var azCaselessWord = XRegExp('(?i)[a-z]+!');
321 |         var bmpLetterWord = XRegExp('\\p{L}+!');
322 |         var astralLetterWord = hasAstralSupport ? XRegExp('(?A)\\p{L}+!') : null;
323 | 
324 |         suites.push(new Benchmark.Suite('Unicode word matching', suiteOptions)
325 |             .add('a-z caseless', function() {
326 |                 test(azCaselessWord);
327 |             }, benchmarkOptions)
328 |             .add('\\p{L}', function() {
329 |                 test(bmpLetterWord);
330 |             }, benchmarkOptions)
331 |             .add('\\p{L} astral', (hasAstralSupport ?
332 |                 function() {
333 |                     test(astralLetterWord);
334 |                 } :
335 |                 function() {
336 |                     throw new Error('Astral mode unsupported');
337 |                 }), benchmarkOptions
338 |             )
339 |         );
340 |     }());
341 | }());
342 | 


--------------------------------------------------------------------------------
/docs/flags/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 |   <meta charset="utf-8" />
  5 |   <title>New flags :: XRegExp</title>
  6 |   <link href="../assets/index.css" rel="stylesheet" type="text/css" />
  7 | </head>
  8 | <body>
  9 | <div id="header">
 10 |   <h1><a href="../index.html"><span id="logoX">X</span>RegExp</a></h1>
 11 |   <h1 class="subtitle">The one of a kind JavaScript regular expression library</h1>
 12 | </div>
 13 | <div id="body">
 14 |   <div id="navBar">
 15 |     <div class="menu">
 16 |       <ul>
 17 |         <li><a href="../index.html">Home</a></li>
 18 |         <li><a href="../api/index.html">API</a></li>
 19 |         <li><a href="../syntax/index.html">New syntax</a></li>
 20 |         <li><a href="../flags/index.html" class="selected">New flags</a></li>
 21 |         <li><a href="../unicode/index.html">Unicode</a></li>
 22 |       </ul>
 23 |     </div>
 24 |   </div>
 25 |   <div id="main">
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 |     <div id="tocContainer">
 32 |       <div id="toc">
 33 |         <h2>Table of contents</h2>
 34 |         <ul>
 35 |           <li><a href="#about">About flags</a></li>
 36 |           <li><a href="#explicitCapture">Named capture only (n)</a></li>
 37 |           <li><a href="#singleline">Dot matches all (s)</a></li>
 38 |           <li><a href="#extended">Free-spacing and line comments (x)</a></li>
 39 |           <li><a href="#astral">21-bit Unicode properties (A)</a></li>
 40 |         </ul>
 41 |       </div>
 42 |     </div>
 43 | 
 44 |     <h1>New flags</h1>
 45 | 
 46 |     <h2 id="about">About flags</h2>
 47 | 
 48 |     <p>XRegExp provides four new flags (<code>n</code>, <code>s</code>, <code>x</code>, <code>A</code>), which can be combined with native flags and arranged in any order. Unlike native flags, non-native flags do not show up as properties on regular expression objects.</p>
 49 | 
 50 |     <ul>
 51 |       <li><strong>New flags</strong>
 52 |         <ul>
 53 |           <li><strong><code>n</code></strong> &mdash; Named capture only</li>
 54 |           <li><strong><code>s</code></strong> &mdash; Dot matches all (<em>singleline</em>) &mdash; <em>Added as a native flag in ES2018, but XRegExp always supports it</em></li>
 55 |           <li><strong><code>x</code></strong> &mdash; Free-spacing and line comments (<em>extended</em>)</li>
 56 |           <li><strong><code>A</code></strong> &mdash; 21-bit Unicode properties (<em>astral</em>) &mdash; <em>Requires the Unicode Base addon</em></li>
 57 |         </ul>
 58 |       </li>
 59 |       <li><strong>Native flags</strong>
 60 |         <ul>
 61 |           <li><strong><code>g</code></strong> &mdash; All matches, or advance <code>lastIndex</code> after matches (<code>global</code>)</li>
 62 |           <li><strong><code>i</code></strong> &mdash; Case insensitive (<code>ignoreCase</code>)</li>
 63 |           <li><strong><code>m</code></strong> &mdash; <code>^</code> and <code>$</code> match at newlines (<code>multiline</code>)</li>
 64 |           <li><strong><code>u</code></strong> &mdash; Handle surrogate pairs as code points and enable <code>\u{&hellip;}</code> and <code>\p{&hellip;}</code> (<code>unicode</code>) &mdash; <em>Requires native ES6 support</em></li>
 65 |           <li><strong><code>y</code></strong> &mdash; Matches must start at <code>lastIndex</code> (<code>sticky</code>) &mdash; <em>Requires Firefox 3+ or native ES6 support</em></li>
 66 |           <li><strong><code>d</code></strong> &mdash; Include indices for capturing groups on match results (<code>hasIndices</code>) &mdash; <em>Requires native ES2022 support</em></li>
 67 |         </ul>
 68 |       </li>
 69 |     </ul>
 70 | 
 71 | 
 72 |     <h2 id="explicitCapture">Named capture only <span class="plain">(<code>n</code>)</span></h2>
 73 | 
 74 |     <p>Specifies that the only captures are explicitly named groups of the form <code>(?&lt;name>&hellip;)</code>. This allows unnamed <code>(&hellip;)</code> parentheses to act as noncapturing groups without the syntactic clumsiness of the expression <code>(?:&hellip;)</code>.</p>
 75 | 
 76 |     <h3>Annotations</h3>
 77 |     <ul>
 78 |       <li><strong>Rationale:</strong> Backreference capturing adds performance overhead and is needed far less often than simple grouping. The <code>n</code> flag frees the <code>(&hellip;)</code> syntax from its often-undesired capturing side effect, while still allowing explicitly-named capturing groups.</li>
 79 |       <li><strong>Compatibility:</strong> No known problems; the <code>n</code> flag is illegal in native JavaScript regular expressions.</li>
 80 |       <li><strong>Prior art:</strong> The <code>n</code> flag comes from .NET, where it's called "explicit capture."</li>
 81 |     </ul>
 82 | 
 83 | 
 84 |     <h2 id="singleline">Dot matches all <span class="plain">(<code>s</code>)</span></h2>
 85 | 
 86 |     <!--<div class="aside right">
 87 |       <p>The now abandoned ES4 proposals called for recognizing the <a href="https://en.wikipedia.org/wiki/C0_and_C1_control_codes">C1</a>/Unicode NEL "next line" control code (<code>U+0085</code>) as an additional newline code point in that standard.</p>
 88 |     </div>-->
 89 | 
 90 |     <!--<div class="aside right">
 91 |       <p>To make unescaped dots outside of character classes match any code point rather than code unit, you can use <a href="https://gist.github.com/2921183">this tiny XRegExp addon</a>.</p>
 92 |     </div>-->
 93 | 
 94 |     <p>Usually, a dot does not match newlines. However, a mode in which dots match any code unit (including newlines) can be as useful as one where dots don't. The <code>s</code> flag allows the mode to be selected on a per-regex basis. Escaped dots (<code>\.</code>) and dots within character classes (<code>[.]</code>) are always equivalent to literal dots. The newline code points are as follows:</p>
 95 | 
 96 |     <ul>
 97 |       <li><code>U+000A</code> &mdash; Line feed &mdash; <code>\n</code></li>
 98 |       <li><code>U+000D</code> &mdash; Carriage return &mdash; <code>\r</code></li>
 99 |       <li><code>U+2028</code> &mdash; Line separator</li>
100 |       <li><code>U+2029</code> &mdash; Paragraph separator</li>
101 |     </ul>
102 | 
103 |     <h3>Annotations</h3>
104 |     <ul>
105 |       <li><strong>Rationale:</strong> All popular Perl-style regular expression flavors except JavaScript (prior to ES2018) include a flag that allows dots to match newlines. Without this mode, matching any single code unit requires, e.g., <code>[\s\S]</code>, <code>[\0-\uFFFF]</code>, <code>[^]</code> (JavaScript only; doesn't work in some browsers without XRegExp), or god forbid <code>(.|\s)</code> (which requires unnecessary backtracking).</li>
106 |       <li><strong>Compatibility:</strong> No known problems; the <code>s</code> flag is illegal in native JavaScript regular expressions prior to ES2018.</li>
107 |       <li><strong>Prior art:</strong> The <code>s</code> flag comes from Perl.</li>
108 |     </ul>
109 | 
110 |     <div class="aside">
111 |       <p>When using XRegExp's <a href="../unicode/index.html">Unicode Properties addon</a>, you can match any code point without using the <code>s</code> flag via <code>\p{Any}</code>.</p>
112 |     </div>
113 | 
114 | 
115 |     <h2 id="extended">Free-spacing and line comments <span class="plain">(<code>x</code>)</span></h2>
116 | 
117 |     <p>This flag has two complementary effects. First, it causes all whitespace recognized natively by <code>\s</code> to be ignored, so you can free-format the regex pattern for readability. Second, it allows comments with a leading <code>#</code>. Specifically, it turns whitespace into an "ignore me" metacharacter, and <code>#</code> into an "ignore me and everything else up to the next newline" metacharacter. They aren't taken as metacharacters within character classes (which means that classes are <em>not</em> free-format even with <code>x</code>, following precedent from most other regex libraries that support <code>x</code>), and as with other metacharacters, you can escape whitespace and <code>#</code> that you want to be taken literally. Of course, you can always use <code>\s</code> to match whitespace.</p>
118 | 
119 |     <div class="aside">
120 |       <p>It might be better to think of whitespace and comments as do-nothing (rather than ignore-me) metacharacters. This distinction is important with something like <code>\12&nbsp;3</code>, which with the <code>x</code> flag is taken as <code>\12</code> followed by <code>3</code>, and not <code>\123</code>. However, quantifiers following whitespace or comments apply to the preceding token, so <code>x&nbsp;+</code> is equivalent to <code>x+</code>.</p>
121 |     </div>
122 | 
123 |     <p>The ignored whitespace characters are those matched natively by <code>\s</code>. ES3 whitespace is based on Unicode 2.1.0 or later. ES5 whitespace is based on Unicode 3.0.0 or later, plus <code>U+FEFF</code>. Following are the code points that should be matched by <code>\s</code> according to ES5 and Unicode 4.0.1:</p>
124 | 
125 |     <ul style="-webkit-column-count:3; -moz-column-count:3; column-count:3;">
126 |       <li><code>U+0009</code> &mdash; Tab &mdash; <code>\t</code></li>
127 |       <li><code>U+000A</code> &mdash; Line feed &mdash; <code>\n</code></li>
128 |       <li><code>U+000B</code> &mdash; Vertical tab &mdash; <code>\v</code></li>
129 |       <li><code>U+000C</code> &mdash; Form feed &mdash; <code>\f</code></li>
130 |       <li><code>U+000D</code> &mdash; Carriage return &mdash; <code>\r</code></li>
131 |       <li><code>U+0020</code> &mdash; Space</li>
132 |       <li><code>U+00A0</code> &mdash; No-break space</li>
133 |       <li><code>U+1680</code> &mdash; Ogham space mark</li>
134 |       <li><code>U+180E</code> &mdash; Mongolian vowel separator</li>
135 |       <li><code>U+2000</code> &mdash; En quad</li>
136 |       <li><code>U+2001</code> &mdash; Em quad</li>
137 |       <li><code>U+2002</code> &mdash; En space</li>
138 |       <li><code>U+2003</code> &mdash; Em space</li>
139 |       <li><code>U+2004</code> &mdash; Three-per-em space</li>
140 |       <li><code>U+2005</code> &mdash; Four-per-em space</li>
141 |       <li><code>U+2006</code> &mdash; Six-per-em space</li>
142 |       <li><code>U+2007</code> &mdash; Figure space</li>
143 |       <li><code>U+2008</code> &mdash; Punctuation space</li>
144 |       <li><code>U+2009</code> &mdash; Thin space</li>
145 |       <li><code>U+200A</code> &mdash; Hair space</li>
146 |       <li><code>U+2028</code> &mdash; Line separator</li>
147 |       <li><code>U+2029</code> &mdash; Paragraph separator</li>
148 |       <li><code>U+202F</code> &mdash; Narrow no-break space</li>
149 |       <li><code>U+205F</code> &mdash; Medium mathematical space</li>
150 |       <li><code>U+3000</code> &mdash; Ideographic space</li>
151 |       <li><code>U+FEFF</code> &mdash; Zero width no-break space</li>
152 |     </ul>
153 | 
154 |     <h3>Annotations</h3>
155 |     <ul>
156 |       <li><strong>Rationale:</strong> Regular expressions are notoriously hard to read; adding whitespace and comments makes regular expressions easier to read.</li>
157 |       <li><strong>Compatibility:</strong> No known problems; the <code>x</code> flag is illegal in native JavaScript regular expressions.</li>
158 |       <li><strong>Prior art:</strong> The <code>x</code> flag comes from Perl, and was originally inspired by Jeffrey Friedl's pretty-printing of complex regexes.</li>
159 |     </ul>
160 | 
161 |     <div class="aside">
162 |       <p>Unicode 1.1.5&ndash;4.0.0 assigned code point <code>U+200B</code> (ZWSP) to the <code>Zs</code> (Space separator) category, which means that some browsers or regex engines might include this additional code point in those matched by <code>\s</code>, etc. Unicode 4.0.1 moved ZWSP to the <code>Cf</code> (Format) category.</p>
163 | 
164 |       <p>Unicode 1.1.5 assigned code point <code>U+FEFF</code> (ZWNBSP) to the <code>Zs</code> category. Unicode 2.0.14 moved ZWNBSP to the <code>Cf</code> category. ES5 explicitly includes ZWNBSP in its list of whitespace characters, even though this does not match any version of the Unicode standard since 1996.</p>
165 | 
166 |       <p><code>U+180E</code> (Mongolian vowel separator) was introduced in Unicode 3.0.0, which assigned it the <code>Cf</code> category. Unicode 4.0.0 moved it into the <code>Zs</code> category, and Unicode 6.3.0 moved it back to the <code>Cf</code> category.</p>
167 |     </div>
168 | 
169 |     <div class="aside">
170 |       <p>JavaScript's <code>\s</code> is similar but not equivalent to <code>\p{Z}</code> (the Separator category) from regex libraries that support Unicode categories, including XRegExp's own <a href="../unicode/index.html">Unicode Categories addon</a>. The difference is that <code>\s</code> includes code points <code>U+0009</code>&ndash;<code>U+000D</code> and <code>U+FEFF</code>, which are not assigned the Separator category in the Unicode character database.</p>
171 | 
172 |       <p>JavaScript's <code>\s</code> is nearly equivalent to <code>\p{White_Space}</code> from the <a href="../unicode/index.html">Unicode Properties addon</a>. The differences are: 1. <code>\p{White_Space}</code> does not include <code>U+FEFF</code> (ZWNBSP), and 2. <code>\p{White_Space}</code> includes <code>U+0085</code> (NEL), which is not assigned the Separator category in the Unicode character database.</p>
173 | 
174 |       <p>Aside: Not all JavaScript regex syntax is Unicode-aware. According to JavaScript specs, <code>\s</code>, <code>\S</code>, <code>.</code>, <code>^</code>, and <code>$</code> use Unicode-based interpretations of <em>whitespace</em> and <em>newline</em>, while <code>\d</code>, <code>\D</code>, <code>\w</code>, <code>\W</code>, <code>\b</code>, and <code>\B</code> use ASCII-only interpretations of <em>digit</em>, <em>word character</em>, and <em>word boundary</em><!-- (e.g., <code>/a\b/.test("na&iuml;ve")</code> returns <code>true</code>)-->. Some browsers and browser versions get aspects of these details wrong.</p>
175 | 
176 |       <p>For more details, see <a href="https://blog.stevenlevithan.com/archives/javascript-regex-and-unicode"><em>JavaScript, Regex, and Unicode</em></a>.</p>
177 |     </div>
178 | 
179 | 
180 |     <h2 id="astral">21-bit Unicode properties <span class="plain">(<code>A</code>)</span></h2>
181 | 
182 |     <p><strong>Requires the <a href="../unicode/index.html">Unicode Base</a> addon.</strong></p>
183 | 
184 |     <p>By default, <code>\p{&hellip;}</code> and <code>\P{&hellip;}</code> support the Basic Multilingual Plane (i.e. code points up to <code>U+FFFF</code>). You can opt in to full 21-bit Unicode support (with code points up to <code>U+10FFFF</code>) on a per-regex basis by using flag <code>A</code>. In XRegExp, this is called <em>astral mode</em>. You can automatically add flag <code>A</code> for all new regexes by running <code>XRegExp.install('astral')</code>. When in astral mode, <code>\p{&hellip;}</code> and <code>\P{&hellip;}</code> always match a full code point rather than a code unit, using surrogate pairs for code points above <code>U+FFFF</code>.</p>
185 | 
186 | <pre class="sh_javascript">// Using flag A to match astral code points
187 | XRegExp('^\\p{S}$').test('💩'); // -> false
188 | XRegExp('^\\p{S}$', 'A').test('💩'); // -> true
189 | XRegExp('(?A)^\\p{S}$').test('💩'); // -> true
190 | // Using surrogate pair U+D83D U+DCA9 to represent U+1F4A9 (pile of poo)
191 | XRegExp('(?A)^\\p{S}$').test('\uD83D\uDCA9'); // -> true
192 | 
193 | // Implicit flag A
194 | XRegExp.install('astral');
195 | XRegExp('^\\p{S}$').test('💩'); // -> true
196 | </pre>
197 | 
198 |     <p><strong>Important:</strong> Opting in to astral mode disables the use of <code>\p{&hellip;}</code> and <code>\P{&hellip;}</code> within character classes. In astral mode, use e.g. <code>(\p{L}|[0-9_])+</code> instead of <code>[\p{L}0-9_]+</code>.</p>
199 | 
200 |     <h3>Annotations</h3>
201 |     <ul>
202 |       <li><strong>Rationale:</strong> Astral code point matching uses surrogate pairs and is somewhat slower than BMP-only matching. Enabling astral code point matching on a per-regex basis can therefore be useful.</li>
203 |       <li><strong>Compatibility:</strong> No known problems; the <code>A</code> flag is illegal in native JavaScript regular expressions.</li>
204 |       <li><strong>Prior art:</strong> None.</li>
205 |     </ul>
206 | 
207 | 
208 | 
209 | 
210 | 
211 |   </div>
212 | </div>
213 | <div id="footer">
214 |   <p>&copy; <a href="https://slev.life/">Steven Levithan</a> :: <a href="https://github.com/slevithan/xregexp">GitHub</a> :: <a href="https://xregexp.com/">XRegExp.com</a></p>
215 | </div>
216 | </body>
217 | </html>
218 | 


--------------------------------------------------------------------------------
/docs/syntax/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 |   <meta charset="utf-8" />
  5 |   <title>New syntax :: XRegExp</title>
  6 |   <link href="../assets/index.css" rel="stylesheet" type="text/css" />
  7 | </head>
  8 | <body>
  9 | <div id="header">
 10 |   <h1><a href="../index.html"><span id="logoX">X</span>RegExp</a></h1>
 11 |   <h1 class="subtitle">The one of a kind JavaScript regular expression library</h1>
 12 | </div>
 13 | <div id="body">
 14 |   <div id="navBar">
 15 |     <div class="menu">
 16 |       <ul>
 17 |         <li><a href="../index.html">Home</a></li>
 18 |         <li><a href="../api/index.html">API</a></li>
 19 |         <li><a href="../syntax/index.html" class="selected">New syntax</a></li>
 20 |         <li><a href="../flags/index.html">New flags</a></li>
 21 |         <li><a href="../unicode/index.html">Unicode</a></li>
 22 |       </ul>
 23 |     </div>
 24 |   </div>
 25 |   <div id="main">
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 |     <div id="tocContainer">
 32 |       <div id="toc">
 33 |         <h2>Table of contents</h2>
 34 |         <ul>
 35 |           <li><a href="#namedCapture">Named capture</a></li>
 36 |           <li><a href="#inlineComments">Inline comments</a></li>
 37 |           <li><a href="#modeModifier">Leading mode modifier</a></li>
 38 |           <li><a href="#strictErrors">Stricter error handling</a></li>
 39 |           <li><a href="#unicode">Unicode</a></li>
 40 |           <li><a href="#replacementText">Replacement text</a></li>
 41 |         </ul>
 42 |       </div>
 43 |     </div>
 44 | 
 45 |     <h1>New syntax</h1>
 46 | 
 47 |     <h2 id="namedCapture">Named capture</h2>
 48 | 
 49 |     <p>XRegExp includes comprehensive support for named capture. Following are the details of XRegExp's named capture syntax:</p>
 50 | 
 51 |     <ul>
 52 |       <li>Capture: <code>(?&lt;<em>name</em>>&hellip;)</code></li>
 53 |       <li>Backreference in regex: <code>\k&lt;<em>name</em>></code></li>
 54 |       <li>Backreference in replacement text: <code>$&lt;<em>name</em>&gt;</code></li>
 55 |       <li>Backreference stored at: <code><em>result</em>.groups.<em>name</em></code></li>
 56 |       <li>Backreference numbering: Sequential (i.e., left to right for both named and unnamed capturing groups)</li>
 57 |       <li>Multiple groups with same name: <code>SyntaxError</code></li>
 58 |     </ul>
 59 | 
 60 |     <h3>Notes</h3>
 61 |     <ul>
 62 |       <li>See additional details and compare to named capture in other regex flavors here: <mark><a href="./named_capture_comparison/index.html">Named capture comparison</a></mark>.</li>
 63 |       <li>JavaScript added native support for named capture in ES2018. XRegExp support predates this, and it extends this support into pre-ES2018 browsers.</li>
 64 |       <li>Capture names can use a wide range of Unicode characters (see the definition of <a href="https://tc39.es/proposal-regexp-named-groups/#prod-RegExpIdentifierName"><code>RegExpIdentifierName</code></a>).</li>
 65 |     </ul>
 66 | 
 67 |     <h3>Example</h3>
 68 | <pre class="sh_javascript">const repeatedWords = XRegExp.tag('gi')`\b(?&lt;word>[a-z]+)\s+\k&lt;word>\b`;
 69 | // Alternatively: XRegExp('\\b(?&lt;word>[a-z]+)\\s+\\k&lt;word>\\b', 'gi');
 70 | 
 71 | // Check for repeated words
 72 | repeatedWords.test('The the test data');
 73 | // -> true
 74 | 
 75 | // Remove any repeated words
 76 | const withoutRepeated = XRegExp.replace('The the test data', repeatedWords, '${word}');
 77 | // -> 'The test data'
 78 | 
 79 | const url = XRegExp(`^(?&lt;scheme> [^:/?]+ ) ://   # aka protocol
 80 |                       (?&lt;host>   [^/?]+  )       # domain name/IP
 81 |                       (?&lt;path>   [^?]*   ) \\??  # optional path
 82 |                       (?&lt;query>  .*      )       # optional query`, 'x');
 83 | 
 84 | // Get the URL parts
 85 | const parts = XRegExp.exec('https://google.com/path/to/file?q=1', url);
 86 | // parts -> ['https://google.com/path/to/file?q=1', 'https', 'google.com', '/path/to/file', 'q=1']
 87 | // parts.groups.scheme -> 'https'
 88 | // parts.groups.host   -> 'google.com'
 89 | // parts.groups.path   -> '/path/to/file'
 90 | // parts.groups.query  -> 'q=1'
 91 | 
 92 | // Named backreferences are available in replacement functions as properties of the last argument
 93 | XRegExp.replace('https://google.com/path/to/file?q=1', url, (match, ...args) => {
 94 |   const groups = args.pop();
 95 |   return match.replace(groups.host, 'xregexp.com');
 96 | });
 97 | // -> 'https://xregexp.com/path/to/file?q=1'
 98 | </pre>
 99 | 
100 |     <p>Regexes that use named capture work with all native methods. However, you need to use <code><a href="../api/index.html#exec">XRegExp.exec</a></code> and <code><a href="../api/index.html#replace">XRegExp.replace</a></code> for access to named backreferences, otherwise only numbered backreferences are available.</p>
101 | 
102 |     <h3>Annotations</h3>
103 |     <ul>
104 |       <li><strong>Rationale:</strong> Named capture can help make regular expressions and related code self-documenting, and thereby easier to read and use.</li>
105 |       <li><strong>Compatibility:</strong> The named capture syntax is illegal in pre-ES2018 native JavaScript regular expressions and hence does not cause problems. Backreferences to undefined named groups throw a <code>SyntaxError</code>.</li>
106 |       <li><strong>Compatibility with deprecated features:</strong> XRegExp's named capture functionality does not support the <code>lastMatch</code> property of the global <code>RegExp</code> object or the <code>RegExp.prototype.compile</code> method, since those features were <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Deprecated_and_obsolete_features">deprecated</a> in JavaScript 1.5.</li>
107 |       <li><strong>Prior art:</strong> Comes from Python (feature) and .NET (syntax).</li>
108 |     </ul>
109 | 
110 | 
111 |     <h2 id="inlineComments">Inline comments</h2>
112 | 
113 |     <p>Inline comments use the syntax <code>(?#<em>comment</em>)</code>. They are an alternative to the line comments allowed in <a href="../flags/index.html#extended">free-spacing mode</a>.</p>
114 | 
115 |     <p>Comments are a do-nothing (rather than ignore-me) metasequence. This distinction is important with something like <code>\1(?#comment)2</code>, which is taken as <code>\1</code> followed by <code>2</code>, and not <code>\12</code>. However, quantifiers following comments apply to the preceding token, so <code>x(?#comment)+</code> is equivalent to <code>x+</code>.</p>
116 | 
117 |     <h3 style="margin-top:20px;">Example</h3>
118 | <pre class="sh_javascript">const regex = XRegExp('^(?#month)\\d{1,2}/(?#day)\\d{1,2}/(?#year)(\\d{2}){1,2}', 'n');
119 | const isDate = regex.test('04/20/2008'); // -> true
120 | 
121 | // Can still be useful when combined with free-spacing, because inline comments
122 | // don't need to end with \n
123 | const regex2 = XRegExp('^ \\d{1,2}      (?#month)' +
124 |                        '/ \\d{1,2}      (?#day  )' +
125 |                        '/ (\\d{2}){1,2} (?#year )', 'nx');
126 | </pre>
127 | 
128 |     <h3>Annotations</h3>
129 |     <ul>
130 |       <li><strong>Rationale:</strong> Comments make regular expressions more readable.</li>
131 |       <li><strong>Compatibility:</strong> No known problems with this syntax; it is illegal in native JavaScript regular expressions.</li>
132 |       <li><strong>Prior art:</strong> The syntax comes from Perl. It is also available in .NET, PCRE, Python, Ruby, and Tcl, among other regular expression flavors.</li>
133 |     </ul>
134 | 
135 | 
136 |     <h2 id="modeModifier">Leading mode modifier</h2>
137 | 
138 |     <p>A mode modifier uses the syntax <code>(?<em>imnsuxA</em>)</code>, where <code><em>imnsuxA</em></code> is any combination of XRegExp flags except <code>g</code>, <code>y</code>, or <code>d</code>. Mode modifiers provide an alternate way to enable the specified flags. XRegExp allows the use of a single mode modifier at the very beginning of a pattern only.</p>
139 | 
140 |     <h3 style="margin-top:20px;">Example</h3>
141 | <pre class="sh_javascript">const regex = XRegExp('(?im)^[a-z]+$');
142 | regex.ignoreCase; // -> true
143 | regex.multiline; // -> true
144 | </pre>
145 | 
146 |     <p>When creating a regex, it's okay to include flags in a mode modifier that are also provided via the separate <code>flags</code> argument. For instance, <code>XRegExp('(?s).+', 's')</code> is valid.</p>
147 | 
148 |     <p>Flags <code>g</code>, <code>y</code>, and <code>d</code> cannot be included in a mode modifier, or an error is thrown. This is because <code>g</code>, <code>y</code>, and <code>d</code>, unlike all other flags, have no impact on the meaning of a regex. Rather, they change how particular methods choose to apply the regex. XRegExp methods provide e.g. <code>scope</code>, <code>sticky</code>, and <code>pos</code> arguments that allow you to use and change such functionality on a per-run rather than per-regex basis. Additionally, consider that it makes sense to apply all other flags to a particular subsection of a regex, whereas flags <code>g</code>, <code>y</code>, and <code>d</code> only make sense when applied to the regex as a whole. Allowing <code>g</code>, <code>y</code>, and <code>d</code> in a mode modifier might therefore create future compatibility problems.</p>
149 | 
150 |     <p>The use of unknown flags in a mode modifier causes an error to be thrown. However, XRegExp addons can add new flags that are then automatically valid within mode modifiers.</p>
151 | 
152 |     <h3>Annotations</h3>
153 |     <ul>
154 |       <li><strong>Rationale:</strong> Mode modifiers allow you to enable flags in situations where a regex pattern can be provided as a string only. They can also improve readability, since flags are read first rather than after the pattern.</li>
155 |       <li><strong>Compatibility:</strong> No known problems with this syntax; it is illegal in native JavaScript regular expressions.</li>
156 |       <li><strong>Compatibility with other regex flavors:</strong> Some regex flavors support the use of multiple mode modifiers anywhere in a pattern, and allow extended syntax for unsetting flags via <code>(?-i)</code>, simultaneously setting and unsetting flags via <code>(?i-m)</code>, and enabling flags for subpatterns only via <code>(?i:&hellip;)</code>. XRegExp does not support these extended options.</li>
157 |       <li><strong>Prior art:</strong> The syntax comes from Perl. It is also available in .NET, Java, PCRE, Python, Ruby, and Tcl, among other regular expression flavors.</li>
158 |     </ul>
159 | 
160 | 
161 |     <h2 id="strictErrors">Stricter error handling</h2>
162 | 
163 |     <p>XRegExp makes any escaped letters or numbers a <code>SyntaxError</code> unless they form a valid and complete metasequence or backreference. This helps to catch errors early, and makes it safe for future versions of ES or XRegExp to introduce new escape sequences. It also means that octal escapes are always an error in XRegExp. ES3/5 do not allow octal escapes, but browsers support them anyway for backward compatibility, which often leads to unintended behavior.</p>
164 | 
165 |     <p>XRegExp requires all backreferences, whether written as <code>\<em>n</em></code>, <code>\k&lt;<em>n</em>></code>, or <code>\k&lt;<em>name</em>></code>, to appear to the right of the opening parenthesis of the group they reference.</p>
166 | 
167 |     <p>XRegExp never allows <code>\<em>n</em></code>-style backreferences to be followed by literal numbers. To match backreference 1 followed by a literal <code>2</code> character, you can use, e.g., <code>(a)\k&lt;1>2</code>, <code>(?x)(a)\1 2</code>, or <code>(a)\1(?#)2</code>.</p>
168 | 
169 | 
170 |     <h2 id="unicode">Unicode</h2>
171 | 
172 |     <p>XRegExp supports matching Unicode categories, scripts, and other properties via addon scripts. Such tokens are matched using <code>\p{&hellip;}</code>, <code>\P{&hellip;}</code>, and <code>\p{^&hellip;}</code>. See <a href="../unicode/index.html">XRegExp Unicode addons</a> for more details.</p>
173 | 
174 |     <p>XRegExp additionally supports the <code>\u{N&hellip;}</code> syntax for matching individual code points. In ES6 this is supported natively, but only when using the <code>u</code> flag. XRegExp supports this syntax for code points <code>0</code>&ndash;<code>FFFF</code> even when not using the <code>u</code> flag, and it supports the complete Unicode range <code>0</code>&ndash;<code>10FFFF</code> when using <code>u</code>.</p>
175 | 
176 | 
177 |     <h2 id="replacementText">Replacement text</h2>
178 | 
179 |     <p>XRegExp's replacement text syntax is used by the <code><a href="../api/index.html#replace">XRegExp.replace</a></code> function. It adds <code>$0</code> as a synonym of <code>$&amp;</code> (to refer to the entire match), and adds <code>$&lt;<em>n</em>&gt;</code> and <code>${<em>n</em>}</code> for backreferences to named and numbered capturing groups (in addition to <code>$1</code>, etc.). When the braces syntax is used for numbered backreferences, it allows numbers with three or more digits (not possible natively) and allows separating a backreference from an immediately-following digit (not always possible natively). XRegExp uses stricter replacement text error handling than native JavaScript, to help you catch errors earlier (e.g., the use of a <code>$</code> character that isn't part of a valid metasequence causes an error to be thrown).</p>
180 | 
181 |     <p>Following are the special tokens that can be used in XRegExp replacement strings:</p>
182 | 
183 |     <ul>
184 |       <li><code>$$</code> - Inserts a literal <code>$</code> character.</li>
185 |       <li><code>$&amp;</code>, <code>$0</code> - Inserts the matched substring.</li>
186 |       <li><code>$`</code> - Inserts the string that precedes the matched substring (left context).</li>
187 |       <li><code>$'</code> - Inserts the string that follows the matched substring (right context).</li>
188 |       <li><code>$n</code>, <code>$nn</code> - Where n/nn are digits referencing an existing capturing group, inserts
189 |         backreference n/nn.</li>
190 |       <li><code>$&lt;n&gt;</code>, <code>${n}</code> - Where n is a name or any number of digits that reference an existing capturing
191 |         group, inserts backreference n.</li>
192 |     </ul>
193 | 
194 |     <p>XRegExp behavior for <code>$&lt;n&gt;</code> and <code>${n}</code>:</p>
195 | 
196 |     <ul>
197 |       <li>Backreference to numbered capture, if <code>n</code> is an integer. Use <code>0</code> for the entire match. Any number of leading zeros may be used.</li>
198 |       <li>Backreference to named capture <code>n</code>, if it exists. Does not overlap with numbered capture since XRegExp does not allow named capture to use a bare integer as the name.</li>
199 |       <li>If the name or number does not refer to an existing capturing group, it's an error.</li>
200 |     </ul>
201 | 
202 |     <p>XRegExp behavior for <code>$n</code> and <code>$nn</code>:</p>
203 | 
204 |     <ul>
205 |       <li>Backreferences without curly braces end after 1 or 2 digits. Use <code>${&hellip;}</code> for more digits.</li>
206 |       <li><code>$1</code> is an error if there are no capturing groups.</li>
207 |       <li><code>$10</code> is an error if there are fewer than 10 capturing groups. Use <code>${1}0</code> instead.</li>
208 |       <li><code>$01</code> is equivalent to <code>$1</code> if a capturing group exists, otherwise it's an error.</li>
209 |       <li><code>$0</code> (not followed by 1-9) and <code>$00</code> are the entire match.</li>
210 |     </ul>
211 | 
212 |     <p>For comparison, following is JavaScript's native behavior for <code>$n</code> and <code>$nn</code>:</p>
213 | 
214 |     <ul>
215 |       <li>Backreferences end after 1 or 2 digits. Cannot use backreference to capturing group 100+.</li>
216 |       <li><code>$1</code> is a literal <code>$1</code> if there are no capturing groups.</li>
217 |       <li><code>$10</code> is <code>$1</code> followed by a literal <code>0</code> if there are fewer than 10 capturing groups.</li>
218 |       <li><code>$01</code> is equivalent to <code>$1</code> if a capturing group exists, otherwise it's a literal <code>$01</code>.</li>
219 |       <li><code>$0</code> is a literal <code>$0</code>.</li>
220 |     </ul>
221 | 
222 | 
223 | 
224 | 
225 | 
226 |   </div>
227 | </div>
228 | <div id="footer">
229 |   <p>&copy; <a href="https://slev.life/">Steven Levithan</a> :: <a href="https://github.com/slevithan/xregexp">GitHub</a> :: <a href="https://xregexp.com/">XRegExp.com</a></p>
230 | </div>
231 | </body>
232 | </html>
233 | 


--------------------------------------------------------------------------------
/tools/output/properties.js:
--------------------------------------------------------------------------------
 1 | module.exports = [
 2 |     {
 3 |         'name': 'ASCII',
 4 |         'bmp': '\0-\x7F'
 5 |     },
 6 |     {
 7 |         'name': 'Alphabetic',
 8 |         'bmp': 'A-Za-z\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0345\u0370-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u052F\u0531-\u0556\u0559\u0560-\u0588\u05B0-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u05D0-\u05EA\u05EF-\u05F2\u0610-\u061A\u0620-\u0657\u0659-\u065F\u066E-\u06D3\u06D5-\u06DC\u06E1-\u06E8\u06ED-\u06EF\u06FA-\u06FC\u06FF\u0710-\u073F\u074D-\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0817\u081A-\u082C\u0840-\u0858\u0860-\u086A\u0870-\u0887\u0889-\u088E\u08A0-\u08C9\u08D4-\u08DF\u08E3-\u08E9\u08F0-\u093B\u093D-\u094C\u094E-\u0950\u0955-\u0963\u0971-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD-\u09C4\u09C7\u09C8\u09CB\u09CC\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09F0\u09F1\u09FC\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3E-\u0A42\u0A47\u0A48\u0A4B\u0A4C\u0A51\u0A59-\u0A5C\u0A5E\u0A70-\u0A75\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD-\u0AC5\u0AC7-\u0AC9\u0ACB\u0ACC\u0AD0\u0AE0-\u0AE3\u0AF9-\u0AFC\u0B01-\u0B03\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D-\u0B44\u0B47\u0B48\u0B4B\u0B4C\u0B56\u0B57\u0B5C\u0B5D\u0B5F-\u0B63\u0B71\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD0\u0BD7\u0C00-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D-\u0C44\u0C46-\u0C48\u0C4A-\u0C4C\u0C55\u0C56\u0C58-\u0C5A\u0C5D\u0C60-\u0C63\u0C80-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCC\u0CD5\u0CD6\u0CDD\u0CDE\u0CE0-\u0CE3\u0CF1\u0CF2\u0D00-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D-\u0D44\u0D46-\u0D48\u0D4A-\u0D4C\u0D4E\u0D54-\u0D57\u0D5F-\u0D63\u0D7A-\u0D7F\u0D81-\u0D83\u0D85-\u0D96\u0D9A-\u0DB
1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E01-\u0E3A\u0E40-\u0E46\u0E4D\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EB9\u0EBB-\u0EBD\u0EC0-\u0EC4\u0EC6\u0ECD\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F71-\u0F81\u0F88-\u0F97\u0F99-\u0FBC\u1000-\u1036\u1038\u103B-\u103F\u1050-\u108F\u109A-\u109D\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F5\u13F8-\u13FD\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16EE-\u16F8\u1700-\u1713\u171F-\u1733\u1740-\u1753\u1760-\u176C\u176E-\u1770\u1772\u1773\u1780-\u17B3\u17B6-\u17C8\u17D7\u17DC\u1820-\u1878\u1880-\u18AA\u18B0-\u18F5\u1900-\u191E\u1920-\u192B\u1930-\u1938\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A1B\u1A20-\u1A5E\u1A61-\u1A74\u1AA7\u1ABF\u1AC0\u1ACC-\u1ACE\u1B00-\u1B33\u1B35-\u1B43\u1B45-\u1B4C\u1B80-\u1BA9\u1BAC-\u1BAF\u1BBA-\u1BE5\u1BE7-\u1BF1\u1C00-\u1C36\u1C4D-\u1C4F\u1C5A-\u1C7D\u1C80-\u1C88\u1C90-\u1CBA\u1CBD-\u1CBF\u1CE9-\u1CEC\u1CEE-\u1CF3\u1CF5\u1CF6\u1CFA\u1D00-\u1DBF\u1DE7-\u1DF4\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2160-\u2188\u24B6-\u24E9\u2C00-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2DE0-\u2DFF\u2E2F\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BF\u31F0-
\u31FF\u3400-\u4DBF\u4E00-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA674-\uA67B\uA67F-\uA6EF\uA717-\uA71F\uA722-\uA788\uA78B-\uA7CA\uA7D0\uA7D1\uA7D3\uA7D5-\uA7D9\uA7F2-\uA805\uA807-\uA827\uA840-\uA873\uA880-\uA8C3\uA8C5\uA8F2-\uA8F7\uA8FB\uA8FD-\uA8FF\uA90A-\uA92A\uA930-\uA952\uA960-\uA97C\uA980-\uA9B2\uA9B4-\uA9BF\uA9CF\uA9E0-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA36\uAA40-\uAA4D\uAA60-\uAA76\uAA7A-\uAABE\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEF\uAAF2-\uAAF5\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uAB30-\uAB5A\uAB5C-\uAB69\uAB70-\uABEA\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC',
 9 |         'astral': '\uD800[\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1F\uDF2D-\uDF4A\uDF50-\uDF7A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]|\uD801[\uDC00-\uDC9D\uDCB0-\uDCD3\uDCD8-\uDCFB\uDD00-\uDD27\uDD30-\uDD63\uDD70-\uDD7A\uDD7C-\uDD8A\uDD8C-\uDD92\uDD94\uDD95\uDD97-\uDDA1\uDDA3-\uDDB1\uDDB3-\uDDB9\uDDBB\uDDBC\uDE00-\uDF36\uDF40-\uDF55\uDF60-\uDF67\uDF80-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD802[\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDC60-\uDC76\uDC80-\uDC9E\uDCE0-\uDCF2\uDCF4\uDCF5\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00-\uDE03\uDE05\uDE06\uDE0C-\uDE13\uDE15-\uDE17\uDE19-\uDE35\uDE60-\uDE7C\uDE80-\uDE9C\uDEC0-\uDEC7\uDEC9-\uDEE4\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72\uDF80-\uDF91]|\uD803[\uDC00-\uDC48\uDC80-\uDCB2\uDCC0-\uDCF2\uDD00-\uDD27\uDE80-\uDEA9\uDEAB\uDEAC\uDEB0\uDEB1\uDF00-\uDF1C\uDF27\uDF30-\uDF45\uDF70-\uDF81\uDFB0-\uDFC4\uDFE0-\uDFF6]|\uD804[\uDC00-\uDC45\uDC71-\uDC75\uDC82-\uDCB8\uDCC2\uDCD0-\uDCE8\uDD00-\uDD32\uDD44-\uDD47\uDD50-\uDD72\uDD76\uDD80-\uDDBF\uDDC1-\uDDC4\uDDCE\uDDCF\uDDDA\uDDDC\uDE00-\uDE11\uDE13-\uDE34\uDE37\uDE3E\uDE80-\uDE86\uDE88\uDE8A-\uDE8D\uDE8F-\uDE9D\uDE9F-\uDEA8\uDEB0-\uDEE8\uDF00-\uDF03\uDF05-\uDF0C\uDF0F\uDF10\uDF13-\uDF28\uDF2A-\uDF30\uDF32\uDF33\uDF35-\uDF39\uDF3D-\uDF44\uDF47\uDF48\uDF4B\uDF4C\uDF50\uDF57\uDF5D-\uDF63]|\uD805[\uDC00-\uDC41\uDC43-\uDC45\uDC47-\uDC4A\uDC5F-\uDC61\uDC80-\uDCC1\uDCC4\uDCC5\uDCC7\uDD80-\uDDB5\uDDB8-\uDDBE\uDDD8-\uDDDD\uDE00-\uDE3E\uDE40\uDE44\uDE80-\uDEB5\uDEB8\uDF00-\uDF1A\uDF1D-\uDF2A\uDF40-\uDF46]|\uD806[\uDC00-\uDC38\uDCA0-\uDCDF\uDCFF-\uDD06\uDD09\uDD0C-\uDD13\uDD15\uDD16\uDD18-\uDD35\uDD37\uDD38\uDD3B\uDD3C\uDD3F-\uDD42\uDDA0-\uDDA7\uDDAA-\uDDD7\uDDDA-\uDDDF\uDDE1\uDDE3\uDDE4\uDE00-\uDE32\uDE35-\uDE3E\uDE50-\uDE97\uDE9D\uDEB0-\uDEF8]|\uD807[\uDC00-\uDC08\uDC0A-\uDC36\uDC38-\uDC3E\uDC40\uDC72-\uDC8F\uDC92-\uDCA7\uDCA9-\uDCB6\uDD00-\uDD
06\uDD08\uDD09\uDD0B-\uDD36\uDD3A\uDD3C\uDD3D\uDD3F-\uDD41\uDD43\uDD46\uDD47\uDD60-\uDD65\uDD67\uDD68\uDD6A-\uDD8E\uDD90\uDD91\uDD93-\uDD96\uDD98\uDEE0-\uDEF6\uDFB0]|\uD808[\uDC00-\uDF99]|\uD809[\uDC00-\uDC6E\uDC80-\uDD43]|\uD80B[\uDF90-\uDFF0]|[\uD80C\uD81C-\uD820\uD822\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879\uD880-\uD883][\uDC00-\uDFFF]|\uD80D[\uDC00-\uDC2E]|\uD811[\uDC00-\uDE46]|\uD81A[\uDC00-\uDE38\uDE40-\uDE5E\uDE70-\uDEBE\uDED0-\uDEED\uDF00-\uDF2F\uDF40-\uDF43\uDF63-\uDF77\uDF7D-\uDF8F]|\uD81B[\uDE40-\uDE7F\uDF00-\uDF4A\uDF4F-\uDF87\uDF8F-\uDF9F\uDFE0\uDFE1\uDFE3\uDFF0\uDFF1]|\uD821[\uDC00-\uDFF7]|\uD823[\uDC00-\uDCD5\uDD00-\uDD08]|\uD82B[\uDFF0-\uDFF3\uDFF5-\uDFFB\uDFFD\uDFFE]|\uD82C[\uDC00-\uDD22\uDD50-\uDD52\uDD64-\uDD67\uDD70-\uDEFB]|\uD82F[\uDC00-\uDC6A\uDC70-\uDC7C\uDC80-\uDC88\uDC90-\uDC99\uDC9E]|\uD835[\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB]|\uD837[\uDF00-\uDF1E]|\uD838[\uDC00-\uDC06\uDC08-\uDC18\uDC1B-\uDC21\uDC23\uDC24\uDC26-\uDC2A\uDD00-\uDD2C\uDD37-\uDD3D\uDD4E\uDE90-\uDEAD\uDEC0-\uDEEB]|\uD839[\uDFE0-\uDFE6\uDFE8-\uDFEB\uDFED\uDFEE\uDFF0-\uDFFE]|\uD83A[\uDC00-\uDCC4\uDD00-\uDD43\uDD47\uDD4B]|\uD83B[\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]|\uD83C[\uDD30-\uDD49\uDD50-\uDD69\uDD70-\uDD89]|\uD869[\uDC00-\uDEDF\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF38\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0
]|\uD87E[\uDC00-\uDE1D]|\uD884[\uDC00-\uDF4A]'
10 |     },
11 |     {
12 |         'name': 'Any',
13 |         'isBmpLast': true,
14 |         'bmp': '\0-\uFFFF',
15 |         'astral': '[\uD800-\uDBFF][\uDC00-\uDFFF]'
16 |     },
17 |     {
18 |         'name': 'Default_Ignorable_Code_Point',
19 |         'bmp': '\xAD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180B-\u180F\u200B-\u200F\u202A-\u202E\u2060-\u206F\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8',
20 |         'astral': '\uD82F[\uDCA0-\uDCA3]|\uD834[\uDD73-\uDD7A]|[\uDB40-\uDB43][\uDC00-\uDFFF]'
21 |     },
22 |     {
23 |         'name': 'Lowercase',
24 |         'bmp': 'a-z\xAA\xB5\xBA\xDF-\xF6\xF8-\xFF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02B8\u02C0\u02C1\u02E0-\u02E4\u0345\u0371\u0373\u0377\u037A-\u037D\u0390\u03AC-\u03CE\u03D0\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0560-\u0588\u10D0-\u10FA\u10FD-\u10FF\u13F8-\u13FD\u1C80-\u1C88\u1D00-\u1DBF\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u
1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6\u1FC7\u1FD0-\u1FD3\u1FD6\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6\u1FF7\u2071\u207F\u2090-\u209C\u210A\u210E\u210F\u2113\u212F\u2134\u2139\u213C\u213D\u2146-\u2149\u214E\u2170-\u217F\u2184\u24D0-\u24E9\u2C30-\u2C5F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7D\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B-\uA69D\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-
\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7BB\uA7BD\uA7BF\uA7C1\uA7C3\uA7C8\uA7CA\uA7D1\uA7D3\uA7D5\uA7D7\uA7D9\uA7F6\uA7F8-\uA7FA\uAB30-\uAB5A\uAB5C-\uAB68\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A',
25 |         'astral': '\uD801[\uDC28-\uDC4F\uDCD8-\uDCFB\uDD97-\uDDA1\uDDA3-\uDDB1\uDDB3-\uDDB9\uDDBB\uDDBC\uDF80\uDF83-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD803[\uDCC0-\uDCF2]|\uD806[\uDCC0-\uDCDF]|\uD81B[\uDE60-\uDE7F]|\uD835[\uDC1A-\uDC33\uDC4E-\uDC54\uDC56-\uDC67\uDC82-\uDC9B\uDCB6-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDCCF\uDCEA-\uDD03\uDD1E-\uDD37\uDD52-\uDD6B\uDD86-\uDD9F\uDDBA-\uDDD3\uDDEE-\uDE07\uDE22-\uDE3B\uDE56-\uDE6F\uDE8A-\uDEA5\uDEC2-\uDEDA\uDEDC-\uDEE1\uDEFC-\uDF14\uDF16-\uDF1B\uDF36-\uDF4E\uDF50-\uDF55\uDF70-\uDF88\uDF8A-\uDF8F\uDFAA-\uDFC2\uDFC4-\uDFC9\uDFCB]|\uD837[\uDF00-\uDF09\uDF0B-\uDF1E]|\uD83A[\uDD22-\uDD43]'
26 |     },
27 |     {
28 |         'name': 'Noncharacter_Code_Point',
29 |         'bmp': '\uFDD0-\uFDEF\uFFFE\uFFFF',
30 |         'astral': '[\uD83F\uD87F\uD8BF\uD8FF\uD93F\uD97F\uD9BF\uD9FF\uDA3F\uDA7F\uDABF\uDAFF\uDB3F\uDB7F\uDBBF\uDBFF][\uDFFE\uDFFF]'
31 |     },
32 |     {
33 |         'name': 'Uppercase',
34 |         'bmp': 'A-Z\xC0-\xD6\xD8-\xDE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E\u038F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1C90-\u1CBA\u1CBD-\u1CBF\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E
24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E\u213F\u2145\u2160-\u216F\u2183\u24B6-\u24CF\u2C00-\u2C2F\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\uA7BA\uA7BC\uA7BE\uA7C0\uA7C2
\uA7C4-\uA7C7\uA7C9\uA7D0\uA7D6\uA7D8\uA7F5\uFF21-\uFF3A',
35 |         'astral': '\uD801[\uDC00-\uDC27\uDCB0-\uDCD3\uDD70-\uDD7A\uDD7C-\uDD8A\uDD8C-\uDD92\uDD94\uDD95]|\uD803[\uDC80-\uDCB2]|\uD806[\uDCA0-\uDCBF]|\uD81B[\uDE40-\uDE5F]|\uD835[\uDC00-\uDC19\uDC34-\uDC4D\uDC68-\uDC81\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB5\uDCD0-\uDCE9\uDD04\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD38\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD6C-\uDD85\uDDA0-\uDDB9\uDDD4-\uDDED\uDE08-\uDE21\uDE3C-\uDE55\uDE70-\uDE89\uDEA8-\uDEC0\uDEE2-\uDEFA\uDF1C-\uDF34\uDF56-\uDF6E\uDF90-\uDFA8\uDFCA]|\uD83A[\uDD00-\uDD21]|\uD83C[\uDD30-\uDD49\uDD50-\uDD69\uDD70-\uDD89]'
36 |     },
37 |     {
38 |         'name': 'White_Space',
39 |         'bmp': '\t-\r \x85\xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000'
40 |     }
41 | ];
42 | 


--------------------------------------------------------------------------------