├── .eslintignore ├── .gitattributes ├── tests ├── perf │ ├── .eslintrc.js │ ├── index.html │ └── perf.js ├── vendor │ └── benchmark.js │ │ ├── nano.jar │ │ ├── LICENSE.txt │ │ └── README.md ├── helpers │ ├── .eslintrc.js │ ├── h.js │ ├── h-matchers.js │ └── h-unicode.js ├── jasmine.json ├── .eslintrc.js ├── index.html └── spec │ ├── s-addons-build.js │ └── s-addons-matchrecursive.js ├── types ├── tslint.json ├── tsconfig.json └── test.ts ├── .editorconfig ├── tools ├── scripts │ ├── .eslintrc.js │ ├── script-regex.js │ ├── property-regex.js │ ├── category-regex.js │ └── utils.js └── output │ └── properties.js ├── .github └── workflows │ └── build.yml ├── .gitignore ├── .babelrc ├── src ├── index.js └── addons │ ├── unicode-scripts.js │ ├── unicode-categories.js │ ├── unicode-properties.js │ ├── build.js │ ├── matchrecursive.js │ └── unicode-base.js ├── LICENSE ├── package.json ├── docs ├── assets │ └── index.css ├── unicode │ └── index.html ├── index.html ├── syntax │ ├── named_capture_comparison │ │ └── index.html │ └── index.html └── flags │ └── index.html ├── .eslintrc.js └── README.md /.eslintignore: -------------------------------------------------------------------------------- 1 | xregexp-all.js 2 | lib 3 | tests/perf/versions 4 | tests/vendor 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Automatically normalize line endings for all text-based files 2 | * text=auto eol=lf 3 | -------------------------------------------------------------------------------- /tests/perf/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "globals": { 3 | "Benchmark": true 4 | } 5 | }; 6 | -------------------------------------------------------------------------------- /tests/vendor/benchmark.js/nano.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/slevithan/xregexp/HEAD/tests/vendor/benchmark.js/nano.jar -------------------------------------------------------------------------------- /types/tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "dtslint/dtslint.json", 3 | "rules": { 4 | "no-inferrable-types": false 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tests/helpers/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "rules": { 3 | "global-require": 0, 4 | "no-global-assign": 0, 5 | "no-native-reassign": 0 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- /tests/jasmine.json: -------------------------------------------------------------------------------- 1 | { 2 | "spec_dir": "tests", 3 | "spec_files": [ 4 | "spec/**/*.js" 5 | ], 6 | "helpers": [ 7 | "helpers/**/*.js" 8 | ], 9 | "stopSpecOnExpectationFailure": false, 10 | "random": false 11 | } 12 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | max_line_length = 100 11 | 12 | [*.yml] 13 | indent_size = 2 14 | -------------------------------------------------------------------------------- /tools/scripts/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "rules": { 3 | "global-require": "off", 4 | "no-console": "off", 5 | "no-sync": "off", 6 | "func-style": [ 7 | "error", 8 | "declaration", 9 | {"allowArrowFunctions": true} 10 | 
] 11 | } 12 | }; 13 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Node.js CI 2 | on: 3 | - push 4 | - pull_request 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v6 10 | - name: Use Node.js 11 | uses: actions/setup-node@v6 12 | with: 13 | node-version: 20 14 | - run: npm install 15 | - run: npm test 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lib 2 | xregexp-all.js 3 | .nyc_output 4 | coverage 5 | 6 | # Compiled Python files 7 | *.pyc 8 | 9 | # Installed npm modules 10 | node_modules 11 | 12 | # Folder view configuration files 13 | .DS_Store 14 | Desktop.ini 15 | 16 | # Thumbnail cache files 17 | ._* 18 | Thumbs.db 19 | 20 | # Files that might appear on external disks 21 | .Spotlight-V100 22 | .Trashes 23 | -------------------------------------------------------------------------------- /types/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "lib": [ 5 | "es6" 6 | ], 7 | "noImplicitAny": true, 8 | "noImplicitThis": true, 9 | "strictNullChecks": true, 10 | "strictFunctionTypes": true, 11 | "noEmit": true, 12 | "forceConsistentCasingInFileNames": true, 13 | "baseUrl": ".", 14 | "types": [], 15 | "paths": { "xregexp": ["."] } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": [ 3 | ["@babel/plugin-transform-unicode-property-regex", { "useUnicodeFlag": false }], 4 | [ 5 | "@babel/plugin-transform-runtime", 6 | { 7 | "corejs": 3 8 | } 9 | ], 10 | "add-module-exports", 11 | 
"transform-xregexp", 12 | "array-includes" 13 | ], 14 | "presets": [ 15 | [ 16 | "@babel/env", { 17 | "exclude": [ 18 | "transform-literals", 19 | "transform-sticky-regex" 20 | ] 21 | } 22 | ] 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /tools/scripts/script-regex.js: -------------------------------------------------------------------------------- 1 | const { 2 | assemble, 3 | writeFile, 4 | unicodeVersion 5 | } = require('./utils.js'); 6 | 7 | const scripts = require(`${unicodeVersion}`).Script; 8 | 9 | const result = []; 10 | for (const script of scripts) { 11 | if (script === 'Unknown') { 12 | continue; 13 | } 14 | const codePoints = require(`${unicodeVersion}/Script/${script}/code-points.js`); 15 | result.push(assemble({ 16 | name: script, 17 | codePoints 18 | })); 19 | } 20 | writeFile('scripts.js', result); 21 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import XRegExp from './xregexp'; 2 | 3 | import build from './addons/build'; 4 | import matchRecursive from './addons/matchrecursive'; 5 | import unicodeBase from './addons/unicode-base'; 6 | import unicodeCategories from './addons/unicode-categories'; 7 | import unicodeProperties from './addons/unicode-properties'; 8 | import unicodeScripts from './addons/unicode-scripts'; 9 | 10 | build(XRegExp); 11 | matchRecursive(XRegExp); 12 | unicodeBase(XRegExp); 13 | unicodeCategories(XRegExp); 14 | unicodeProperties(XRegExp); 15 | unicodeScripts(XRegExp); 16 | 17 | export default XRegExp; 18 | -------------------------------------------------------------------------------- /src/addons/unicode-scripts.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * XRegExp Unicode Scripts 5.1.2 3 | * 4 | * Steven Levithan (c) 2010-present MIT License 5 | * Unicode data by Mathias Bynens 6 | */ 7 | 8 | import scripts from '../../tools/output/scripts'; 9 | 10 | export default (XRegExp) => { 11 | 12 | /** 13 | * Adds support for all Unicode scripts. E.g., `\p{Latin}`. Token names are case insensitive, 14 | * and any spaces, hyphens, and underscores are ignored. 15 | * 16 | * Uses Unicode 14.0.0. 17 | * 18 | * @requires XRegExp, Unicode Base 19 | */ 20 | 21 | if (!XRegExp.addUnicodeData) { 22 | throw new ReferenceError('Unicode Base must be loaded before Unicode Scripts'); 23 | } 24 | 25 | XRegExp.addUnicodeData(scripts, 'Script'); 26 | }; 27 | -------------------------------------------------------------------------------- /tests/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "jasmine": true 4 | }, 5 | "globals": { 6 | "XRegExp": true, 7 | "resetFeatures": true, 8 | "REGEX_DATA": true, 9 | "hasNativeD": true, 10 | "hasNativeS": true, 11 | "hasNativeU": true, 12 | "hasNativeY": true, 13 | "hasStrictMode": true, 14 | "testUnicodeToken": true 15 | }, 16 | "rules": { 17 | "brace-style": 0, 18 | "dot-location": 0, 19 | "key-spacing": 0, 20 | "no-control-regex": 0, 21 | "no-empty-function": 0, 22 | "no-loop-func": 0, 23 | "no-multi-assign": 0, 24 | "no-multi-spaces": 0, 25 | "no-template-curly-in-string": 0, 26 | "no-useless-call": 0, 27 | "no-warning-comments": 0, 28 | "object-property-newline": 0 29 | } 30 | }; 31 | -------------------------------------------------------------------------------- /src/addons/unicode-categories.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * XRegExp Unicode Categories 5.1.2 3 | * 4 | * Steven Levithan (c) 2010-present MIT License 5 | * Unicode data by Mathias Bynens 6 | */ 7 | 8 | import categories from '../../tools/output/categories'; 9 | 10 | export default (XRegExp) => { 11 | 12 | /** 13 | * Adds support for Unicode's general categories. E.g., `\p{Lu}` or `\p{Uppercase Letter}`. See 14 | * category descriptions in UAX #44 . Token 15 | * names are case insensitive, and any spaces, hyphens, and underscores are ignored. 16 | * 17 | * Uses Unicode 14.0.0. 18 | * 19 | * @requires XRegExp, Unicode Base 20 | */ 21 | 22 | if (!XRegExp.addUnicodeData) { 23 | throw new ReferenceError('Unicode Base must be loaded before Unicode Categories'); 24 | } 25 | 26 | XRegExp.addUnicodeData(categories); 27 | }; 28 | -------------------------------------------------------------------------------- /tools/scripts/property-regex.js: -------------------------------------------------------------------------------- 1 | const { 2 | assemble, 3 | writeFile, 4 | unicodeVersion 5 | } = require('./utils.js'); 6 | 7 | // This includes only the binary properties required by UTS18 RL1.2 for level 1 Unicode regex 8 | // support, minus `Assigned` which has special handling since it is the inverse of Unicode category 9 | // `Unassigned`. 
To include all binary properties, change this to: 10 | // `const properties = require(`${unicodeVersion}`).Binary_Property;` 11 | const properties = [ 12 | 'ASCII', 13 | 'Alphabetic', 14 | 'Any', 15 | 'Default_Ignorable_Code_Point', 16 | 'Lowercase', 17 | 'Noncharacter_Code_Point', 18 | 'Uppercase', 19 | 'White_Space' 20 | ]; 21 | 22 | const result = []; 23 | for (const property of properties) { 24 | const codePoints = require(`${unicodeVersion}/Binary_Property/${property}/code-points.js`); 25 | result.push(assemble({ 26 | name: property, 27 | codePoints 28 | })); 29 | } 30 | writeFile('properties.js', result); 31 | -------------------------------------------------------------------------------- /tools/scripts/category-regex.js: -------------------------------------------------------------------------------- 1 | const aliasesToNames = require('unicode-property-value-aliases').get('General_Category'); 2 | 3 | const namesToAliases = new Map(); 4 | for (const [alias, name] of aliasesToNames) { 5 | if (!namesToAliases.has(name) || namesToAliases.get(name).length > name) { 6 | namesToAliases.set(name, alias); 7 | } 8 | } 9 | 10 | const { 11 | assemble, 12 | writeFile, 13 | unicodeVersion 14 | } = require('./utils.js'); 15 | 16 | const categories = require(`${unicodeVersion}`).General_Category; 17 | 18 | const aliases = []; 19 | for (const category of categories) { 20 | const alias = namesToAliases.get(category); 21 | aliases.push({ 22 | alias, 23 | category 24 | }); 25 | } 26 | aliases.sort(function(a, b) { 27 | return a.alias < b.alias ? 
-1 : 1; 28 | }); 29 | 30 | const result = []; 31 | for (const {alias, category} of aliases) { 32 | const codePoints = require(`${unicodeVersion}/General_Category/${category}/code-points.js`); 33 | result.push(assemble({ 34 | name: alias, 35 | alias: category, 36 | codePoints 37 | })); 38 | } 39 | writeFile('categories.js', result); 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2007-present Steven Levithan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/helpers/h.js: -------------------------------------------------------------------------------- 1 | if (typeof global === 'undefined') { 2 | global = window; 3 | } else { 4 | global.XRegExp = require('../../xregexp-all'); 5 | } 6 | 7 | // Ensure that all features are reset to default when each spec starts 8 | global.resetFeatures = function() { 9 | XRegExp.uninstall('astral'); 10 | XRegExp.install('namespacing'); 11 | }; 12 | 13 | // Property name used for extended regex instance data 14 | global.REGEX_DATA = 'xregexp'; 15 | 16 | // Check for ES2021 `d` flag support 17 | global.hasNativeD = XRegExp._hasNativeFlag('d'); 18 | // Check for ES2018 `s` flag support 19 | global.hasNativeS = XRegExp._hasNativeFlag('s'); 20 | // Check for ES6 `u` flag support 21 | global.hasNativeU = XRegExp._hasNativeFlag('u'); 22 | // Check for ES6 `y` flag support 23 | global.hasNativeY = XRegExp._hasNativeFlag('y'); 24 | // Check for strict mode support 25 | global.hasStrictMode = (function() { 26 | 'use strict'; 27 | 28 | return !this; 29 | }()); 30 | 31 | // Naive polyfill of String.prototype.repeat 32 | if (!String.prototype.repeat) { 33 | String.prototype.repeat = function(count) { 34 | return count ? 
Array(count + 1).join(this) : ''; 35 | }; 36 | } 37 | -------------------------------------------------------------------------------- /tests/vendor/benchmark.js/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2010-2016 Mathias Bynens 2 | Based on JSLitmus.js, copyright Robert Kieffer 3 | Modified by John-David Dalton 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /tests/helpers/h-matchers.js: -------------------------------------------------------------------------------- 1 | if (typeof global === 'undefined') { 2 | global = window; 3 | } 4 | 5 | global.addToEqualMatchMatcher = function() { 6 | jasmine.addMatchers({ 7 | // Similar to toEqual with arrays, but ignores custom properties of arrays. Useful when 8 | // comparing regex matches with array literals. 
9 | toEqualMatch: function() { 10 | return { 11 | compare: function(actual, expected) { 12 | var isA = jasmine.isA_; 13 | var result = {}; 14 | 15 | if (isA('Array', actual)) { 16 | if (!isA('Array', expected) || actual.length !== expected.length) { 17 | result.pass = false; 18 | } else { 19 | for (var i = 0; i < actual.length; ++i) { 20 | if (actual[i] !== expected[i]) { 21 | result.pass = false; 22 | } 23 | } 24 | if (result.pass === undefined) { 25 | result.pass = true; 26 | } 27 | } 28 | } else { 29 | result.pass = false; 30 | } 31 | 32 | return result; 33 | } 34 | }; 35 | } 36 | }); 37 | }; 38 | -------------------------------------------------------------------------------- /tests/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | XRegExp Specs 7 | 8 | 9 | 10 | 11 | 12 | 13 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "xregexp", 3 | "version": "5.1.2", 4 | "description": "Extended regular expressions", 5 | "homepage": "http://xregexp.com/", 6 | "author": "Steven Levithan ", 7 | "license": "MIT", 8 | "repository": { 9 | "type": "git", 10 | "url": "https://github.com/slevithan/xregexp.git" 11 | }, 12 | "keywords": [ 13 | "regex", 14 | "regexp", 15 | "regular expression", 16 | "unicode" 17 | ], 18 | "browser": "./lib/index.js", 19 | "main": "./lib/index.js", 20 | "module": "./src/index.js", 21 | "files": [ 22 | "src", 23 | "lib", 24 | "tools/output", 25 | "xregexp-all.js", 26 | "types/index.d.ts" 27 | ], 28 | "scripts": { 29 | "lint": "eslint .", 30 | "babel": "babel src -d lib", 31 | "build-unicode-data": "node tools/scripts/category-regex.js && node tools/scripts/property-regex.js && node tools/scripts/script-regex.js", 32 | 
"prebuild": "npm run build-unicode-data && npm run lint && npm run babel", 33 | "build": "browserify lib/index.js --standalone XRegExp > xregexp-all.js", 34 | "pretest": "npm run build", 35 | "test": "nyc --reporter=lcov --reporter=text-summary jasmine JASMINE_CONFIG_PATH=tests/jasmine.json", 36 | "prepublish": "npm test" 37 | }, 38 | "types": "types/index.d.ts", 39 | "devDependencies": { 40 | "@babel/cli": "^7.28.3", 41 | "@babel/core": "^7.28.5", 42 | "@babel/plugin-transform-unicode-property-regex": "^7.27.1", 43 | "@babel/plugin-transform-runtime": "^7.28.5", 44 | "@babel/preset-env": "^7.28.5", 45 | "@unicode/unicode-14.0.0": "^1.6.16", 46 | "babel-plugin-add-module-exports": "^1.0.4", 47 | "babel-plugin-array-includes": "^2.0.3", 48 | "babel-plugin-transform-xregexp": "^1.0.0", 49 | "browserify": "^17.0.1", 50 | "eslint": "^8.57.1", 51 | "jasmine": "^5.12.0", 52 | "jsesc": "^3.1.0", 53 | "nyc": "^17.1.0", 54 | "unicode-property-value-aliases": "^3.9.0" 55 | }, 56 | "dependencies": { 57 | "@babel/runtime-corejs3": "^7.28.4" 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/addons/unicode-properties.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * XRegExp Unicode Properties 5.1.2 3 | * 4 | * Steven Levithan (c) 2012-present MIT License 5 | * Unicode data by Mathias Bynens 6 | */ 7 | 8 | import properties from '../../tools/output/properties'; 9 | 10 | export default (XRegExp) => { 11 | 12 | /** 13 | * Adds properties to meet the UTS #18 Level 1 RL1.2 requirements for Unicode regex support. See 14 | * . Following are definitions of these properties from 15 | * UAX #44 : 16 | * 17 | * - Alphabetic 18 | * Characters with the Alphabetic property. Generated from: Lowercase + Uppercase + Lt + Lm + 19 | * Lo + Nl + Other_Alphabetic. 20 | * 21 | * - Default_Ignorable_Code_Point 22 | * For programmatic determination of default ignorable code points. 
New characters that should 23 | * be ignored in rendering (unless explicitly supported) will be assigned in these ranges, 24 | * permitting programs to correctly handle the default rendering of such characters when not 25 | * otherwise supported. 26 | * 27 | * - Lowercase 28 | * Characters with the Lowercase property. Generated from: Ll + Other_Lowercase. 29 | * 30 | * - Noncharacter_Code_Point 31 | * Code points permanently reserved for internal use. 32 | * 33 | * - Uppercase 34 | * Characters with the Uppercase property. Generated from: Lu + Other_Uppercase. 35 | * 36 | * - White_Space 37 | * Spaces, separator characters and other control characters which should be treated by 38 | * programming languages as "white space" for the purpose of parsing elements. 39 | * 40 | * The properties ASCII, Any, and Assigned are also included but are not defined in UAX #44. UTS 41 | * #18 RL1.2 additionally requires support for Unicode scripts and general categories. These are 42 | * included in XRegExp's Unicode Categories and Unicode Scripts addons. 43 | * 44 | * Token names are case insensitive, and any spaces, hyphens, and underscores are ignored. 45 | * 46 | * Uses Unicode 14.0.0. 47 | * 48 | * @requires XRegExp, Unicode Base 49 | */ 50 | 51 | if (!XRegExp.addUnicodeData) { 52 | throw new ReferenceError('Unicode Base must be loaded before Unicode Properties'); 53 | } 54 | 55 | const unicodeData = properties; 56 | 57 | // Add non-generated data 58 | unicodeData.push({ 59 | name: 'Assigned', 60 | // Since this is defined as the inverse of Unicode category Cn (Unassigned), the Unicode 61 | // Categories addon is required to use this property 62 | inverseOf: 'Cn' 63 | }); 64 | 65 | XRegExp.addUnicodeData(unicodeData); 66 | }; 67 | -------------------------------------------------------------------------------- /tests/perf/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | XRegExp Performance Tests 6 | 17 | 18 | 19 |
20 | 21 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /tests/vendor/benchmark.js/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark.js v2.1.1 2 | 3 | A [robust](https://mathiasbynens.be/notes/javascript-benchmarking "Bulletproof JavaScript benchmarks") benchmarking library that supports high-resolution timers & returns statistically significant results. As seen on [jsPerf](https://jsperf.com/). 4 | 5 | ## Documentation 6 | 7 | * [API Documentation](https://benchmarkjs.com/docs) 8 | 9 | ## Download 10 | 11 | * [Development source](https://raw.githubusercontent.com/bestiejs/benchmark.js/2.1.1/benchmark.js) 12 | 13 | ## Installation 14 | 15 | Benchmark.js’ only hard dependency is [lodash](https://lodash.com/). 16 | Include [platform.js](https://mths.be/platform) to populate [Benchmark.platform](https://benchmarkjs.com/docs#platform). 17 | 18 | In a browser: 19 | 20 | ```html 21 | 22 | 23 | 24 | ``` 25 | 26 | In an AMD loader: 27 | 28 | ```js 29 | require({ 30 | 'paths': { 31 | 'benchmark': 'path/to/benchmark', 32 | 'lodash': 'path/to/lodash', 33 | 'platform': 'path/to/platform' 34 | } 35 | }, 36 | ['benchmark'], function(Benchmark) {/*…*/}); 37 | ``` 38 | 39 | Using npm: 40 | 41 | ```bash 42 | $ npm i --save benchmark 43 | ``` 44 | 45 | In Node.js: 46 | 47 | ```js 48 | var Benchmark = require('benchmark'); 49 | ``` 50 | 51 | Optionally, use the [microtime module](https://github.com/wadey/node-microtime) by Wade Simmons: 52 | 53 | ```bash 54 | npm i --save microtime 55 | ``` 56 | 57 | Usage example: 58 | 59 | ```js 60 | var suite = new Benchmark.Suite; 61 | 62 | // add tests 63 | suite.add('RegExp#test', function() { 64 | /o/.test('Hello World!'); 65 | }) 66 | .add('String#indexOf', function() { 67 | 'Hello World!'.indexOf('o') > -1; 68 | }) 69 | // add listeners 70 | .on('cycle', function(event) { 71 | 
console.log(String(event.target)); 72 | }) 73 | .on('complete', function() { 74 | console.log('Fastest is ' + this.filter('fastest').map('name')); 75 | }) 76 | // run async 77 | .run({ 'async': true }); 78 | 79 | // logs: 80 | // => RegExp#test x 4,161,532 +-0.99% (59 cycles) 81 | // => String#indexOf x 6,139,623 +-1.00% (131 cycles) 82 | // => Fastest is String#indexOf 83 | ``` 84 | 85 | ## Developing 86 | 87 | The following `npm` tasks are available to assist during development and release: 88 | 89 | - `npm run server` will start `live-server` and open the base directory in your browser; then you can, for example, browse to /example/jsperf/ to run the available tests in your browser using the local benchmark.js file. 90 | 91 | - `npm run test` -- nuff said. 92 | 93 | - `npm run doc` -- will regenerate the documentation from source. 94 | 95 | Also note that rough support for a test *catalog* is available for the `/example/jsperf/` demo: run `./build-jsperf.sh` to update the catalog file and then the next reload of the `/example/jsperf/index.html` page will show a clickable list of all available tests near the bottom so you can browse and jump from one test file/suite to another. 96 | 97 | 98 | ## Support 99 | 100 | Tested in Chrome 46-47, Firefox 42-43, IE 9-11, Edge 13, Safari 8-9, Node.js 0.10-6, & PhantomJS 1.9.8. 101 | 102 | ## BestieJS 103 | 104 | Benchmark.js is part of the BestieJS *“Best in Class”* module collection. This means we promote solid browser/environment support, ES5+ precedents, unit testing, & plenty of documentation. 
105 | -------------------------------------------------------------------------------- /docs/assets/index.css: -------------------------------------------------------------------------------- 1 | body {font-family:Calibri, Tahoma, Verdana, Arial, Helvetica, sans-serif; font-size:85%; margin:0; padding:0; background:#fff;} 2 | a:link, a:visited {color:#296e31; text-decoration:none;} 3 | a:hover, a:active {color:#0a3716; text-decoration:underline;} 4 | #header {padding:15px 15px 10px; border-bottom:3px solid #e3e3e3; background:#f3f3f3;} 5 | #logoX {color:#999;} 6 | #body {height:100%; padding:15px;} 7 | #navBar {height:100%; width:200px; float:left;} 8 | #main {height:100%; margin-left:200px;} 9 | #footer {clear:both; border-top:3px solid #e3e3e3; padding:0 15px 20px;} 10 | #footnotes {margin-top:25px;} 11 | #tocContainer {float:right; background:#fff; padding:5px 0 20px 20px;} 12 | #toc {border:1px solid #aaa; padding:0 20px 8px;} 13 | #toc h2 {margin-top:15px;} 14 | #toc ul {padding-left:15px;} 15 | .small {font-size:80%;} 16 | .plain {font-weight:normal;} 17 | .alert {color:#900; font-weight:bold;} 18 | .todo {color:#c00; font-weight:bold;} 19 | .clear {clear:both;} 20 | h1 {margin-bottom:0; font-family:Cambria, Tahoma, Verdana, Arial, Helvetica, sans-serif;} 21 | h1 a:link, h1 a:visited, h1 a:active, h1 a:hover {color:#000; text-decoration:none;} 22 | h1.subtitle {margin-top:0; font-size:1.2em; font-weight:normal; font-family:Calibri, Tahoma, Verdana, Arial, Helvetica, sans-serif;} 23 | h2 {border-bottom:1px solid #aaa; margin-top:25px; font-family:Cambria, "Times New Roman", Times, serif; font-size:145%;} 24 | h2 code {border-bottom:0;} 25 | h2 code span.plain {font-size:90%;} 26 | h3 {margin:15px 0 10px; font-family:Cambria, "Times New Roman", Times, serif; font-size:125%; font-weight:normal;} 27 | pre {background:#fafafa; white-space:pre-wrap; font-family:Monaco, Consolas, "Courier New", Courier, monospace; border:1px solid #e3e3e3; padding:5px;} 28 | code 
{font-family:Monaco, Consolas, "Courier New", Courier, monospace; border:1px solid #eee; background:#f3f3f3;} 29 | cite {font-style:normal;} 30 | q {font-style:italic;} 31 | q:before, q:after {content:"";} 32 | mark {background:#ffc;} 33 | li {margin-bottom:1px; line-height:130%;} 34 | table {border-collapse:collapse; border-color:#888;} 35 | table ul {padding-left:20px; margin:0;} 36 | thead {background:#333; color:#f3f3f3;} 37 | th, td {border:solid #888; border-width:0 1px 1px 0; padding:5px;} 38 | tr.alt {background:#f3f3f3;} 39 | tr.alt code {background:#fafafa;} 40 | table.api {margin-left:20px;} 41 | table.api th, table.api td {border:0;} 42 | table.api tr.alt {background:#fff;} 43 | table.api tr.alt td {border-top:1px solid #ddd;} 44 | table.api tbody th {vertical-align:top; text-align:left; border-right:1px solid #ddd;} 45 | div.aside {border:3px double #ddd; background:#f6f6f6; padding:0 15px 15px; margin-bottom:15px;} 46 | div.aside p {margin:15px 0 0;} 47 | div.aside code {border:1px solid #ddd; background:#f6f6f6; padding:0 2px;} 48 | div.right {float:right; clear:right;} 49 | div.aside.right {width:300px; margin-left:15px;} 50 | a.footnoteLink {font-size:80%; color:#999;} 51 | tr.highlight {background:#bfdcff;} 52 | tr.highlight code {border-color:#99b9df; background:#b3ceef;} 53 | 54 | .menu { 55 | width:180px; 56 | } 57 | .menu ul { 58 | list-style-type:none; 59 | margin:0; 60 | padding:0 0 10px 0; 61 | border:0 solid #a0df99; 62 | border-width:0 1px 1px 0; 63 | } 64 | .menu li a { 65 | font:italic 15px Georgia, "Times New Roman", Times, serif; 66 | display:block; 67 | height:24px; 68 | padding:4px 0 4px 10px; 69 | line-height:24px; 70 | text-decoration:none; 71 | } 72 | .menu li a:link, .menu li a:visited { 73 | color:#296e31; 74 | } 75 | .menu li a:hover { 76 | color:#0a3716; 77 | text-decoration:underline; 78 | } 79 | .menu li a.selected { 80 | color:#333; font-weight:bold; 81 | } 82 | a img {border:0;} 83 | 
-------------------------------------------------------------------------------- /docs/unicode/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Unicode :: XRegExp 6 | 7 | 8 | 9 | 13 |
14 | 25 |
26 | 27 | 28 | 29 | 30 | 31 |

Unicode

32 | 33 |

Requires the Unicode addons, which are bundled in xregexp-all.js. Alternatively, you can download the individual addon scripts from GitHub. XRegExp's npm package uses xregexp-all.js.

34 | 35 |

The Unicode Base script adds base support for Unicode matching via the \p{…} syntax. À la carte token addon packages add support for Unicode categories, scripts, and other properties. All Unicode tokens can be inverted using \P{…} or \p{^…}. Token names are case insensitive, and any spaces, hyphens, and underscores are ignored. You can omit the braces for token names that are a single letter.

36 | 37 |

Example

38 |
// Categories
39 | XRegExp('\\p{Sc}\\pN+'); // Sc = currency symbol, N = number
40 | // Can also use the full names \p{Currency_Symbol} and \p{Number}
41 | 
42 | // Scripts
43 | XRegExp('\\p{Cyrillic}');
44 | XRegExp('[\\p{Latin}\\p{Common}]');
45 | // Can also use the Script= prefix to match ES2018: \p{Script=Cyrillic}
46 | 
47 | // Properties
48 | XRegExp('\\p{ASCII}');
49 | XRegExp('\\p{Assigned}');
50 | 
51 | // In action...
52 | 
53 | const unicodeWord = XRegExp("^\\pL+$"); // L = letter
54 | unicodeWord.test("Русский"); // true
55 | unicodeWord.test("日本語"); // true
56 | unicodeWord.test("العربية"); // true
57 | 
58 | XRegExp("^\\p{Katakana}+$").test("カタカナ"); // true
59 | 
60 | 61 |

By default, \p{…} and \P{…} support the Basic Multilingual Plane (i.e. code points up to U+FFFF). You can opt in to full 21-bit Unicode support (with code points up to U+10FFFF) on a per-regex basis by using flag A. In XRegExp, this is called astral mode. You can automatically add flag A for all new regexes by running XRegExp.install('astral'). When in astral mode, \p{…} and \P{…} always match a full code point rather than a code unit, using surrogate pairs for code points above U+FFFF.

62 | 63 |
// Using flag A to match astral code points
64 | XRegExp('^\\pS$').test('💩'); // -> false
65 | XRegExp('^\\pS$', 'A').test('💩'); // -> true
66 | // Using surrogate pair U+D83D U+DCA9 to represent U+1F4A9 (pile of poo)
67 | XRegExp('^\\pS$', 'A').test('\uD83D\uDCA9'); // -> true
68 | 
69 | // Implicit flag A
70 | XRegExp.install('astral');
71 | XRegExp('^\\pS$').test('💩'); // -> true
72 | 
73 | 74 |

Opting in to astral mode disables the use of \p{…} and \P{…} within character classes. In astral mode, use e.g. (\pL|[0-9_])+ instead of [\pL0-9_]+.

75 | 76 | 77 | 78 | 79 | 80 |
81 |
82 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /tests/helpers/h-unicode.js: -------------------------------------------------------------------------------- 1 | if (typeof global === 'undefined') { 2 | global = window; 3 | } 4 | 5 | /* 6 | * Runs a series of `expect` assertions, given a Unicode token name and arrays of code points that 7 | * should or should not be matched. 8 | */ 9 | global.testUnicodeToken = function(name, options) { 10 | var pattern = '^\\p{' + name + '}$'; 11 | var negated = '^\\P{' + name + '}$'; 12 | var astralRegex = XRegExp(pattern, 'A'); 13 | var negatedAstralRegex = XRegExp(negated, 'A'); 14 | var bmpRegex; 15 | var negatedBmpRegex; 16 | var isBmpChar; 17 | 18 | if (options.isAstralOnly) { 19 | expect(function() {XRegExp(pattern);}).toThrowError(SyntaxError); 20 | expect(function() {XRegExp(negated);}).toThrowError(SyntaxError); 21 | } else { 22 | bmpRegex = XRegExp(pattern); 23 | negatedBmpRegex = XRegExp(negated); 24 | } 25 | 26 | if (options.valid) { 27 | options.valid.forEach(function(chr) { 28 | expect(astralRegex.test(chr)).toBe(true); 29 | expect(negatedAstralRegex.test(chr)).toBe(false); 30 | if (!options.isAstralOnly) { 31 | isBmpChar = chr.length === 1; //chr.codePointAt(0) === chr.charCodeAt(0) 32 | expect(bmpRegex.test(chr)).toBe(isBmpChar); 33 | expect(negatedBmpRegex.test(chr)).toBe(false); 34 | } 35 | }); 36 | } 37 | 38 | if (options.invalid) { 39 | options.invalid.forEach(function(chr) { 40 | expect(astralRegex.test(chr)).toBe(false); 41 | expect(negatedAstralRegex.test(chr)).toBe(true); 42 | if (!options.isAstralOnly) { 43 | isBmpChar = chr.length === 1; //chr.codePointAt(0) === chr.charCodeAt(0) 44 | expect(bmpRegex.test(chr)).toBe(false); 45 | expect(negatedBmpRegex.test(chr)).toBe(isBmpChar); 46 | } 47 | }); 48 | } 49 | }; 50 | 51 | 52 | /*! 
53 | * ES6 Unicode Shims 0.1 54 | * Steven Levithan (c) 2012 MIT License 55 | */ 56 | 57 | /** 58 | * Returns a string created using the specified sequence of Unicode code points. Accepts integers 59 | * between 0 and 0x10FFFF. Code points above 0xFFFF are converted to surrogate pairs. If a provided 60 | * integer is in the surrogate range, it produces an unpaired surrogate. Comes from accepted ES6 61 | * proposals. 62 | * @memberOf String 63 | * @param {Number} cp1, cp2... Sequence of Unicode code points. 64 | * @returns {String} String created from the specified code points. 65 | * @example 66 | * 67 | * // Basic use 68 | * String.fromCodePoint(0x41); // -> 'A' 69 | * 70 | * // Multiple code points; returns astral characters as surrogate pairs 71 | * String.fromCodePoint(0x20B20, 0x28B4E, 0x29DF6); 72 | * // Unlike String.fromCharCode, this correctly handles code points above 0xFFFF 73 | */ 74 | if (!String.fromCodePoint) { 75 | String.fromCodePoint = function() { 76 | var chars = [], 77 | i, offset, point, units; 78 | for (i = 0; i < arguments.length; ++i) { 79 | point = arguments[i]; 80 | offset = point - 0x10000; 81 | units = point > 0xFFFF ? [0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)] : [point]; 82 | chars.push(String.fromCharCode.apply(null, units)); 83 | } 84 | return chars.join(""); 85 | }; 86 | } 87 | 88 | /** 89 | * Returns the numeric Unicode code point of the character at the given index. Here `pos` is the 90 | * code *unit* position. If it's the second surrogate of a pair or an unpaired starting surrogate, 91 | * the code unit of the surrogate is returned; otherwise the code point is derived from the 92 | * surrogate pair. Comes from accepted ES6 proposals. 93 | * @memberOf String.prototype 94 | * @param {Number} [pos=0] Code point index in the string. Defaults to `0` if not a number. 95 | * @returns {Number} Code point at the specified index. `NaN` if the index is less than `0` or 96 | * greater than the string length. 
97 | * @example 98 | * 99 | * var str = String.fromCodePoint(166734); 100 | * str.codePointAt(0); // -> 166734 101 | * // Unlike the charCodeAt method, this correctly handles code points above 0xFFFF 102 | */ 103 | /*if (!String.prototype.codePointAt) { 104 | String.prototype.codePointAt = function (pos) { 105 | pos = isNaN(pos) ? 0 : pos; 106 | var str = String(this), 107 | code = str.charCodeAt(pos), 108 | next = str.charCodeAt(pos + 1); 109 | // If a surrogate pair 110 | if (0xD800 <= code && code <= 0xDBFF && 0xDC00 <= next && next <= 0xDFFF) { 111 | return ((code - 0xD800) * 0x400) + (next - 0xDC00) + 0x10000; 112 | } 113 | return code; 114 | }; 115 | }*/ 116 | -------------------------------------------------------------------------------- /tools/scripts/utils.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const jsesc = require('jsesc'); 3 | 4 | const pkg = require('../../package.json'); 5 | const dependencies = Object.keys(pkg.devDependencies); 6 | const unicodeVersion = dependencies.find((name) => /^@unicode\/unicode-\d/.test(name)); 7 | 8 | // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae 9 | const highSurrogate = (codePoint) => Math.floor((codePoint - 0x10000) / 0x400) + 0xD800; 10 | 11 | const lowSurrogate = (codePoint) => ((codePoint - 0x10000) % 0x400) + 0xDC00; 12 | 13 | const codePointToString = (codePoint) => { 14 | const string = String.fromCodePoint(codePoint); 15 | // Important: escape RegExp meta-characters. 16 | if (/[$()*+\-\./?\[\]^{|}]/.test(string)) { 17 | return `\\${string}`; 18 | } 19 | return string; 20 | }; 21 | 22 | const createRange = (codePoints) => { 23 | // Does the range contain lone high surrogates? 24 | let isBmpLast = false; 25 | // Does the range contain astral code points? 
26 | let hasAstralCodePoints = false; 27 | const bmp = []; 28 | const supplementary = new Map(); 29 | for (const codePoint of codePoints) { 30 | if (codePoint >= 0xD800 && codePoint <= 0xDBFF) { 31 | isBmpLast = true; 32 | bmp.push(codePoint); 33 | } else if (codePoint <= 0xFFFF) { 34 | bmp.push(codePoint); 35 | } else { // It’s a supplementary code point. 36 | const hi = highSurrogate(codePoint); 37 | const lo = lowSurrogate(codePoint); 38 | if (supplementary.has(hi)) { 39 | supplementary.get(hi).push(lo); 40 | } else { 41 | supplementary.set(hi, [lo]); 42 | } 43 | hasAstralCodePoints = true; 44 | } 45 | } 46 | 47 | const supplementaryByLowRanges = new Map(); 48 | for (const [hi, lo] of supplementary) { 49 | const key = createBmpRange(lo); 50 | if (supplementaryByLowRanges.has(key)) { 51 | supplementaryByLowRanges.get(key).push(hi); 52 | } else { 53 | supplementaryByLowRanges.set(key, [hi]); 54 | } 55 | } 56 | // `supplementaryDictByLowRanges` looks like this: 57 | // { 'low surrogate range': [list of high surrogates that have this exact low surrogate range] }) 58 | 59 | const bmpRange = createBmpRange(bmp, {addBrackets: false}); 60 | 61 | const buf = []; 62 | let astralRange = ''; 63 | 64 | // [bmpRange (including orphaned high surrogates), astralRange, isBmpLast] 65 | if (hasAstralCodePoints) { 66 | for (const [lo, hi] of supplementaryByLowRanges) { 67 | buf.push(createBmpRange(hi) + lo); 68 | } 69 | astralRange = buf.join('|'); 70 | } 71 | 72 | return { 73 | bmp: bmpRange, 74 | astral: astralRange, 75 | isBmpLast: isBmpLast && hasAstralCodePoints 76 | }; 77 | }; 78 | 79 | const createBmpRange = (r, {addBrackets} = {addBrackets: true}) => { 80 | if (r.length === 0) {return '';} 81 | 82 | const buf = []; 83 | let [start] = r; 84 | let [end] = r; 85 | let predict = start + 1; 86 | r = r.slice(1); 87 | 88 | let counter = 0; 89 | for (const code of r) { 90 | if (predict == code) { 91 | end = code; 92 | predict = code + 1; 93 | continue; 94 | } else { 95 | if (start 
== end) { 96 | buf.push(codePointToString(start)); 97 | counter++; 98 | } else if (end == start + 1) { 99 | buf.push(`${codePointToString(start)}${codePointToString(end)}`); 100 | counter += 2; 101 | } else { 102 | buf.push(`${codePointToString(start)}-${codePointToString(end)}`); 103 | counter += 2; 104 | } 105 | start = code; 106 | end = code; 107 | predict = code + 1; 108 | } 109 | } 110 | 111 | if (start == end) { 112 | buf.push(codePointToString(start)); 113 | counter++; 114 | } else if (end == start + 1) { 115 | buf.push(`${codePointToString(start)}${codePointToString(end)}`); 116 | counter += 2; 117 | } else { 118 | buf.push(`${codePointToString(start)}-${codePointToString(end)}`); 119 | counter += 2; 120 | } 121 | 122 | const output = buf.join(''); 123 | if (!addBrackets || counter == 1) { 124 | return output; 125 | } 126 | return `[${output}]`; 127 | }; 128 | 129 | const assemble = ({name, alias, codePoints}) => { 130 | const {bmp, astral, isBmpLast} = createRange(codePoints); 131 | const result = {name}; 132 | if (alias) { 133 | result.alias = alias; 134 | } 135 | if (isBmpLast) { 136 | result.isBmpLast = true; 137 | } 138 | if (bmp) { 139 | result.bmp = bmp; 140 | } 141 | if (astral) { 142 | result.astral = astral; 143 | } 144 | return result; 145 | }; 146 | 147 | const writeFile = (name, object) => { 148 | console.log(`Saving ${name}…`); 149 | const output = jsesc(object, { 150 | compact: false, 151 | indent: ' ' 152 | }); 153 | fs.writeFileSync( 154 | `${__dirname}/../output/${name}`, 155 | `module.exports = ${output};\n` 156 | ); 157 | }; 158 | 159 | module.exports = { 160 | assemble, 161 | writeFile, 162 | unicodeVersion 163 | }; 164 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | XRegExp 6 | 7 | 8 | 9 | 13 |
14 | 25 |
26 | 27 | 28 | 29 | 30 | 31 |
32 | 41 |
42 | 43 |

What is it?

44 | 45 |

XRegExp provides augmented (and extensible) JavaScript regular expressions. You get modern syntax and flags beyond what browsers support natively. XRegExp is also a regex utility belt with tools to make your grepping and parsing easier, while freeing you from regex cross-browser inconsistencies and other annoyances.

46 | 47 |

XRegExp supports all native ES6 regular expression syntax. It supports ES5+ browsers (including Internet Explorer 9+), and you can use it with Node.js or as a RequireJS module. Over the years, many of XRegExp's features have been adopted by new JavaScript standards (named capturing, Unicode properties/scripts/categories, flag s, sticky matching, etc.), so using XRegExp can be a way to extend these features into older browsers. It's released under the MIT License.

48 | 49 |

XRegExp lets you write regexes like this:

50 | 51 |
// Using named capture and flag x (free-spacing and line comments)
 52 | const date = XRegExp(`(?<year>  [0-9]{4} ) -?  # year
 53 |                       (?<month> [0-9]{2} ) -?  # month
 54 |                       (?<day>   [0-9]{2} )     # day`, 'x');
 55 | 
56 | 57 |

And do cool stuff like this:

58 | 59 |
// Using named backreferences...
 60 | XRegExp.exec('2021-02-23', date).groups.year;
 61 | // -> '2021'
 62 | XRegExp.replace('2021-02-23', date, '$<month>/$<day>/$<year>');
 63 | // -> '02/23/2021'
 64 | 
 65 | // Finding matches within matches, while passing forward and returning specific backreferences
 66 | const html = `<a href="https://xregexp.com/api/">XRegExp</a>
 67 |               <a href="https://www.google.com/">Google</a>`;
 68 | XRegExp.matchChain(html, [
 69 |   {regex: /<a href="([^"]+)">/i, backref: 1},
 70 |   {regex: XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'}
 71 | ]);
 72 | // -> ['xregexp.com', 'www.google.com']
 73 | 
74 | 75 |

Check out more usage examples on GitHub ⇨.

76 | 77 |

Features

78 | 79 | 85 | 86 |

Performance

87 | 88 |

XRegExp compiles to native RegExp objects. Therefore, regexes built with XRegExp perform just as fast as native regular expressions. There is a tiny extra cost when compiling a pattern for the first time.

89 | 90 |

Installation and usage

91 | 92 |

In browsers (bundle XRegExp with all of its addons):

93 | 94 |
<script src="https://unpkg.com/xregexp/xregexp-all.js"></script>
 95 | 
96 | 97 |

Using npm:

98 | 99 |
npm install xregexp
100 | 
101 | 102 |

In Node.js:

103 | 104 |
const XRegExp = require('xregexp');
105 | 
106 | 107 |

Named Capture Breaking Change in XRegExp 5

108 | 109 |

XRegExp 5 introduced a breaking change where named backreference properties now appear on the result's groups object (following ES2018), rather than directly on the result. To restore the old handling so you don't need to update old code, run the following line after importing XRegExp:

110 | 111 |
XRegExp.uninstall('namespacing');
112 | 
113 | 114 |

XRegExp 4.1.0 and later allow introducing the new behavior without upgrading to XRegExp 5 by running XRegExp.install('namespacing').

115 | 116 |

The following is the most commonly needed change when updating code for the new behavior:

117 | 118 |
// Change this
119 | const name = XRegExp.exec(str, regexWithNamedCapture).name;
120 | 
121 | // To this
122 | const name = XRegExp.exec(str, regexWithNamedCapture).groups.name;
123 | 
124 | 125 |

See the README on GitHub ⇨ for more examples of using named capture with XRegExp.exec and XRegExp.replace.

126 | 127 | 128 | 129 | 130 | 131 |
132 |
133 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /tests/spec/s-addons-build.js: -------------------------------------------------------------------------------- 1 | beforeEach(function() { 2 | global.resetFeatures(); 3 | global.addToEqualMatchMatcher(); 4 | }); 5 | 6 | describe('XRegExp.build addon:', function() { 7 | 8 | describe('XRegExp.tag()', function() { 9 | 10 | it('should escape the metacharacters of interpolated strings', function() { 11 | var inner = '.html'; 12 | var re = XRegExp.tag()`^index${inner}$`; 13 | 14 | expect(re.test('index.html')).toBe(true); 15 | expect(re.test('index-html')).toBe(false); 16 | }); 17 | 18 | it('should rewrite the backreferences of interpolated regexes', function() { 19 | var inner = /(.)\1/; 20 | var re = XRegExp.tag()`^${inner}${inner}$`; 21 | 22 | expect(re.test('aabb')).toBe(true); 23 | expect(re.test('aaba')).toBe(false); 24 | }); 25 | 26 | it('should treat interpolated strings as atomic tokens', function() { 27 | var inner = 'ab'; 28 | var re = XRegExp.tag()`^${inner}+$`; 29 | 30 | expect(re.test('abab')).toBe(true); 31 | expect(re.test('abb')).toBe(false); 32 | }); 33 | 34 | it('should treat interpolated regexes as atomic tokens', function() { 35 | var inner = /ab/; 36 | var re = XRegExp.tag()`^${inner}+$`; 37 | 38 | expect(re.test('abab')).toBe(true); 39 | expect(re.test('abb')).toBe(false); 40 | }); 41 | 42 | it('should support the "x" flag', function() { 43 | var inner = /ab/; 44 | var re = XRegExp.tag('x')` 45 | ^ 46 | ${inner} 47 | + 48 | $ 49 | `; 50 | 51 | expect(re.test('abab')).toBe(true); 52 | expect(re.test('abb')).toBe(false); 53 | }); 54 | 55 | it('should support the "n" flag', function() { 56 | var inner = XRegExp('(unnamed), (?named)'); 57 | var re = XRegExp.tag('n')`${inner}`; 58 | 59 | expect(re.exec('unnamed, named')[1]).toBe('named'); 60 | }); 61 | 62 | it('should support the "g" flag', function() { 63 | var inner = 'a'; 64 | var re = 
XRegExp.tag('g')`${inner}`; 65 | 66 | expect('aaa'.match(re)).toEqual(['a', 'a', 'a']); 67 | }); 68 | 69 | it('should allow `false` to be interpolated', function() { 70 | var inner = false; 71 | var re = XRegExp.tag()`^${inner}$`; 72 | 73 | expect(re.test('false')).toBe(true); 74 | }); 75 | 76 | it('should allow unescaped character classes', function() { 77 | var re = XRegExp.tag()`\d`; 78 | 79 | expect(re.test('1')).toBe(true); 80 | }); 81 | 82 | it('should work as described in the comment @example', function() { 83 | var h12 = /1[0-2]|0?[1-9]/; 84 | var h24 = /2[0-3]|[01][0-9]/; 85 | var hours = XRegExp.tag('x')`${h12} : | ${h24}`; 86 | var minutes = /^[0-5][0-9]$/; 87 | var time = XRegExp.tag('x')`^ ${hours} (?${minutes}) $`; 88 | 89 | expect(time.test('10:59')).toBe(true); 90 | expect(XRegExp.exec('10:59', time).groups.minutes).toEqual('59'); 91 | }); 92 | 93 | }); 94 | 95 | describe('XRegExp.build()', function() { 96 | 97 | it('should apply a mode modifier in the outer pattern to the full regex with interpolated values', function() { 98 | expect(XRegExp.build('(?x){{a}}', {a: /1 2/}).test('12')).toBe(true); 99 | // IE 7 and 8 (not 6 or 9) throw an Error rather than SyntaxError 100 | expect(function() {XRegExp.build('(?x)({{a}})', {a: /#/});}).toThrow(); 101 | }); 102 | 103 | it('should ignore newlines when using flag x', function() { 104 | expect(XRegExp.build('(?x)\n', {}).test('')).toBe(true); 105 | expect(XRegExp.build('\n', {}, 'x').test('')).toBe(true); 106 | expect(XRegExp.build('{{sub}}', {sub: '\n'}, 'x').test('')).toBe(true); 107 | }); 108 | 109 | it('should apply a mode modifier with a native flag in the outer pattern to the final result', function() { 110 | expect(XRegExp.build('(?m){{a}}', {a: /a/}).multiline).toBe(true); 111 | expect(XRegExp.build('(?i){{a}}', {a: /a/}).ignoreCase).toBe(true); 112 | }); 113 | 114 | it('should throw an exception when a mode modifier with g or y is used in the outer pattern', function() { 115 | expect(function() 
{XRegExp.build('(?g){{a}}', {a: /a/});}).toThrowError(SyntaxError); 116 | expect(function() {XRegExp.build('(?y){{a}}', {a: /a/});}).toThrowError(SyntaxError); 117 | expect(function() {XRegExp.build('(?migs){{a}}', {a: /a/});}).toThrowError(SyntaxError); 118 | }); 119 | 120 | it('should not interpolate named subpatterns within character classes', function() { 121 | expect(XRegExp.build('^[{{a}}]$', {a: 'x'}).test('x')).toBe(false); 122 | expect(XRegExp.build('^{{a}}[{{a}}]$', {a: 'x'}).test('x{')).toBe(true); 123 | }); 124 | 125 | it('should strip a leading ^ and trailing unescaped $ in subpatterns, when both are present', function() { 126 | expect(XRegExp.build('{{x}}', {x: /^123$/}).test('01234')).toBe(true); 127 | expect(XRegExp.build('{{x}}', {x: '^123$'}).test('01234')).toBe(true); 128 | expect( 129 | XRegExp.build( 130 | ' (?#comment) {{sub}} ', 131 | {sub: XRegExp(' (?#comment) ^123$ ', 'x')}, 132 | 'x' 133 | ).test('01234') 134 | ).toBe(true); 135 | }); 136 | 137 | it('should not strip a leading ^ and trailing unescaped $ in subpatterns, when both are not present', function() { 138 | expect(XRegExp.build('{{x}}', {x: '^123'}).test('123')).toBe(true); 139 | expect(XRegExp.build('{{x}}', {x: '^123'}).test('01234')).toBe(false); 140 | expect(XRegExp.build('{{x}}', {x: '123$'}).test('123')).toBe(true); 141 | expect(XRegExp.build('{{x}}', {x: '123$'}).test('01234')).toBe(false); 142 | }); 143 | 144 | it('should not strip a leading ^ and trailing unescaped $ in subpatterns, when both are present but not leading/trailing', function() { 145 | expect(XRegExp.build('{{x}}', {x: '^1$'}).test('11')).toBe(true); 146 | expect(XRegExp.build('{{x}}', {x: '^1$\\b'}).test('11')).toBe(false); 147 | }); 148 | 149 | it('should not strip a trailing escaped $ in subpatterns', function() { 150 | expect(XRegExp.build('{{x}}', {x: '^123\\$'}).test('123$')).toBe(true); 151 | expect(XRegExp.build('{{x}}', {x: '^123\\$'}).test('0123$4')).toBe(false); 152 | }); 153 | 154 | it('should 
support flag n with mixed named and unnamed groups', function() { 155 | expect(function() {XRegExp.build('()(?)\\k', {}, 'n');}).not.toThrow(); 156 | expect(function() {XRegExp.build('{{a}}', {a: '()(?)\\k'}, 'n');}).not.toThrow(); 157 | expect(function() {XRegExp.build('()(?)\\k{{a}}', {a: '()(?)\\k'}, 'n');}).not.toThrow(); 158 | }); 159 | 160 | // TODO: Add complete specs 161 | 162 | it('should pass the readme example', function() { 163 | var time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', { 164 | hours: XRegExp.build('{{h12}} : | {{h24}}', { 165 | h12: /1[0-2]|0?[1-9]/, 166 | h24: /2[0-3]|[01][0-9]/ 167 | }), 168 | minutes: /^[0-5][0-9]$/ 169 | }); 170 | 171 | expect(time.test('10:59')).toBe(true); 172 | expect(XRegExp.exec('10:59', time).groups.minutes).toBe('59'); 173 | }); 174 | 175 | it('should pass a series of complex backreference rewrites', function() { 176 | // Equivalent to: XRegExp('(?(?a)\\2)\\1(?(?b)\\4)\\3()\\5\\1\\3\\k') 177 | var built = XRegExp.build('({{n1}})\\1(?{{n2}})\\2()\\3\\1\\2\\k', { 178 | n1: XRegExp('(?a)\\1'), 179 | n2: XRegExp('(?b)\\1') 180 | }); 181 | var match = XRegExp.exec('aaaabbbbaabbbb', built); 182 | 183 | expect(match).toBeTruthy(); 184 | expect(match.groups.n1).toBe('aa'); 185 | expect(match.groups.n2).toBeUndefined(); 186 | expect(match.groups.nX).toBe('bb'); 187 | expect(match.groups.yo).toBe('a'); 188 | expect(match.groups.yo2).toBe('b'); 189 | }); 190 | 191 | }); 192 | 193 | }); 194 | -------------------------------------------------------------------------------- /tests/spec/s-addons-matchrecursive.js: -------------------------------------------------------------------------------- 1 | beforeEach(function() { 2 | global.resetFeatures(); 3 | global.addToEqualMatchMatcher(); 4 | }); 5 | 6 | describe('XRegExp.matchRecursive addon:', function() { 7 | 8 | describe('XRegExp.matchRecursive()', function() { 9 | 10 | it('should pass the readme example for basic usage', function() { 11 | const str = 
'(t((e))s)t()(ing)'; 12 | expect(XRegExp.matchRecursive(str, '\\(', '\\)', 'g')).toEqual(['t((e))s', '', 'ing']); 13 | }); 14 | 15 | it('should pass the readme example for extended information mode with valueNames', function() { 16 | const str = 'Here is
an
example'; 17 | expect( 18 | XRegExp.matchRecursive(str, '', '', 'gi', { 19 | valueNames: ['between', 'left', 'match', 'right'] 20 | })) 21 | .toEqual([ 22 | {name: 'between', value: 'Here is ', start: 0, end: 8}, 23 | {name: 'left', value: '
', start: 8, end: 13}, 24 | {name: 'match', value: '
an
', start: 13, end: 27}, 25 | {name: 'right', value: '
', start: 27, end: 33}, 26 | {name: 'between', value: ' example', start: 33, end: 41} 27 | ]); 28 | }); 29 | 30 | it('should pass the readme example for omitting unneeded parts with null valueNames and using escapeChar', function() { 31 | const str = '...{1}.\\{{function(x,y){return {y:x}}}'; 32 | expect( 33 | XRegExp.matchRecursive(str, '{', '}', 'g', { 34 | valueNames: ['literal', null, 'value', null], 35 | escapeChar: '\\' 36 | })) 37 | .toEqual([ 38 | {name: 'literal', value: '...', start: 0, end: 3}, 39 | {name: 'value', value: '1', start: 4, end: 5}, 40 | {name: 'literal', value: '.\\{', start: 6, end: 9}, 41 | {name: 'value', value: 'function(x,y){return {y:x}}', start: 10, end: 37} 42 | ]); 43 | }); 44 | 45 | it('should pass the readme example for sticky mode via flag y', function() { 46 | const str = '<1><<<2>>><3>4<5>'; 47 | expect(XRegExp.matchRecursive(str, '<', '>', 'gy')).toEqual(['1', '<<2>>', '3']); 48 | }); 49 | 50 | it('should pass the readme example for unbalanced delimiters', function() { 51 | const str = 'Here is
an
unbalanced example'; 52 | expect(XRegExp.matchRecursive(str, '', '
', 'gi', { 53 | unbalanced: 'skip' 54 | })).toEqual(['an']); 55 | }); 56 | 57 | it('should throw for unbalanced left delimiter in first match without flag g', function() { 58 | expect(function() {XRegExp.matchRecursive('<', '<', '>');}).toThrow(); 59 | expect(function() {XRegExp.matchRecursive('<<>', '<', '>');}).toThrow(); 60 | }); 61 | 62 | it('should not throw for unbalanced left delimiter after first match without flag g', function() { 63 | expect(function() {XRegExp.matchRecursive('<><', '<', '>');}).not.toThrow(); 64 | }); 65 | 66 | it('should throw for unbalanced left delimiter anywhere in string with flag g', function() { 67 | expect(function() {XRegExp.matchRecursive('<', '<', '>', 'g');}).toThrow(); 68 | expect(function() {XRegExp.matchRecursive('<<>', '<', '>', 'g');}).toThrow(); 69 | expect(function() {XRegExp.matchRecursive('<><', '<', '>', 'g');}).toThrow(); 70 | expect(function() {XRegExp.matchRecursive('.<.<>><', '<', '>', 'g');}).toThrow(); 71 | }); 72 | 73 | it('should throw for unbalanced right delimiter in first match without flag g', function() { 74 | expect(function() {XRegExp.matchRecursive('>', '<', '>');}).toThrow(); 75 | }); 76 | 77 | it('should not throw for unbalanced right delimiter after first match without flag g', function() { 78 | expect(function() {XRegExp.matchRecursive('<>>', '<', '>');}).not.toThrow(); 79 | }); 80 | 81 | it('should throw for unbalanced right delimiter anywhere in string with flag g', function() { 82 | expect(function() {XRegExp.matchRecursive('>', '<', '>', 'g');}).toThrow(); 83 | expect(function() {XRegExp.matchRecursive('<>>', '<', '>', 'g');}).toThrow(); 84 | expect(function() {XRegExp.matchRecursive('.<.<>>>', '<', '>', 'g');}).toThrow(); 85 | }); 86 | 87 | it('should handle unbalanced left delimiter with option unbalanced set to skip', function() { 88 | const matches = XRegExp.matchRecursive('<><<.>', '<', '>', 'g', {unbalanced: 'skip'}); 89 | expect(matches).toEqual(['', '.']); 90 | const vnMatches = 
XRegExp.matchRecursive('<><<.>', '<', '>', 'g', {unbalanced: 'skip', valueNames: ['between', 'left', 'match', 'right']}); 91 | expect(vnMatches).toEqual([ 92 | {name: 'left', value: '<', start: 0, end: 1}, 93 | {name: 'match', value: '', start: 1, end: 1}, 94 | {name: 'right', value: '>', start: 1, end: 2}, 95 | {name: 'between', value: '<', start: 2, end: 3}, 96 | {name: 'left', value: '<', start: 3, end: 4}, 97 | {name: 'match', value: '.', start: 4, end: 5}, 98 | {name: 'right', value: '>', start: 5, end: 6} 99 | ]); 100 | }); 101 | 102 | it('should handle unbalanced right delimiter with option unbalanced set to skip', function() { 103 | const matches = XRegExp.matchRecursive('.<>>', '<', '>', 'g', {unbalanced: 'skip'}); 104 | expect(matches).toEqual(['']); 105 | const vnMatches = XRegExp.matchRecursive('.<>>', '<', '>', 'g', {unbalanced: 'skip', valueNames: ['between', 'left', 'match', 'right']}); 106 | expect(vnMatches).toEqual([ 107 | {name: 'between', value: '.', start: 0, end: 1}, 108 | {name: 'left', value: '<', start: 1, end: 2}, 109 | {name: 'match', value: '', start: 2, end: 2}, 110 | {name: 'right', value: '>', start: 2, end: 3}, 111 | {name: 'between', value: '>', start: 3, end: 4} 112 | ]); 113 | }); 114 | 115 | it('should handle unbalanced overlapping multichar left delimiter with option unbalanced set to skip', function() { 116 | const matches = XRegExp.matchRecursive('<<<<.>>', '<<', '>>', 'g', { 117 | unbalanced: 'skip', 118 | valueNames: ['between', 'left', 'match', 'right'] 119 | }); 120 | expect(matches).toEqual([ 121 | {name: 'between', value: '<<', start: 0, end: 2}, 122 | {name: 'left', value: '<<', start: 2, end: 4}, 123 | {name: 'match', value: '.', start: 4, end: 5}, 124 | {name: 'right', value: '>>', start: 5, end: 7} 125 | ]); 126 | }); 127 | 128 | it('should handle unbalanced overlapping multichar left delimiter with option unbalanced set to skip-lazy', function() { 129 | const matches = XRegExp.matchRecursive('<<<<.>>', '<<', '>>', 
'g', { 130 | unbalanced: 'skip-lazy', 131 | valueNames: ['between', 'left', 'match', 'right'] 132 | }); 133 | expect(matches).toEqual([ 134 | {name: 'between', value: '<', start: 0, end: 1}, 135 | {name: 'left', value: '<<', start: 1, end: 3}, 136 | {name: 'match', value: '<.', start: 3, end: 5}, 137 | {name: 'right', value: '>>', start: 5, end: 7} 138 | ]); 139 | }); 140 | 141 | it('should handle zero-length delimiters', function() { 142 | expect(XRegExp.matchRecursive('<>', '(?=<)', '$')).toEqual(['<>']); 143 | }); 144 | 145 | it('should handle unbalanced zero-length delimiters', function() { 146 | expect(function() {XRegExp.matchRecursive('<>', '(?=.)', '(?:)');}).toThrow(); 147 | expect(XRegExp.matchRecursive('<>', '(?=.)', '(?:)', '', {unbalanced: 'skip'})).toEqual(['>']); 148 | }); 149 | 150 | it('should return an empty array if no matches', function() { 151 | expect(XRegExp.matchRecursive('.', '<', '>')).toEqual([]); 152 | expect(XRegExp.matchRecursive('.', '<', '>', 'g')).toEqual([]); 153 | expect( 154 | XRegExp.matchRecursive('.', '<', '>', '', { 155 | valueNames: ['between', 'left', 'match', 'right'] 156 | }) 157 | ).toEqual([]); 158 | expect( 159 | XRegExp.matchRecursive('.', '<', '>', 'g', { 160 | valueNames: ['between', 'left', 'match', 'right'] 161 | }) 162 | ).toEqual([]); 163 | }); 164 | 165 | }); 166 | 167 | }); 168 | -------------------------------------------------------------------------------- /types/test.ts: -------------------------------------------------------------------------------- 1 | import XRegExp = require('xregexp'); 2 | 3 | // ====================================================== 4 | // constructor 5 | // ====================================================== 6 | let regex1: RegExp = /a/gi; 7 | regex1 = XRegExp('/a/'); 8 | regex1 = XRegExp('/a/', 'gi'); 9 | regex1 = XRegExp(/a/gi); 10 | regex1 = XRegExp(regex1, undefined); 11 | 12 | // ====================================================== 13 | // XRegExp namespace 14 | // 
====================================================== 15 | 16 | //#region types 17 | 18 | // TokenScope 19 | let ts1: XRegExp.TokenScopeOption = 'default'; 20 | ts1 = 'class'; 21 | ts1 = 'all'; 22 | 23 | // MatchScope 24 | let ms: XRegExp.MatchScope = 'one'; 25 | ms = 'all'; 26 | 27 | // TokenFlag 28 | let tf: XRegExp.TokenFlag = 'A'; 29 | tf = 'a'; 30 | tf = '0'; 31 | tf = '_'; 32 | tf = '$'; 33 | 34 | // Feature 35 | let fo: XRegExp.FeatureOptions = 'astral'; 36 | fo = 'namespacing'; 37 | fo = 'astral namespacing'; 38 | fo = 'namespacing astral'; 39 | fo = {}; 40 | fo = { astral: true }; 41 | fo = { namespacing: true }; 42 | fo = { astral: true, namespacing: true }; 43 | 44 | // Pattern 45 | let pat: XRegExp.Pattern = '/a/'; 46 | pat = /a/gi; 47 | 48 | // NamedGroups 49 | let ng: XRegExp.NamedGroupsArray = {}; 50 | 51 | // MatchChainArray 52 | const mca: XRegExp.MatchChainArray = []; 53 | mca[0] = /a/gi; 54 | 55 | // ReplacementValue 56 | let rv: XRegExp.ReplacementValue = (s, args) => 'a'; 57 | rv = 'a'; 58 | 59 | // UnicodeCharacterRange 60 | let ucr: XRegExp.UnicodeCharacterRange = { name: 'a', astral: 'a-z' }; 61 | ucr = { name: 'b', bmp: 'a-z' }; 62 | ucr = { name: 'b', inverseOf: 'a-z' }; 63 | 64 | //#endregion 65 | 66 | //#region interfaces 67 | 68 | // TokenOptions 69 | const to: XRegExp.TokenOptions = {}; 70 | to.scope = ts1; 71 | to.flag = tf; 72 | to.optionalFlags = 'gi'; 73 | to.reparse = false; 74 | to.leadChar = '_'; 75 | 76 | // NamedGroupsArray 77 | ng = { name: 'string1', val: 'string2' }; 78 | const ng_str1: string = ng['name'] + ng['val']; 79 | 80 | // MatchArray 81 | class XRegExpMatchArrayImpl extends Array implements XRegExp.MatchArray { 82 | constructor(...items: string[]) { 83 | super(...items); 84 | Object.setPrototypeOf(this, Object.create(XRegExpMatchArrayImpl.prototype)); 85 | } 86 | groups?: XRegExp.NamedGroupsArray; 87 | input?: string; 88 | index?: number; 89 | } 90 | let ma: XRegExp.MatchArray = new XRegExpMatchArrayImpl('asdf', 
'qwerty'); 91 | ma.index = 0; 92 | ma.input = 'a'; 93 | ma.groups = ng; 94 | ma['namedMatch'] = 'b'; 95 | const ma_str1: string | undefined = ma['namedMatch'] as string; 96 | 97 | // ExecArray 98 | class XRegExpExecArrayImpl extends Array implements XRegExp.ExecArray { 99 | constructor(...items: string[]) { 100 | super(...items); 101 | Object.setPrototypeOf(this, Object.create(XRegExpExecArrayImpl.prototype)); 102 | } 103 | groups?: XRegExp.NamedGroupsArray; 104 | input = ''; 105 | index = 0; 106 | } 107 | const ea: XRegExp.ExecArray = new XRegExpExecArrayImpl('asdf', 'qwerty'); 108 | ea.groups = ng; 109 | ma.index = 0; 110 | ma.input = 'a'; 111 | ea['namedMatch'] = 'b'; 112 | const ea_str1: string | undefined = ea['namedMatch'] as string; 113 | 114 | // ChainArrayElement 115 | mca[1] = { regex: /a/gi, backref: 1 }; 116 | mca[2] = { regex: /a/gi, backref: 'asdf' }; 117 | 118 | // MatchSubString 119 | class XRegExpMatchSubstringImpl extends String implements XRegExp.MatchSubString { 120 | constructor(value?: any) { 121 | super(value); 122 | Object.setPrototypeOf(this, Object.create(XRegExpMatchSubstringImpl.prototype)); 123 | } 124 | groups?: XRegExp.NamedGroupsArray; 125 | } 126 | const mss: XRegExp.MatchSubString = new XRegExpMatchSubstringImpl('asdf'); 127 | mss.groups = ng; 128 | mss['namedMatch'] = 'b'; 129 | const mss_str1: string | undefined = mss['namedMatch'] as string; 130 | 131 | // ReplacementDetail 132 | let rd: XRegExp.ReplacementDetail = [/a/gi, rv]; 133 | rd = [/a/gi, rv, null]; 134 | rd = [/a/gi, rv, ms]; 135 | rd = [/a/gi, rv, ms, 'undefined indexes will be ignored']; 136 | 137 | // UnionOptions 138 | const uo: XRegExp.UnionOptions = {}; 139 | uo.conjunction = null; 140 | uo.conjunction = 'or'; 141 | uo.conjunction = 'none'; 142 | 143 | // MatchRecursiveOptions 144 | const mro: XRegExp.MatchRecursiveOptions = {}; 145 | mro.escapeChar = null; 146 | mro.escapeChar = '\\'; 147 | mro.valueNames = null; 148 | 149 | // MatchRecursiveValueNames 150 | 
const mrvn: XRegExp.MatchRecursiveValueNames = [null, null, null, null, 'undefined indexes will be ignored']; 151 | mrvn[0] = 'pre'; 152 | mrvn[1] = 'left'; 153 | mrvn[2] = 'inside'; 154 | mrvn[3] = 'right'; 155 | mro.valueNames = mrvn; 156 | 157 | // MatchRecursiveValueNameMatch 158 | const mrvnm: XRegExp.MatchRecursiveValueNameMatch = { name: 'a', value: 'a', start: 0, end: 1 }; 159 | 160 | // UnicodeCharacterRangeBase 161 | ucr.alias = 'asdf'; 162 | ucr.isBmpLast = true; 163 | 164 | //#endregion 165 | 166 | //#region constants 167 | 168 | const version: string = XRegExp.version; 169 | 170 | //#endregion 171 | 172 | //#region methods 173 | 174 | // addToken 175 | XRegExp.addToken(/a/gi, (m, s, f) => 'a'); 176 | XRegExp.addToken(/b/gi, (m, s, f) => 'b', to); 177 | 178 | // addUnicodeData 179 | XRegExp.addUnicodeData([ ucr ]); 180 | 181 | // build 182 | regex1 = XRegExp.build('(?x)^ {{v1}}:{{v2}} $', { v1: /a/gi, v2: regex1 }); 183 | regex1 = XRegExp.build('(?x)^ {{v1}}:{{v2}} $', { v1: /a/gi, v2: '/a/' }, 'gi'); 184 | 185 | // cache 186 | regex1 = XRegExp.cache('/a/', 'gi'); 187 | 188 | // escape 189 | const escape_str: string = XRegExp.escape('?<.abcde> asdf'); 190 | 191 | // exec 192 | let ean: XRegExp.ExecArray | null = XRegExp.exec('abcdefghijklm', /a/gi); 193 | ean = XRegExp.exec('abcdefghijklm', /a/gi, 0); 194 | ean = XRegExp.exec('abcdefghijklm', /a/gi, 0, true); 195 | ean = XRegExp.exec('abcdefghijklm', /a/gi, 0, 'sticky'); 196 | 197 | // forEach 198 | XRegExp.forEach('ab_ab_ab', /ab/gi, (m, i, s, r) => { /* do action */ }); 199 | 200 | // globalize 201 | regex1 = XRegExp.globalize(/a/gi); 202 | 203 | // install 204 | XRegExp.install('astral'); 205 | XRegExp.install('astral namespacing'); 206 | XRegExp.install('namespacing'); 207 | XRegExp.install('namespacing astral'); 208 | XRegExp.install({}); 209 | XRegExp.install({ astral: true }); 210 | XRegExp.install({ namespacing: true }); 211 | XRegExp.install({ astral: true, namespacing: true }); 212 | 213 | // 
isInstalled 214 | let ii_bool = XRegExp.isInstalled('astral'); 215 | ii_bool = XRegExp.isInstalled('namespacing'); 216 | 217 | // isRegExp 218 | let ire_bool: boolean = XRegExp.isRegExp(/a/gi); 219 | ire_bool = XRegExp.isRegExp(null); 220 | ire_bool = XRegExp.isRegExp(undefined); 221 | ire_bool = XRegExp.isRegExp('a'); 222 | ire_bool = XRegExp.isRegExp(0); 223 | ire_bool = XRegExp.isRegExp([]); 224 | ire_bool = XRegExp.isRegExp({}); 225 | 226 | // match 227 | const m_str: string|null = XRegExp.match('asdf', /a/gi, 'one'); 228 | const m_strarr: string[] = XRegExp.match('asdf', /a/gi, 'all'); 229 | const m_any: string|null|string[] = XRegExp.match('asdf', /a/gi); 230 | 231 | // matchChain 232 | ma = XRegExp.matchChain('asdf', mca); 233 | 234 | // matchRecursive 235 | let mr1: string[] 236 | = XRegExp.matchRecursive('asdf', 'a', 'f'); 237 | mr1 = XRegExp.matchRecursive('asdf', 'a', 'f', 'gi'); 238 | let mr2: XRegExp.MatchRecursiveValueNameMatch[] 239 | = XRegExp.matchRecursive('asdf', 'a', 'f', null, { valueNames: [ 'a', 'b', 'c', 'd' ] }); 240 | mr2 = XRegExp.matchRecursive('asdf', 'a', 'f', 'gi', { valueNames: [ 'a', 'b', 'c', 'd' ] }); 241 | 242 | // replace 243 | let r_str: string = XRegExp.replace('asdf', '/a/', 'b'); 244 | r_str = XRegExp.replace('asdf', /a/gi, (s, args) => 'a', 'all'); 245 | r_str = XRegExp.replace('asdf', /a/gi, (s, args) => 'a', 'one'); 246 | 247 | // replaceEach 248 | const re_str: string = XRegExp.replaceEach('asdf', [ rd ]); 249 | 250 | // split 251 | let s_strarr: string[] = XRegExp.split('asdf', '/a/'); 252 | s_strarr = XRegExp.split('asdf', /a/gi, 2); 253 | 254 | // tag 255 | let tag_re: RegExp = /a/g; 256 | tag_re = XRegExp.tag('i')`(asdf|${tag_re}|qwerty)`; 257 | 258 | // test 259 | let t_bool: boolean = XRegExp.test('asdf', '/a/'); 260 | t_bool = XRegExp.test('asdf', /a/gi, 3); 261 | t_bool = XRegExp.test('asdf', '/a/', undefined, true); 262 | t_bool = XRegExp.test('asdf', /a/gi, 1, 'sticky'); 263 | 264 | // uninstall 265 | 
XRegExp.uninstall('astral'); 266 | XRegExp.uninstall('astral namespacing'); 267 | XRegExp.uninstall('namespacing'); 268 | XRegExp.uninstall('namespacing astral'); 269 | XRegExp.uninstall({}); 270 | XRegExp.uninstall({ astral: true }); 271 | XRegExp.uninstall({ namespacing: true }); 272 | XRegExp.uninstall({ astral: true, namespacing: true }); 273 | 274 | // union 275 | let u_re: RegExp = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ]); 276 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], null); 277 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi'); 278 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { }); 279 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { conjunction: null }); 280 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { conjunction: 'or' }); 281 | u_re = XRegExp.union([ '/a/', /b/gi, XRegExp(/a/gi) ], 'gi', { conjunction: 'none' }); 282 | 283 | //#endregion 284 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "browser": true, 4 | "commonjs": true, 5 | "es6": true, 6 | "node": true 7 | }, 8 | "extends": "eslint:recommended", 9 | "parserOptions": { 10 | "ecmaVersion": 9, 11 | "sourceType": "module" 12 | }, 13 | "rules": { 14 | "accessor-pairs": "error", 15 | "array-bracket-spacing": [ 16 | "error", 17 | "never" 18 | ], 19 | "array-callback-return": "error", 20 | "arrow-body-style": "error", 21 | "arrow-parens": "error", 22 | "arrow-spacing": "error", 23 | "block-scoped-var": "error", 24 | "block-spacing": [ 25 | "error", 26 | "never" 27 | ], 28 | "brace-style": [ 29 | "error", 30 | "1tbs", 31 | { 32 | "allowSingleLine": true 33 | } 34 | ], 35 | "callback-return": "off", 36 | "camelcase": "error", 37 | "capitalized-comments": "off", 38 | "class-methods-use-this": "error", 39 | "comma-dangle": "error", 40 | "comma-spacing": 
[ 41 | "error", 42 | { 43 | "after": true, 44 | "before": false 45 | } 46 | ], 47 | "comma-style": [ 48 | "error", 49 | "last" 50 | ], 51 | "complexity": "off", 52 | "computed-property-spacing": [ 53 | "error", 54 | "never" 55 | ], 56 | "consistent-return": "error", 57 | "consistent-this": "error", 58 | "curly": "error", 59 | "default-case": "error", 60 | "dot-location": "error", 61 | "dot-notation": "error", 62 | "eol-last": "error", 63 | "eqeqeq": "off", 64 | "func-call-spacing": "error", 65 | "func-name-matching": "error", 66 | "func-names": [ 67 | "error", 68 | "never" 69 | ], 70 | "func-style": [ 71 | "error", 72 | "declaration" 73 | ], 74 | "generator-star-spacing": "error", 75 | "global-require": "error", 76 | "guard-for-in": "off", 77 | "handle-callback-err": "error", 78 | "id-blacklist": "error", 79 | "id-length": "off", 80 | "id-match": "error", 81 | "indent": "error", 82 | "init-declarations": "off", 83 | "jsx-quotes": "error", 84 | "key-spacing": "error", 85 | "keyword-spacing": [ 86 | "error", 87 | { 88 | "after": true, 89 | "before": true 90 | } 91 | ], 92 | "line-comment-position": "off", 93 | "linebreak-style": [ 94 | "error", 95 | "unix" 96 | ], 97 | "lines-around-comment": "error", 98 | "lines-around-directive": "error", 99 | "max-depth": "error", 100 | "max-len": "off", 101 | "max-lines": "off", 102 | "max-nested-callbacks": "error", 103 | "max-params": "off", 104 | "max-statements": "off", 105 | "max-statements-per-line": "off", 106 | "multiline-ternary": "off", 107 | "new-parens": "error", 108 | "newline-after-var": "off", 109 | "newline-before-return": "off", 110 | "newline-per-chained-call": "off", 111 | "no-alert": "error", 112 | "no-array-constructor": "error", 113 | "no-await-in-loop": "error", 114 | "no-bitwise": "off", 115 | "no-caller": "error", 116 | "no-catch-shadow": "error", 117 | "no-compare-neg-zero": "error", 118 | "no-cond-assign": [ 119 | "error", 120 | "except-parens" 121 | ], 122 | "no-confusing-arrow": "error", 123 | 
"no-constant-condition": [ 124 | "error", 125 | { 126 | "checkLoops": false 127 | } 128 | ], 129 | "no-continue": "off", 130 | "no-div-regex": "error", 131 | "no-duplicate-imports": "error", 132 | "no-else-return": "error", 133 | "no-empty-function": "error", 134 | "no-eq-null": "off", 135 | "no-eval": "error", 136 | "no-extend-native": "off", 137 | "no-extra-bind": "error", 138 | "no-extra-label": "error", 139 | "no-extra-parens": "off", 140 | "no-floating-decimal": "error", 141 | "no-implicit-coercion": [ 142 | "error", 143 | { 144 | "boolean": false, 145 | "number": false, 146 | "string": false 147 | } 148 | ], 149 | "no-implicit-globals": "error", 150 | "no-implied-eval": "error", 151 | "no-inline-comments": "off", 152 | "no-inner-declarations": [ 153 | "error", 154 | "functions" 155 | ], 156 | "no-invalid-this": "off", 157 | "no-iterator": "error", 158 | "no-label-var": "error", 159 | "no-labels": "error", 160 | "no-lone-blocks": "error", 161 | "no-lonely-if": "error", 162 | "no-loop-func": "error", 163 | "no-magic-numbers": "off", 164 | "no-mixed-operators": "error", 165 | "no-mixed-requires": "error", 166 | "no-multi-assign": "error", 167 | "no-multi-spaces": "error", 168 | "no-multi-str": "error", 169 | "no-multiple-empty-lines": "error", 170 | "no-native-reassign": "error", 171 | "no-negated-condition": "error", 172 | "no-negated-in-lhs": "error", 173 | "no-nested-ternary": "off", 174 | "no-new": "off", 175 | "no-new-func": "error", 176 | "no-new-object": "error", 177 | "no-new-require": "error", 178 | "no-new-wrappers": "off", 179 | "no-octal-escape": "error", 180 | "no-param-reassign": "off", 181 | "no-path-concat": "error", 182 | "no-plusplus": "off", 183 | "no-process-env": "error", 184 | "no-process-exit": "error", 185 | "no-proto": "off", 186 | "no-prototype-builtins": "off", 187 | "no-restricted-globals": "error", 188 | "no-restricted-imports": "error", 189 | "no-restricted-modules": "error", 190 | "no-restricted-properties": "error", 191 | 
"no-restricted-syntax": "error", 192 | "no-return-assign": [ 193 | "error", 194 | "except-parens" 195 | ], 196 | "no-return-await": "error", 197 | "no-script-url": "error", 198 | "no-self-compare": "error", 199 | "no-sequences": "error", 200 | "no-shadow": "off", 201 | "no-shadow-restricted-names": "error", 202 | "no-spaced-func": "error", 203 | "no-sync": "error", 204 | "no-tabs": "error", 205 | "no-template-curly-in-string": "error", 206 | "no-ternary": "off", 207 | "no-throw-literal": "error", 208 | "no-trailing-spaces": "error", 209 | "no-undef-init": "error", 210 | "no-undefined": "off", 211 | "no-underscore-dangle": "off", 212 | "no-unmodified-loop-condition": "error", 213 | "no-unneeded-ternary": "error", 214 | "no-unused-expressions": "error", 215 | "no-use-before-define": "off", 216 | "no-useless-call": "error", 217 | "no-useless-computed-key": "error", 218 | "no-useless-concat": "off", 219 | "no-useless-constructor": "error", 220 | "no-useless-escape": "off", 221 | "no-useless-rename": "error", 222 | "no-useless-return": "error", 223 | "no-var": "off", 224 | "no-void": "error", 225 | "no-warning-comments": "error", 226 | "no-whitespace-before-property": "error", 227 | "no-with": "error", 228 | "nonblock-statement-body-position": "error", 229 | "object-curly-newline": "off", 230 | "object-curly-spacing": [ 231 | "error", 232 | "never" 233 | ], 234 | "object-property-newline": "error", 235 | "object-shorthand": "off", 236 | "one-var": "off", 237 | "one-var-declaration-per-line": "error", 238 | "operator-assignment": [ 239 | "error", 240 | "always" 241 | ], 242 | "operator-linebreak": [ 243 | "error", 244 | "after" 245 | ], 246 | "padded-blocks": "off", 247 | "prefer-arrow-callback": "off", 248 | "prefer-const": "error", 249 | "prefer-destructuring": [ 250 | "error", 251 | { 252 | "array": true, 253 | "object": true 254 | } 255 | ], 256 | "prefer-numeric-literals": "error", 257 | "prefer-promise-reject-errors": "error", 258 | "prefer-reflect": "off", 259 | 
"prefer-rest-params": "off", 260 | "prefer-spread": "off", 261 | "prefer-template": "off", 262 | "quote-props": "off", 263 | "quotes": "off", 264 | "radix": [ 265 | "error", 266 | "always" 267 | ], 268 | "require-await": "error", 269 | "require-jsdoc": "off", 270 | "rest-spread-spacing": "error", 271 | "semi": "error", 272 | "semi-spacing": [ 273 | "error", 274 | { 275 | "after": true, 276 | "before": false 277 | } 278 | ], 279 | "sort-imports": "error", 280 | "sort-keys": "off", 281 | "sort-vars": "error", 282 | "space-before-blocks": "error", 283 | "space-before-function-paren": [ 284 | "error", 285 | "never" 286 | ], 287 | "space-in-parens": [ 288 | "error", 289 | "never" 290 | ], 291 | "space-infix-ops": "error", 292 | "space-unary-ops": "error", 293 | "spaced-comment": "off", 294 | "strict": "off", 295 | "symbol-description": "error", 296 | "template-curly-spacing": "error", 297 | "template-tag-spacing": "error", 298 | "unicode-bom": [ 299 | "error", 300 | "never" 301 | ], 302 | "valid-jsdoc": "off", 303 | "vars-on-top": "off", 304 | "wrap-iife": "error", 305 | "wrap-regex": "off", 306 | "yield-star-spacing": "error", 307 | "yoda": [ 308 | "error", 309 | "never" 310 | ] 311 | } 312 | }; 313 | -------------------------------------------------------------------------------- /src/addons/build.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * XRegExp.build 5.1.2 3 | * 4 | * Steven Levithan (c) 2012-present MIT License 5 | */ 6 | 7 | export default (XRegExp) => { 8 | const REGEX_DATA = 'xregexp'; 9 | const subParts = /(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*\]/g; 10 | const parts = XRegExp.union([/\({{([\w$]+)}}\)|{{([\w$]+)}}/, subParts], 'g', { 11 | conjunction: 'or' 12 | }); 13 | 14 | /** 15 | * Strips a leading `^` and trailing unescaped `$`, if both are present. 16 | * 17 | * @private 18 | * @param {String} pattern Pattern to process. 19 | * @returns {String} Pattern with edge anchors removed. 
20 | */ 21 | function deanchor(pattern) { 22 | // Allow any number of empty noncapturing groups before/after anchors, because regexes 23 | // built/generated by XRegExp sometimes include them 24 | const leadingAnchor = /^(?:\(\?:\))*\^/; 25 | const trailingAnchor = /\$(?:\(\?:\))*$/; 26 | 27 | if ( 28 | leadingAnchor.test(pattern) && 29 | trailingAnchor.test(pattern) && 30 | // Ensure that the trailing `$` isn't escaped 31 | trailingAnchor.test(pattern.replace(/\\[\s\S]/g, '')) 32 | ) { 33 | return pattern.replace(leadingAnchor, '').replace(trailingAnchor, ''); 34 | } 35 | 36 | return pattern; 37 | } 38 | 39 | /** 40 | * Converts the provided value to an XRegExp. Native RegExp flags are not preserved. 41 | * 42 | * @private 43 | * @param {String|RegExp} value Value to convert. 44 | * @param {Boolean} [addFlagX] Whether to apply the `x` flag in cases when `value` is not 45 | * already a regex generated by XRegExp 46 | * @returns {RegExp} XRegExp object with XRegExp syntax applied. 47 | */ 48 | function asXRegExp(value, addFlagX) { 49 | const flags = addFlagX ? 'x' : ''; 50 | return XRegExp.isRegExp(value) ? 51 | (value[REGEX_DATA] && value[REGEX_DATA].captureNames ? 52 | // Don't recompile, to preserve capture names 53 | value : 54 | // Recompile as XRegExp 55 | XRegExp(value.source, flags) 56 | ) : 57 | // Compile string as XRegExp 58 | XRegExp(value, flags); 59 | } 60 | 61 | function interpolate(substitution) { 62 | return substitution instanceof RegExp ? substitution : XRegExp.escape(substitution); 63 | } 64 | 65 | function reduceToSubpatternsObject(subpatterns, interpolated, subpatternIndex) { 66 | subpatterns[`subpattern${subpatternIndex}`] = interpolated; 67 | return subpatterns; 68 | } 69 | 70 | function embedSubpatternAfter(raw, subpatternIndex, rawLiterals) { 71 | const hasSubpattern = subpatternIndex < rawLiterals.length - 1; 72 | return raw + (hasSubpattern ? 
`{{subpattern${subpatternIndex}}}` : ''); 73 | } 74 | 75 | /** 76 | * Provides tagged template literals that create regexes with XRegExp syntax and flags. The 77 | * provided pattern is handled as a raw string, so backslashes don't need to be escaped. 78 | * 79 | * Interpolation of strings and regexes shares the features of `XRegExp.build`. Interpolated 80 | * patterns are treated as atomic units when quantified, interpolated strings have their special 81 | * characters escaped, a leading `^` and trailing unescaped `$` are stripped from interpolated 82 | * regexes if both are present, and any backreferences within an interpolated regex are 83 | * rewritten to work within the overall pattern. 84 | * 85 | * @memberOf XRegExp 86 | * @param {String} [flags] Any combination of XRegExp flags. 87 | * @returns {Function} Handler for template literals that construct regexes with XRegExp syntax. 88 | * @example 89 | * 90 | * XRegExp.tag()`\b\w+\b`.test('word'); // -> true 91 | * 92 | * const hours = /1[0-2]|0?[1-9]/; 93 | * const minutes = /(?<minutes>[0-5][0-9])/; 94 | * const time = XRegExp.tag('x')`\b ${hours} : ${minutes} \b`; 95 | * time.test('10:59'); // -> true 96 | * XRegExp.exec('10:59', time).groups.minutes; // -> '59' 97 | * 98 | * const backref1 = /(a)\1/; 99 | * const backref2 = /(b)\1/; 100 | * XRegExp.tag()`${backref1}${backref2}`.test('aabb'); // -> true 101 | */ 102 | XRegExp.tag = (flags) => (literals, ...substitutions) => { 103 | const subpatterns = substitutions.map(interpolate).reduce(reduceToSubpatternsObject, {}); 104 | const pattern = literals.raw.map(embedSubpatternAfter).join(''); 105 | return XRegExp.build(pattern, subpatterns, flags); 106 | }; 107 | 108 | /** 109 | * Builds regexes using named subpatterns, for readability and pattern reuse. Backreferences in 110 | * the outer pattern and provided subpatterns are automatically renumbered to work correctly. 111 | * Native flags used by provided subpatterns are ignored in favor of the `flags` argument. 
112 | * 113 | * @memberOf XRegExp 114 | * @param {String} pattern XRegExp pattern using `{{name}}` for embedded subpatterns. Allows 115 | * `({{name}})` as shorthand for `(?<name>{{name}})`. Patterns cannot be embedded within 116 | * character classes. 117 | * @param {Object} subs Lookup object for named subpatterns. Values can be strings or regexes. A 118 | * leading `^` and trailing unescaped `$` are stripped from subpatterns, if both are present. 119 | * @param {String} [flags] Any combination of XRegExp flags. 120 | * @returns {RegExp} Regex with interpolated subpatterns. 121 | * @example 122 | * 123 | * const time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', { 124 | * hours: XRegExp.build('{{h12}} : | {{h24}}', { 125 | * h12: /1[0-2]|0?[1-9]/, 126 | * h24: /2[0-3]|[01][0-9]/ 127 | * }, 'x'), 128 | * minutes: /^[0-5][0-9]$/ 129 | * }); 130 | * time.test('10:59'); // -> true 131 | * XRegExp.exec('10:59', time).groups.minutes; // -> '59' 132 | */ 133 | XRegExp.build = (pattern, subs, flags) => { 134 | flags = flags || ''; 135 | // Used with `asXRegExp` calls for `pattern` and subpatterns in `subs`, to work around how 136 | // some browsers convert `RegExp('\n')` to a regex that contains the literal characters `\` 137 | // and `n`. See more details at <https://github.com/slevithan/xregexp/pull/163>. 138 | const addFlagX = flags.includes('x'); 139 | const inlineFlags = /^\(\?([\w$]+)\)/.exec(pattern); 140 | // Add flags within a leading mode modifier to the overall pattern's flags 141 | if (inlineFlags) { 142 | flags = XRegExp._clipDuplicates(flags + inlineFlags[1]); 143 | } 144 | 145 | const data = {}; 146 | for (const p in subs) { 147 | if (subs.hasOwnProperty(p)) { 148 | // Passing to XRegExp enables extended syntax and ensures independent validity, 149 | // lest an unescaped `(`, `)`, `[`, or trailing `\` breaks the `(?:)` wrapper. For 150 | // subpatterns provided as native regexes, it dies on octals and adds the property 151 | // used to hold extended regex instance data, for simplicity. 
152 | const sub = asXRegExp(subs[p], addFlagX); 153 | data[p] = { 154 | // Deanchoring allows embedding independently useful anchored regexes. If you 155 | // really need to keep your anchors, double them (i.e., `^^...$$`). 156 | pattern: deanchor(sub.source), 157 | names: sub[REGEX_DATA].captureNames || [] 158 | }; 159 | } 160 | } 161 | 162 | // Passing to XRegExp dies on octals and ensures the outer pattern is independently valid; 163 | // helps keep this simple. Named captures will be put back. 164 | const patternAsRegex = asXRegExp(pattern, addFlagX); 165 | 166 | // 'Caps' is short for 'captures' 167 | let numCaps = 0; 168 | let numPriorCaps; 169 | let numOuterCaps = 0; 170 | const outerCapsMap = [0]; 171 | const outerCapNames = patternAsRegex[REGEX_DATA].captureNames || []; 172 | const output = patternAsRegex.source.replace(parts, ($0, $1, $2, $3, $4) => { 173 | const subName = $1 || $2; 174 | let capName; 175 | let intro; 176 | let localCapIndex; 177 | // Named subpattern 178 | if (subName) { 179 | if (!data.hasOwnProperty(subName)) { 180 | throw new ReferenceError(`Undefined property ${$0}`); 181 | } 182 | // Named subpattern was wrapped in a capturing group 183 | if ($1) { 184 | capName = outerCapNames[numOuterCaps]; 185 | outerCapsMap[++numOuterCaps] = ++numCaps; 186 | // If it's a named group, preserve the name. 
Otherwise, use the subpattern name 187 | // as the capture name 188 | intro = `(?<${capName || subName}>`; 189 | } else { 190 | intro = '(?:'; 191 | } 192 | numPriorCaps = numCaps; 193 | const rewrittenSubpattern = data[subName].pattern.replace(subParts, (match, paren, backref) => { 194 | // Capturing group 195 | if (paren) { 196 | capName = data[subName].names[numCaps - numPriorCaps]; 197 | ++numCaps; 198 | // If the current capture has a name, preserve the name 199 | if (capName) { 200 | return `(?<${capName}>`; 201 | } 202 | // Backreference 203 | } else if (backref) { 204 | localCapIndex = +backref - 1; 205 | // Rewrite the backreference 206 | return data[subName].names[localCapIndex] ? 207 | // Need to preserve the backreference name in case using flag `n` 208 | `\\k<${data[subName].names[localCapIndex]}>` : 209 | `\\${+backref + numPriorCaps}`; 210 | } 211 | return match; 212 | }); 213 | return `${intro}${rewrittenSubpattern})`; 214 | } 215 | // Capturing group 216 | if ($3) { 217 | capName = outerCapNames[numOuterCaps]; 218 | outerCapsMap[++numOuterCaps] = ++numCaps; 219 | // If the current capture has a name, preserve the name 220 | if (capName) { 221 | return `(?<${capName}>`; 222 | } 223 | // Backreference 224 | } else if ($4) { 225 | localCapIndex = +$4 - 1; 226 | // Rewrite the backreference 227 | return outerCapNames[localCapIndex] ? 228 | // Need to preserve the backreference name in case using flag `n` 229 | `\\k<${outerCapNames[localCapIndex]}>` : 230 | `\\${outerCapsMap[+$4]}`; 231 | } 232 | return $0; 233 | }); 234 | 235 | return XRegExp(output, flags); 236 | }; 237 | }; 238 | -------------------------------------------------------------------------------- /src/addons/matchrecursive.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * XRegExp.matchRecursive 5.1.2 3 | * 4 | * Steven Levithan (c) 2009-present MIT License 5 | */ 6 | 7 | export default (XRegExp) => { 8 | 9 | /** 10 | * Returns a match detail object composed of the provided values. 11 | * 12 | * @private 13 | */ 14 | function row(name, value, start, end) { 15 | return { 16 | name, 17 | value, 18 | start, 19 | end 20 | }; 21 | } 22 | 23 | /** 24 | * Returns an array of match strings between outermost left and right delimiters, or an array of 25 | * objects with detailed match parts and position data. By default, an error is thrown if 26 | * delimiters are unbalanced within the subject string. 27 | * 28 | * @memberOf XRegExp 29 | * @param {String} str String to search. 30 | * @param {String} left Left delimiter as an XRegExp pattern. 31 | * @param {String} right Right delimiter as an XRegExp pattern. 32 | * @param {String} [flags] Any combination of XRegExp flags, used for the left and right delimiters. 33 | * @param {Object} [options] Options object with optional properties: 34 | * - `valueNames` {Array} Providing `valueNames` changes the return value from an array of 35 | * matched strings to an array of objects that provide the value and start/end positions 36 | * for the matched strings as well as the matched delimiters and unmatched string segments. 37 | * To use this extended information mode, provide an array of 4 strings that name the parts 38 | * to be returned: 39 | * 1. String segments outside of (before, between, and after) matches. 40 | * 2. Matched outermost left delimiters. 41 | * 3. Matched text between the outermost left and right delimiters. 42 | * 4. Matched outermost right delimiters. 43 | * Taken together, these parts include the entire subject string if used with flag g. 44 | * Use `null` for any of these values to omit unneeded parts from the returned results. 45 | * - `escapeChar` {String} Single char used to escape delimiters within the subject string. 
46 | * - `unbalanced` {String} Handling mode for unbalanced delimiters. Options are: 47 | * - 'error' - throw (default) 48 | * - 'skip' - unbalanced delimiters are treated as part of the text between delimiters, and 49 | * searches continue at the end of the unbalanced delimiter. 50 | * - 'skip-lazy' - unbalanced delimiters are treated as part of the text between delimiters, 51 | * and searches continue one character after the start of the unbalanced delimiter. 52 | * @returns {Array} Array of matches, or an empty array. 53 | * @example 54 | * 55 | * // Basic usage 56 | * const str1 = '(t((e))s)t()(ing)'; 57 | * XRegExp.matchRecursive(str1, '\\(', '\\)', 'g'); 58 | * // -> ['t((e))s', '', 'ing'] 59 | * 60 | * // Extended information mode with valueNames 61 | * const str2 = 'Here is <div> <div>an</div></div> example'; 62 | * XRegExp.matchRecursive(str2, '<div\\s*>', '</div>', 'gi', { 63 | * valueNames: ['between', 'left', 'match', 'right'] 64 | * }); 65 | * // -> [ 66 | * // {name: 'between', value: 'Here is ', start: 0, end: 8}, 67 | * // {name: 'left', value: '<div>', start: 8, end: 13}, 68 | * // {name: 'match', value: ' <div>an</div>', start: 13, end: 27}, 69 | * // {name: 'right', value: '</div>', start: 27, end: 33}, 70 | * // {name: 'between', value: ' example', start: 33, end: 41} 71 | * // ] 72 | * 73 | * // Omitting unneeded parts with null valueNames, and using escapeChar 74 | * const str3 = '...{1}.\\{{function(x,y){return {y:x}}}'; 75 | * XRegExp.matchRecursive(str3, '{', '}', 'g', { 76 | * valueNames: ['literal', null, 'value', null], 77 | * escapeChar: '\\' 78 | * }); 79 | * // -> [ 80 | * // {name: 'literal', value: '...', start: 0, end: 3}, 81 | * // {name: 'value', value: '1', start: 4, end: 5}, 82 | * // {name: 'literal', value: '.\\{', start: 6, end: 9}, 83 | * // {name: 'value', value: 'function(x,y){return {y:x}}', start: 10, end: 37} 84 | * // ] 85 | * 86 | * // Sticky mode via flag y 87 | * const str4 = '<1><<<2>>><3>4<5>'; 88 | * XRegExp.matchRecursive(str4, '<', '>', 'gy'); 89 | * // -> ['1', '<<2>>', '3'] 90 | * 91 | * // Skipping unbalanced delimiters instead of erroring 92 | * const str5 = 'Here is <div> <div>an</div> unbalanced example'; 93 | * XRegExp.matchRecursive(str5, '<div\\s*>', '</div>
', 'gi', { 94 | * unbalanced: 'skip' 95 | * }); 96 | * // -> ['an'] 97 | */ 98 | XRegExp.matchRecursive = (str, left, right, flags, options) => { 99 | flags = flags || ''; 100 | options = options || {}; 101 | const global = flags.includes('g'); 102 | const sticky = flags.includes('y'); 103 | // Flag `y` is handled manually 104 | const basicFlags = flags.replace(/y/g, ''); 105 | left = XRegExp(left, basicFlags); 106 | right = XRegExp(right, basicFlags); 107 | 108 | let esc; 109 | let {escapeChar} = options; 110 | if (escapeChar) { 111 | if (escapeChar.length > 1) { 112 | throw new Error('Cannot use more than one escape character'); 113 | } 114 | escapeChar = XRegExp.escape(escapeChar); 115 | // Example of concatenated `esc` regex: 116 | // `escapeChar`: '%' 117 | // `left`: '<' 118 | // `right`: '>' 119 | // Regex is: /(?:%[\S\s]|(?:(?!<|>)[^%])+)+/ 120 | esc = new RegExp( 121 | `(?:${escapeChar}[\\S\\s]|(?:(?!${ 122 | // Using `XRegExp.union` safely rewrites backreferences in `left` and `right`. 123 | // Intentionally not passing `basicFlags` to `XRegExp.union` since any syntax 124 | // transformation resulting from those flags was already applied to `left` and 125 | // `right` when they were passed through the XRegExp constructor above. 126 | XRegExp.union([left, right], '', {conjunction: 'or'}).source 127 | })[^${escapeChar}])+)+`, 128 | // Flags `dgy` not needed here 129 | flags.replace(XRegExp._hasNativeFlag('s') ? 
/[^imsu]/g : /[^imu]/g, '') 130 | ); 131 | } 132 | 133 | let openTokens = 0; 134 | let delimStart = 0; 135 | let delimEnd = 0; 136 | let lastOuterEnd = 0; 137 | let outerStart; 138 | let innerStart; 139 | let leftMatch; 140 | let rightMatch; 141 | const vN = options.valueNames; 142 | const output = []; 143 | 144 | while (true) { 145 | // If using an escape character, advance to the delimiter's next starting position, 146 | // skipping any escaped characters in between 147 | if (escapeChar) { 148 | delimEnd += (XRegExp.exec(str, esc, delimEnd, 'sticky') || [''])[0].length; 149 | } 150 | 151 | leftMatch = XRegExp.exec(str, left, delimEnd); 152 | rightMatch = XRegExp.exec(str, right, delimEnd); 153 | // Keep the leftmost match only 154 | if (leftMatch && rightMatch) { 155 | if (leftMatch.index <= rightMatch.index) { 156 | rightMatch = null; 157 | } else { 158 | leftMatch = null; 159 | } 160 | } 161 | 162 | // Paths (LM: leftMatch, RM: rightMatch, OT: openTokens): 163 | // LM | RM | OT | Result 164 | // 1 | 0 | 1 | loop 165 | // 1 | 0 | 0 | loop 166 | // 0 | 1 | 1 | loop 167 | // 0 | 1 | 0 | throw 168 | // 0 | 0 | 1 | throw 169 | // 0 | 0 | 0 | break 170 | // The paths above don't include the sticky mode special case. The loop ends after the 171 | // first completed match if not `global`. 
172 | if (leftMatch || rightMatch) { 173 | delimStart = (leftMatch || rightMatch).index; 174 | delimEnd = delimStart + (leftMatch || rightMatch)[0].length; 175 | } else if (!openTokens) { 176 | break; 177 | } 178 | if (sticky && !openTokens && delimStart > lastOuterEnd) { 179 | break; 180 | } 181 | if (leftMatch) { 182 | if (!openTokens) { 183 | outerStart = delimStart; 184 | innerStart = delimEnd; 185 | } 186 | openTokens += 1; 187 | } else if (rightMatch && openTokens) { 188 | openTokens -= 1; 189 | if (!openTokens) { 190 | if (vN) { 191 | if (vN[0] && outerStart > lastOuterEnd) { 192 | output.push(row(vN[0], str.slice(lastOuterEnd, outerStart), lastOuterEnd, outerStart)); 193 | } 194 | if (vN[1]) { 195 | output.push(row(vN[1], str.slice(outerStart, innerStart), outerStart, innerStart)); 196 | } 197 | if (vN[2]) { 198 | output.push(row(vN[2], str.slice(innerStart, delimStart), innerStart, delimStart)); 199 | } 200 | if (vN[3]) { 201 | output.push(row(vN[3], str.slice(delimStart, delimEnd), delimStart, delimEnd)); 202 | } 203 | } else { 204 | output.push(str.slice(innerStart, delimStart)); 205 | } 206 | lastOuterEnd = delimEnd; 207 | if (!global) { 208 | break; 209 | } 210 | } 211 | // Found unbalanced delimiter 212 | } else { 213 | const unbalanced = options.unbalanced || 'error'; 214 | if (unbalanced === 'skip' || unbalanced === 'skip-lazy') { 215 | if (rightMatch) { 216 | rightMatch = null; 217 | // No `leftMatch` for unbalanced left delimiter because we've reached the string end 218 | } else { 219 | if (unbalanced === 'skip') { 220 | const outerStartDelimLength = XRegExp.exec(str, left, outerStart, 'sticky')[0].length; 221 | delimEnd = outerStart + (outerStartDelimLength || 1); 222 | } else { 223 | delimEnd = outerStart + 1; 224 | } 225 | openTokens = 0; 226 | } 227 | } else if (unbalanced === 'error') { 228 | const delimSide = rightMatch ? 'right' : 'left'; 229 | const errorPos = rightMatch ? 
delimStart : outerStart; 230 | throw new Error(`Unbalanced ${delimSide} delimiter found in string at position ${errorPos}`); 231 | } else { 232 | throw new Error(`Unsupported value for unbalanced: ${unbalanced}`); 233 | } 234 | } 235 | 236 | // If the delimiter matched an empty string, avoid an infinite loop 237 | if (delimStart === delimEnd) { 238 | delimEnd += 1; 239 | } 240 | } 241 | 242 | if (global && output.length > 0 && !sticky && vN && vN[0] && str.length > lastOuterEnd) { 243 | output.push(row(vN[0], str.slice(lastOuterEnd), lastOuterEnd, str.length)); 244 | } 245 | 246 | return output; 247 | }; 248 | }; 249 | -------------------------------------------------------------------------------- /src/addons/unicode-base.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * XRegExp Unicode Base 5.1.2 3 | * 4 | * Steven Levithan (c) 2008-present MIT License 5 | */ 6 | 7 | export default (XRegExp) => { 8 | 9 | /** 10 | * Adds base support for Unicode matching: 11 | * - Adds syntax `\p{..}` for matching Unicode tokens. Tokens can be inverted using `\P{..}` or 12 | * `\p{^..}`. Token names ignore case, spaces, hyphens, and underscores. You can omit the 13 | * braces for token names that are a single letter (e.g. `\pL` or `\PL`). 14 | * - Adds flag A (astral), which enables 21-bit Unicode support. 15 | * - Adds the `XRegExp.addUnicodeData` method used by other addons to provide character data. 16 | * 17 | * Unicode Base relies on externally provided Unicode character data. Official addons are 18 | * available to provide data for Unicode categories, scripts, and properties. 
19 | * 20 | * @requires XRegExp 21 | */ 22 | 23 | // ==--------------------------== 24 | // Private stuff 25 | // ==--------------------------== 26 | 27 | // Storage for Unicode data 28 | const unicode = {}; 29 | const unicodeTypes = {}; 30 | 31 | // Reuse utils 32 | const dec = XRegExp._dec; 33 | const hex = XRegExp._hex; 34 | const pad4 = XRegExp._pad4; 35 | 36 | // Generates a token lookup name: lowercase, with hyphens, spaces, and underscores removed 37 | function normalize(name) { 38 | return name.replace(/[- _]+/g, '').toLowerCase(); 39 | } 40 | 41 | // Gets the decimal code of a literal code unit, \xHH, \uHHHH, or a backslash-escaped literal 42 | function charCode(chr) { 43 | const esc = /^\\[xu](.+)/.exec(chr); 44 | return esc ? 45 | dec(esc[1]) : 46 | chr.charCodeAt(chr[0] === '\\' ? 1 : 0); 47 | } 48 | 49 | // Inverts a list of ordered BMP characters and ranges 50 | function invertBmp(range) { 51 | let output = ''; 52 | let lastEnd = -1; 53 | 54 | XRegExp.forEach( 55 | range, 56 | /(\\x..|\\u....|\\?[\s\S])(?:-(\\x..|\\u....|\\?[\s\S]))?/, 57 | (m) => { 58 | const start = charCode(m[1]); 59 | if (start > (lastEnd + 1)) { 60 | output += `\\u${pad4(hex(lastEnd + 1))}`; 61 | if (start > (lastEnd + 2)) { 62 | output += `-\\u${pad4(hex(start - 1))}`; 63 | } 64 | } 65 | lastEnd = charCode(m[2] || m[1]); 66 | } 67 | ); 68 | 69 | if (lastEnd < 0xFFFF) { 70 | output += `\\u${pad4(hex(lastEnd + 1))}`; 71 | if (lastEnd < 0xFFFE) { 72 | output += '-\\uFFFF'; 73 | } 74 | } 75 | 76 | return output; 77 | } 78 | 79 | // Generates an inverted BMP range on first use 80 | function cacheInvertedBmp(slug) { 81 | const prop = 'b!'; 82 | return ( 83 | unicode[slug][prop] || 84 | (unicode[slug][prop] = invertBmp(unicode[slug].bmp)) 85 | ); 86 | } 87 | 88 | // Combines and optionally negates BMP and astral data 89 | function buildAstral(slug, isNegated) { 90 | const item = unicode[slug]; 91 | let combined = ''; 92 | 93 | if (item.bmp && !item.isBmpLast) { 94 | combined = 
`[${item.bmp}]${item.astral ? '|' : ''}`; 95 | } 96 | if (item.astral) { 97 | combined += item.astral; 98 | } 99 | if (item.isBmpLast && item.bmp) { 100 | combined += `${item.astral ? '|' : ''}[${item.bmp}]`; 101 | } 102 | 103 | // Astral Unicode tokens always match a code point, never a code unit 104 | return isNegated ? 105 | `(?:(?!${combined})(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[\0-\uFFFF]))` : 106 | `(?:${combined})`; 107 | } 108 | 109 | // Builds a complete astral pattern on first use 110 | function cacheAstral(slug, isNegated) { 111 | const prop = isNegated ? 'a!' : 'a='; 112 | return ( 113 | unicode[slug][prop] || 114 | (unicode[slug][prop] = buildAstral(slug, isNegated)) 115 | ); 116 | } 117 | 118 | // ==--------------------------== 119 | // Core functionality 120 | // ==--------------------------== 121 | 122 | /* 123 | * Add astral mode (flag A) and Unicode token syntax: `\p{..}`, `\P{..}`, `\p{^..}`, `\pC`. 124 | */ 125 | XRegExp.addToken( 126 | // Use `*` instead of `+` to avoid capturing `^` as the token name in `\p{^}` 127 | /\\([pP])(?:{(\^?)(?:(\w+)=)?([^}]*)}|([A-Za-z]))/, 128 | (match, scope, flags) => { 129 | const ERR_DOUBLE_NEG = 'Invalid double negation '; 130 | const ERR_UNKNOWN_NAME = 'Unknown Unicode token '; 131 | const ERR_UNKNOWN_REF = 'Unicode token missing data '; 132 | const ERR_ASTRAL_ONLY = 'Astral mode required for Unicode token '; 133 | const ERR_ASTRAL_IN_CLASS = 'Astral mode does not support Unicode tokens within character classes'; 134 | const [ 135 | fullToken, 136 | pPrefix, 137 | caretNegation, 138 | typePrefix, 139 | tokenName, 140 | tokenSingleCharName 141 | ] = match; 142 | // Negated via \P{..} or \p{^..} 143 | let isNegated = pPrefix === 'P' || !!caretNegation; 144 | // Switch from BMP (0-FFFF) to astral (0-10FFFF) mode via flag A 145 | const isAstralMode = flags.includes('A'); 146 | // Token lookup name. 
Check `tokenSingleCharName` first to avoid passing `undefined` 147 | // via `\p{}` 148 | let slug = normalize(tokenSingleCharName || tokenName); 149 | // Token data object 150 | let item = unicode[slug]; 151 | 152 | if (pPrefix === 'P' && caretNegation) { 153 | throw new SyntaxError(ERR_DOUBLE_NEG + fullToken); 154 | } 155 | if (!unicode.hasOwnProperty(slug)) { 156 | throw new SyntaxError(ERR_UNKNOWN_NAME + fullToken); 157 | } 158 | 159 | if (typePrefix) { 160 | if (!(unicodeTypes[typePrefix] && unicodeTypes[typePrefix][slug])) { 161 | throw new SyntaxError(ERR_UNKNOWN_NAME + fullToken); 162 | } 163 | } 164 | 165 | // Switch to the negated form of the referenced Unicode token 166 | if (item.inverseOf) { 167 | slug = normalize(item.inverseOf); 168 | if (!unicode.hasOwnProperty(slug)) { 169 | throw new ReferenceError(`${ERR_UNKNOWN_REF + fullToken} -> ${item.inverseOf}`); 170 | } 171 | item = unicode[slug]; 172 | isNegated = !isNegated; 173 | } 174 | 175 | if (!(item.bmp || isAstralMode)) { 176 | throw new SyntaxError(ERR_ASTRAL_ONLY + fullToken); 177 | } 178 | if (isAstralMode) { 179 | if (scope === 'class') { 180 | throw new SyntaxError(ERR_ASTRAL_IN_CLASS); 181 | } 182 | 183 | return cacheAstral(slug, isNegated); 184 | } 185 | 186 | return scope === 'class' ? 187 | (isNegated ? cacheInvertedBmp(slug) : item.bmp) : 188 | `${(isNegated ? '[^' : '[') + item.bmp}]`; 189 | }, 190 | { 191 | scope: 'all', 192 | optionalFlags: 'A', 193 | leadChar: '\\' 194 | } 195 | ); 196 | 197 | /** 198 | * Adds to the list of Unicode tokens that XRegExp regexes can match via `\p` or `\P`. 199 | * 200 | * @memberOf XRegExp 201 | * @param {Array} data Objects with named character ranges. Each object may have properties 202 | * `name`, `alias`, `isBmpLast`, `inverseOf`, `bmp`, and `astral`. All but `name` are 203 | * optional, although one of `bmp` or `astral` is required (unless `inverseOf` is set). If 204 | * `astral` is absent, the `bmp` data is used for BMP and astral modes. 
If `bmp` is absent, 205 | * the name errors in BMP mode but works in astral mode. If both `bmp` and `astral` are 206 | * provided, the `bmp` data only is used in BMP mode, and the combination of `bmp` and 207 | * `astral` data is used in astral mode. `isBmpLast` is needed when a token matches orphan 208 | * high surrogates *and* uses surrogate pairs to match astral code points. The `bmp` and 209 | * `astral` data should be a combination of literal characters and `\xHH` or `\uHHHH` escape 210 | * sequences, with hyphens to create ranges. Any regex metacharacters in the data should be 211 | * escaped, apart from range-creating hyphens. The `astral` data can additionally use 212 | * character classes and alternation, and should use surrogate pairs to represent astral code 213 | * points. `inverseOf` can be used to avoid duplicating character data if a Unicode token is 214 | * defined as the exact inverse of another token. 215 | * @param {String} [typePrefix] Enables optionally using this type as a prefix for all of the 216 | * provided Unicode tokens, e.g. if given `'Type'`, then `\p{TokenName}` can also be written 217 | * as `\p{Type=TokenName}`. 
218 | * @example 219 | * 220 | * // Basic use 221 | * XRegExp.addUnicodeData([{ 222 | * name: 'XDigit', 223 | * alias: 'Hexadecimal', 224 | * bmp: '0-9A-Fa-f' 225 | * }]); 226 | * XRegExp('\\p{XDigit}:\\p{Hexadecimal}+').test('0:3D'); // -> true 227 | */ 228 | XRegExp.addUnicodeData = (data, typePrefix) => { 229 | const ERR_NO_NAME = 'Unicode token requires name'; 230 | const ERR_NO_DATA = 'Unicode token has no character data '; 231 | 232 | if (typePrefix) { 233 | // Case sensitive to match ES2018. NOTE(review): this assignment replaces any name set already registered for the same prefix 234 | unicodeTypes[typePrefix] = {}; 235 | } 236 | 237 | for (const item of data) { 238 | if (!item.name) { 239 | throw new Error(ERR_NO_NAME); 240 | } 241 | if (!(item.inverseOf || item.bmp || item.astral)) { 242 | throw new Error(ERR_NO_DATA + item.name); 243 | } 244 | 245 | const normalizedName = normalize(item.name); 246 | unicode[normalizedName] = item; 247 | if (typePrefix) { 248 | unicodeTypes[typePrefix][normalizedName] = true; 249 | } 250 | 251 | if (item.alias) { 252 | const normalizedAlias = normalize(item.alias); 253 | unicode[normalizedAlias] = item; 254 | if (typePrefix) { 255 | unicodeTypes[typePrefix][normalizedAlias] = true; 256 | } 257 | } 258 | } 259 | 260 | // Reset the pattern cache used by the `XRegExp` constructor, since the same pattern and 261 | // flags might now produce different results 262 | XRegExp.cache.flush('patterns'); 263 | }; 264 | 265 | /** 266 | * @ignore 267 | * 268 | * Return a reference to the internal Unicode definition structure for the given Unicode 269 | * Property if the given name is a legal Unicode Property for use in XRegExp `\p` or `\P` regex 270 | * constructs. 271 | * 272 | * @memberOf XRegExp 273 | * @param {String} name Name by which the Unicode Property may be recognized (case-insensitive), 274 | * e.g. `'N'` or `'Number'`. The given name is matched against all registered Unicode 275 | * Properties and Property Aliases. 
276 | * @returns {Object} Reference to definition structure when the name matches a Unicode Property. 277 | * 278 | * @note 279 | * For more info on Unicode Properties, see also http://unicode.org/reports/tr18/#Categories. 280 | * 281 | * @note 282 | * This method is *not* part of the officially documented API and may change or be removed in 283 | * the future. It is meant for userland code that wishes to reuse the (large) internal Unicode 284 | * structures set up by XRegExp. 285 | */ 286 | XRegExp._getUnicodeProperty = (name) => { 287 | const slug = normalize(name); 288 | return unicode[slug]; 289 | }; 290 | }; 291 | -------------------------------------------------------------------------------- /docs/syntax/named_capture_comparison/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Named capture comparison :: XRegExp 6 | 7 | 8 | 9 | 13 |
14 | 25 |
26 | 27 | 28 | 29 | 30 | 31 |

New syntax » Named capture comparison

32 | 33 |

There are several different syntaxes used for named capture. Although Python was the first to implement the feature, most libraries have adopted .NET's alternative syntax.

34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 56 | 61 | 67 | 70 | 71 | 72 | 73 | 74 | 75 | 80 | 85 | 90 | 93 | 94 | 95 | 96 | 97 | 98 | 104 | 110 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 128 | 137 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 155 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 175 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 191 | 196 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 213 | 219 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 236 | 241 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 259 | 266 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 284 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 315 | 320 | 325 | 326 | 327 | 328 | 329 | 330 |
LibraryCaptureBackref in regexBackref in replacementStored atBackref numberingMultiple groups with same name
XRegExp 51 |
    52 |
  • (?<name>…)
  • 53 |
  • (?P<name>…)1
  • 54 |
55 |
57 |
    58 |
  • \k<name>
  • 59 |
60 |
62 |
    63 |
  • $<name>2
  • 64 |
  • ${name}
  • 65 |
66 |
68 | result.groups.name3 69 | SequentialError4
ECMAScript 2018 76 |
    77 |
  • (?<name>…)
  • 78 |
79 |
81 |
    82 |
  • \k<name>
  • 83 |
84 |
86 |
    87 |
  • $<name>
  • 88 |
89 |
91 | result.groups.name 92 | SequentialError
.NET 99 |
    100 |
  • (?<name>…)
  • 101 |
  • (?'name'…)
  • 102 |
103 |
105 |
    106 |
  • \k<name>
  • 107 |
  • \k'name'
  • 108 |
109 |
111 |
    112 |
  • ${name}
  • 113 |
114 |
matcher.Groups('name')Unnamed first, then namedBackref to last executed participating group
Perl 5.10 122 |
    123 |
  • (?<name>…)
  • 124 |
  • (?'name'…)
  • 125 |
  • (?P<name>…)
  • 126 |
127 |
129 |
    130 |
  • \k<name>
  • 131 |
  • \k'name'
  • 132 |
  • \k{name}
  • 133 |
  • \g{name}
  • 134 |
  • (?P=name)
  • 135 |
136 |
138 |
    139 |
  • $+{name}
  • 140 |
141 |
$+{name}SequentialBackref to leftmost participating group
PCRE 7 149 |
    150 |
  • (?<name>…)
  • 151 |
  • (?'name'…)
  • 152 |
  • (?P<name>…)
  • 153 |
154 |
156 |
    157 |
  • \k<name>
  • 158 |
  • \k'name'
  • 159 |
  • \k{name}5
  • 160 |
  • \g{name}5
  • 161 |
  • (?P=name)
  • 162 |
163 |
N/ASequentialError
PCRE 4 171 |
    172 |
  • (?P<name>…)
  • 173 |
174 |
176 |
    177 |
  • (?P=name)
  • 178 |
179 |
N/ASequentialError
Python 187 |
    188 |
  • (?P<name>…)
  • 189 |
190 |
192 |
    193 |
  • (?P=name)
  • 194 |
195 |
197 |
    198 |
  • \g<name>
  • 199 |
200 |
result.group('name')SequentialError
Oniguruma 208 |
    209 |
  • (?<name>…)
  • 210 |
  • (?'name'…)
  • 211 |
212 |
214 |
    215 |
  • \k<name>
  • 216 |
  • \k'name'
  • 217 |
218 |
220 |
    221 |
  • \k<name>
  • 222 |
  • \k'name'
  • 223 |
224 |
N/AUnnamed groups default to noncapturing when mixed with named groupsBackref to rightmost participating group. Backrefs within a regex work as alternation of matches of all preceding groups with the same name, in reverse order.
Java 7 232 |
    233 |
  • (?<name>…)
  • 234 |
235 |
237 |
    238 |
  • \k<name>
  • 239 |
240 |
242 |
    243 |
  • ${name}
  • 244 |
245 |
matcher.group('name')SequentialError
JGsoft 253 |
    254 |
  • (?<name>…)
  • 255 |
  • (?'name'…)
  • 256 |
  • (?P<name>…)
  • 257 |
258 |
260 |
    261 |
  • \k<name>
  • 262 |
  • \k'name'
  • 263 |
  • (?P=name)
  • 264 |
265 |
267 |
    268 |
  • ${name}
  • 269 |
  • \g<name>
  • 270 |
271 |
N/A.NET and Python styles, depending on capture syntaxSame as .NET
Boost.Regex 279 |
    280 |
  • (?<name>…)
  • 281 |
  • (?'name'…)
  • 282 |
283 |
285 |
    286 |
  • \k<name>
  • 287 |
  • \g{name}
  • 288 |
289 |
????
RE2 298 |
    299 |
  • (?P<name>…)
  • 300 |
301 |
N/A????
JRegex 311 |
    312 |
  • ({name}…)
  • 313 |
314 |
316 |
    317 |
  • {\name}
  • 318 |
319 |
321 |
    322 |
  • ${name}
  • 323 |
324 |
matcher.group('name')??
331 | 332 |

1 As of XRegExp 2. Not recommended for use, because support for the (?P<name>…) syntax may be removed in future versions of XRegExp. It is currently supported only to avoid an octal escape versus backreference issue in old Opera. Opera supported the Python named capture syntax natively, but did not provide full named capture functionality.

333 | 334 |

2 As of XRegExp 4.

335 | 336 |

3 As of XRegExp 4.1, when the namespacing option is on (it's on by default in XRegExp 5). Stored at result.name when namespacing is off.
337 | Note: Within string.replace callbacks, stored at: arguments[arguments.length - 1].name (with namespacing on) or arguments[0].name (with namespacing off).

338 | 339 |

4 As of XRegExp 3.

340 | 341 |

5 As of PCRE 7.2.

342 | 343 |

TODO: Add a column comparing the use of capture names in regex conditionals (not supported by XRegExp).

344 | 345 | 346 | 347 | 348 | 349 |
350 |
351 | 354 | 355 | 356 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # XRegExp 2 | 3 | [![npm version][npm-version-src]][npm-version-href] 4 | [![npm downloads][npm-downloads-src]][npm-downloads-href] 5 | 6 | > [!TIP] 7 | > XRegExp was indispensable for heavy regex users in its time, but many of its best features have been adopted into modern JavaScript. Check out [Regex+](https://github.com/slevithan/regex), the lightweight spiritual successor to XRegExp that once again takes JavaScript regexes to the next level. 8 | 9 | XRegExp provides augmented (and extensible) JavaScript regular expressions. You get modern syntax and flags beyond what browsers support natively. XRegExp is also a regex utility belt with tools to make your grepping and parsing easier, while freeing you from regex cross-browser inconsistencies and other annoyances. 10 | 11 | XRegExp supports ES5+ browsers, and you can use it with Node.js or as a RequireJS module. Over the years, many of XRegExp's features have been adopted by new JavaScript standards (named capturing, Unicode properties/scripts/categories, flag `s`, sticky matching, etc.), so using XRegExp can be a way to extend these features into older browsers. 12 | 13 | ## Performance 14 | 15 | XRegExp compiles to native `RegExp` objects. Therefore regexes built with XRegExp perform just as fast as native regular expressions. There is a tiny extra cost when compiling a pattern for the first time. 16 | 17 | ## Named capture breaking change in XRegExp 5 18 | 19 | XRegExp 5 introduced a breaking change where named backreference properties now appear on the result's `groups` object (following ES2018), rather than directly on the result. 
To restore the old handling so you don't need to update old code, run the following line after importing XRegExp: 20 | 21 | ```js 22 | XRegExp.uninstall('namespacing'); 23 | ``` 24 | 25 | XRegExp 4.1.0 and later allow introducing the new behavior without upgrading to XRegExp 5 by running `XRegExp.install('namespacing')`. 26 | 27 | Following is the most commonly needed change to update code for the new behavior: 28 | 29 | ```js 30 | // Change this 31 | const name = XRegExp.exec(str, regexWithNamedCapture).name; 32 | 33 | // To this 34 | const name = XRegExp.exec(str, regexWithNamedCapture).groups.name; 35 | ``` 36 | 37 | See below for more examples of using named capture with `XRegExp.exec` and `XRegExp.replace`. 38 | 39 | ## Usage examples 40 | 41 | ```js 42 | // Using named capture and flag x for free-spacing and line comments 43 | const date = XRegExp( 44 | `(?<year> [0-9]{4} ) -? # year 45 | (?<month> [0-9]{2} ) -? # month 46 | (?<day> [0-9]{2} ) # day`, 'x'); 47 | 48 | // XRegExp.exec provides named backreferences on the result's groups property 49 | let match = XRegExp.exec('2021-02-22', date); 50 | match.groups.year; // -> '2021' 51 | 52 | // It also includes optional pos and sticky arguments 53 | let pos = 3; 54 | const result = []; 55 | while (match = XRegExp.exec('<1><2><3>4<5>', /<(\d+)>/, pos, 'sticky')) { 56 | result.push(match[1]); 57 | pos = match.index + match[0].length; 58 | } 59 | // result -> ['2', '3'] 60 | 61 | // XRegExp.replace allows named backreferences in replacements 62 | XRegExp.replace('2021-02-22', date, '$<month>/$<day>/$<year>'); 63 | // -> '02/22/2021' 64 | XRegExp.replace('2021-02-22', date, (...args) => { 65 | // Named backreferences are on the last argument 66 | const {day, month, year} = args.at(-1); 67 | return `${month}/${day}/${year}`; 68 | }); 69 | // -> '02/22/2021' 70 | 71 | // XRegExps compile to RegExps and work with native methods 72 | date.test('2021-02-22'); 73 | // -> true 74 | // However, named captures must be referenced using numbered backreferences 
75 | // if used with native methods 76 | '2021-02-22'.replace(date, '$2/$3/$1'); 77 | // -> '02/22/2021' 78 | 79 | // Use XRegExp.forEach to extract every other digit from a string 80 | const evens = []; 81 | XRegExp.forEach('1a2345', /\d/, (match, i) => { 82 | if (i % 2) evens.push(+match[0]); 83 | }); 84 | // evens -> [2, 4] 85 | 86 | // Use XRegExp.matchChain to get numbers within <b> tags 87 | XRegExp.matchChain('1 <b>2</b> 3 <B>4 \n 56</B>', [ 88 | XRegExp('<b>.*?</b>', 'is'), 89 | /\d+/ 90 | ]); 91 | // -> ['2', '4', '56'] 92 | 93 | // You can also pass forward and return specific backreferences 94 | const html = 95 | `<a href="https://xregexp.com/api/">XRegExp</a> 96 | <a href="https://www.google.com/">Google</a>`; 97 | XRegExp.matchChain(html, [ 98 | {regex: /<a href="([^"]+)">/i, backref: 1}, 99 | {regex: XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'} 100 | ]); 101 | // -> ['xregexp.com', 'www.google.com'] 102 | 103 | // Merge strings and regexes, with updated backreferences 104 | XRegExp.union(['m+a*n', /(bear)\1/, /(pig)\1/], 'i', {conjunction: 'or'}); 105 | // -> /m\+a\*n|(bear)\1|(pig)\2/i 106 | ``` 107 | 108 | These examples give the flavor of what's possible, but XRegExp has more syntax, flags, methods, options, and browser fixes that aren't shown here. You can also augment XRegExp's regular expression syntax with addons (see below) or write your own. See [xregexp.com](https://xregexp.com/) for details. 109 | 110 | ## Addons 111 | 112 | You can either load addons individually, or bundle all addons with XRegExp by loading `xregexp-all.js` from https://unpkg.com/xregexp/xregexp-all.js. 113 | 114 | ### Unicode 115 | 116 | If not using `xregexp-all.js`, first include the Unicode Base script and then one or more of the addons for Unicode categories, properties, or scripts. 
117 | 118 | Then you can do this: 119 | 120 | ```js 121 | // Test some Unicode scripts 122 | // Can also use the Script= prefix to match ES2018: \p{Script=Hiragana} 123 | XRegExp('^\\p{Hiragana}+$').test('ひらがな'); // -> true 124 | XRegExp('^[\\p{Latin}\\p{Common}]+$').test('Über Café.'); // -> true 125 | 126 | // Test the Unicode categories Letter and Mark 127 | // Can also use the short names \p{L} and \p{M} 128 | const unicodeWord = XRegExp.tag()`^\p{Letter}[\p{Letter}\p{Mark}]*$`; 129 | unicodeWord.test('Русский'); // -> true 130 | unicodeWord.test('日本語'); // -> true 131 | unicodeWord.test('العربية'); // -> true 132 | ``` 133 | 134 | By default, `\p{…}` and `\P{…}` support the Basic Multilingual Plane (i.e. code points up to `U+FFFF`). You can opt-in to full 21-bit Unicode support (with code points up to `U+10FFFF`) on a per-regex basis by using flag `A`. This is called *astral mode*. You can automatically add flag `A` for all new regexes by running `XRegExp.install('astral')`. When in astral mode, `\p{…}` and `\P{…}` always match a full code point rather than a code unit, using surrogate pairs for code points above `U+FFFF`. 135 | 136 | ```js 137 | // Using flag A to match astral code points 138 | XRegExp('^\\p{S}$').test('💩'); // -> false 139 | XRegExp('^\\p{S}$', 'A').test('💩'); // -> true 140 | // Using surrogate pair U+D83D U+DCA9 to represent U+1F4A9 (pile of poo) 141 | XRegExp('^\\p{S}$', 'A').test('\uD83D\uDCA9'); // -> true 142 | 143 | // Implicit flag A 144 | XRegExp.install('astral'); 145 | XRegExp('^\\p{S}$').test('💩'); // -> true 146 | ``` 147 | 148 | Opting in to astral mode disables the use of `\p{…}` and `\P{…}` within character classes. In astral mode, use e.g. `(\pL|[0-9_])+` instead of `[\pL0-9_]+`. 149 | 150 | XRegExp uses Unicode 14.0.0. 
151 | 152 | ### XRegExp.build 153 | 154 | Build regular expressions using named subpatterns, for readability and pattern reuse: 155 | 156 | ```js 157 | const time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', { 158 | hours: XRegExp.build('{{h12}} : | {{h24}}', { 159 | h12: /1[0-2]|0?[1-9]/, 160 | h24: /2[0-3]|[01][0-9]/ 161 | }), 162 | minutes: /^[0-5][0-9]$/ 163 | }); 164 | 165 | time.test('10:59'); // -> true 166 | XRegExp.exec('10:59', time).groups.minutes; // -> '59' 167 | ``` 168 | 169 | Named subpatterns can be provided as strings or regex objects. A leading `^` and trailing unescaped `$` are stripped from subpatterns if both are present, which allows embedding independently-useful anchored patterns. `{{…}}` tokens can be quantified as a single unit. Any backreferences in the outer pattern or provided subpatterns are automatically renumbered to work correctly within the larger combined pattern. The syntax `({{name}})` works as shorthand for named capture via `(?<name>{{name}})`. Named subpatterns cannot be embedded within character classes. 170 | 171 | #### XRegExp.tag (included with XRegExp.build) 172 | 173 | Provides tagged template literals that create regexes with XRegExp syntax and flags: 174 | 175 | ```js 176 | XRegExp.tag()`\b\w+\b`.test('word'); // -> true 177 | 178 | const hours = /1[0-2]|0?[1-9]/; 179 | const minutes = /(?<minutes>[0-5][0-9])/; 180 | const time = XRegExp.tag('x')`\b ${hours} : ${minutes} \b`; 181 | time.test('10:59'); // -> true 182 | XRegExp.exec('10:59', time).groups.minutes; // -> '59' 183 | 184 | const backref1 = /(a)\1/; 185 | const backref2 = /(b)\1/; 186 | XRegExp.tag()`${backref1}${backref2}`.test('aabb'); // -> true 187 | ``` 188 | 189 | `XRegExp.tag` does more than just interpolation. You get all the XRegExp syntax and flags, and since it reads patterns as raw strings, you no longer need to escape all your backslashes. `XRegExp.tag` also uses `XRegExp.build` under the hood, so you get all of its extras for free. 
Leading `^` and trailing unescaped `$` are stripped from interpolated patterns if both are present (to allow embedding independently useful anchored regexes), interpolating into a character class is an error (to avoid unintended meaning in edge cases), interpolated patterns are treated as atomic units when quantified, interpolated strings have their special characters escaped, and any backreferences within an interpolated regex are rewritten to work within the overall pattern. 190 | 191 | ### XRegExp.matchRecursive 192 | 193 | A robust and flexible API for matching recursive constructs using XRegExp pattern strings as left and right delimiters: 194 | 195 | ```js 196 | const str1 = '(t((e))s)t()(ing)'; 197 | XRegExp.matchRecursive(str1, '\\(', '\\)', 'g'); 198 | // -> ['t((e))s', '', 'ing'] 199 | 200 | // Extended information mode with valueNames 201 | const str2 = 'Here is <div> <div>an</div></div> example'; 202 | XRegExp.matchRecursive(str2, '<div\\s*>', '</div>', 'gi', { 203 | valueNames: ['between', 'left', 'match', 'right'] 204 | }); 205 | /* -> [ 206 | {name: 'between', value: 'Here is ', start: 0, end: 8}, 207 | {name: 'left', value: '<div>', start: 8, end: 13}, 208 | {name: 'match', value: ' <div>an</div>', start: 13, end: 27}, 209 | {name: 'right', value: '</div>', start: 27, end: 33}, 210 | {name: 'between', value: ' example', start: 33, end: 41} 211 | ] */ 212 | 213 | // Omitting unneeded parts with null valueNames, and using escapeChar 214 | const str3 = '...{1}.\\{{function(x,y){return {y:x}}}'; 215 | XRegExp.matchRecursive(str3, '{', '}', 'g', { 216 | valueNames: ['literal', null, 'value', null], 217 | escapeChar: '\\' 218 | }); 219 | /* -> [ 220 | {name: 'literal', value: '...', start: 0, end: 3}, 221 | {name: 'value', value: '1', start: 4, end: 5}, 222 | {name: 'literal', value: '.\\{', start: 6, end: 9}, 223 | {name: 'value', value: 'function(x,y){return {y:x}}', start: 10, end: 37} 224 | ] */ 225 | 226 | // Sticky mode via flag y 227 | const str4 = '<1><<<2>>><3>4<5>'; 228 | XRegExp.matchRecursive(str4, '<', '>', 'gy'); 229 | // -> ['1', '<<2>>', '3'] 230 | 231 | // Skipping unbalanced delimiters instead of erroring 232 | const str5 = 'Here is <div> <div>an</div> unbalanced example'; 233 | XRegExp.matchRecursive(str5, '<div\\s*>', '</div>', 'gi', { 234 | unbalanced: 'skip' 235 | }); 236 | // -> ['an'] 237 | ``` 238 | 239 | By default, `XRegExp.matchRecursive` throws an error if it scans past an unbalanced delimiter in the target string. Multiple alternative options are available for handling unbalanced delimiters. 240 | 241 | ## Installation and usage 242 | 243 | In browsers (bundle XRegExp with all of its addons): 244 | 245 | ```html 246 | <script src="https://unpkg.com/xregexp/xregexp-all.js"></script> 247 | ``` 248 | 249 | Using [npm](https://www.npmjs.com/): 250 | 251 | ```bash 252 | npm install xregexp 253 | ``` 254 | 255 | In [Node.js](https://nodejs.org/en/): 256 | 257 | ```js 258 | const XRegExp = require('xregexp'); 259 | ``` 260 | 261 | 271 | 272 | ## Credits 273 | 274 | XRegExp project collaborators are: 275 | 276 | - [Steven Levithan](https://blog.stevenlevithan.com/) 277 | - [Joseph Frazier](https://github.com/josephfrazier) 278 | - [Mathias Bynens](https://mathiasbynens.be/) 279 | 280 | Thanks to all contributors and others who have submitted code, provided feedback, reported bugs, and inspired new features. 281 | 282 | XRegExp is released under the [MIT License](https://mit-license.org/). Learn more at [xregexp.com](https://xregexp.com/). 
283 | 284 | 285 | 286 | [npm-version-src]: https://img.shields.io/npm/v/xregexp?color=78C372 287 | [npm-version-href]: https://npmjs.com/package/xregexp 288 | [npm-downloads-src]: https://img.shields.io/npm/dm/xregexp?color=78C372 289 | [npm-downloads-href]: https://npmjs.com/package/xregexp 290 | -------------------------------------------------------------------------------- /tests/perf/perf.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | var outputBox = document.getElementById('log'); 3 | var suites = []; 4 | 5 | // Used to skip 21-bit Unicode tests when running older XRegExp versions 6 | var hasAstralSupport = parseInt(XRegExp.version, 10) >= 3; 7 | // The `cache.flush` method was added in v3 8 | XRegExp.cache.flush = XRegExp.cache.flush || function() {}; 9 | // The `install` and `uninstall` methods were added in v2 10 | XRegExp.install = XRegExp.install || function() {}; 11 | XRegExp.uninstall = XRegExp.uninstall || function() {}; 12 | // The `exec` method was renamed from `execAt` in v2 13 | XRegExp.exec = XRegExp.exec || XRegExp.execAt; 14 | 15 | function log(msg) { 16 | outputBox.insertAdjacentHTML('beforeend', msg.replace(/\n/g, '
')); 17 | } 18 | function scrollToEnd() { 19 | window.scroll(0, document.body.scrollHeight); 20 | } 21 | 22 | var suiteOptions = { 23 | onStart: function() { 24 | log('\n' + this.name + ':'); 25 | }, 26 | 27 | onCycle: function(event) { 28 | log('\n' + String(event.target)); 29 | scrollToEnd(); 30 | }, 31 | 32 | onComplete: function() { 33 | log('\nFastest is ' + this.filter('fastest').map('name') + '\n'); 34 | // Remove current suite from queue 35 | suites.shift(); 36 | if (suites.length) { 37 | // Run next suite 38 | suites[0].run(); 39 | } else { 40 | log('\nFinished. ☺'); 41 | } 42 | scrollToEnd(); 43 | } 44 | }; 45 | 46 | // run async 47 | var benchmarkOptions = { 48 | async: true 49 | }; 50 | 51 | // Expose as global 52 | window.run = function() { 53 | log('Testing XRegExp ' + XRegExp.version + '.\n'); 54 | log('Sit back and relax. This might take a while.\n'); 55 | suites[0].run(); 56 | }; 57 | 58 | /*-------------------------------------- 59 | * Start of perf suites 60 | *------------------------------------*/ 61 | 62 | (function() { 63 | var configs = [ 64 | { 65 | name: 'Constructor with short pattern', 66 | pattern: '^([.])\\1+$' 67 | }, 68 | { 69 | name: 'Constructor with medium pattern', 70 | pattern: '^([.])\\1+$ this is a test of a somewhat longer pattern' 71 | }, 72 | { 73 | name: 'Constructor with long pattern', 74 | pattern: XRegExp('\\p{L}').source 75 | }, 76 | { 77 | name: 'Constructor with x flag, whitespace, and comments', 78 | pattern: '\n # comment\n # comment\n', 79 | flags: 'x' 80 | } 81 | ]; 82 | 83 | configs.forEach(function(config) { 84 | var flags = config.flags || ''; 85 | var allFlagsNative = /^[gimuy]*$/.test(flags); 86 | 87 | var suite = new Benchmark.Suite(config.name, suiteOptions) 88 | .add('XRegExp with pattern cache flush', function() { 89 | XRegExp(config.pattern, flags); 90 | XRegExp.cache.flush('patterns'); 91 | }, benchmarkOptions) 92 | .add('XRegExp', function() { 93 | XRegExp(config.pattern, flags); 94 | }, 
benchmarkOptions) 95 | .add('XRegExp.cache', function() { 96 | XRegExp.cache(config.pattern, flags); 97 | }, benchmarkOptions); 98 | if (allFlagsNative) { 99 | suite.add('RegExp', function() { 100 | new RegExp(config.pattern, flags); 101 | }, benchmarkOptions); 102 | } 103 | 104 | suites.push(suite); 105 | }); 106 | }()); 107 | 108 | (function() { 109 | var regexG = /(((?=x).)\2)+/g; 110 | var str = Array(30 + 1).join('hello world x ') + 'xx!'; 111 | var pos = 5; 112 | 113 | suites.push(new Benchmark.Suite('exec', suiteOptions) 114 | .add('Native exec', function() { 115 | regexG.lastIndex = pos; 116 | regexG.exec(str); 117 | }, benchmarkOptions) 118 | .add('XRegExp.exec', function() { 119 | XRegExp.exec(str, regexG, pos); 120 | }, benchmarkOptions) 121 | ); 122 | 123 | var numStrs = 2e5; 124 | var strs = []; 125 | var i; 126 | 127 | // Use lots of different strings to remove the benefit of Opera's regex/string match cache 128 | for (i = 0; i < numStrs; ++i) { 129 | strs.push(str + i); 130 | } 131 | 132 | suites.push(new Benchmark.Suite('exec with ' + numStrs + ' different strings', suiteOptions) 133 | .add('Native exec', function() { 134 | regexG.lastIndex = pos; 135 | regexG.exec(strs[++i] || strs[i = 0]); 136 | }, benchmarkOptions) 137 | .add('XRegExp.exec', function() { 138 | XRegExp.exec(strs[++i] || strs[i = 0], regexG, pos); 139 | }, benchmarkOptions) 140 | ); 141 | 142 | suites.push(new Benchmark.Suite('Sticky exec with ' + numStrs + ' different strings', suiteOptions) 143 | .add('Native exec', function() { 144 | regexG.lastIndex = pos; 145 | var match = regexG.exec(strs[++i] || strs[i = 0]); 146 | if (match && match.index !== pos) { 147 | match = null; 148 | } 149 | }, benchmarkOptions) 150 | .add('XRegExp.exec', function() { 151 | var match = XRegExp.exec(strs[++i] || strs[i = 0], regexG, pos, 'sticky'); // eslint-disable-line no-unused-vars 152 | }, benchmarkOptions) 153 | ); 154 | }()); 155 | 156 | (function() { 157 | var str = Array(30 + 1).join('hello 
xx world '); 158 | 159 | suites.push(Benchmark.Suite('Iteration with a nonglobal regex', suiteOptions) 160 | .add('replace with callback', function() { 161 | var r = /^|(((?=x).)\2)+/; 162 | var matches = []; 163 | if (!r.global) { 164 | // globalize 165 | r = new RegExp( 166 | r.source, 167 | 'g' + 168 | (r.ignoreCase ? 'i' : '') + 169 | (r.multiline ? 'm' : '') + 170 | (r.unicode ? 'u' : '') + 171 | (r.sticky ? 'y' : '') 172 | ); 173 | } 174 | str.replace(r, function(match) { 175 | matches.push(match); 176 | }); 177 | }, benchmarkOptions) 178 | .add('while/exec', function() { 179 | var r = /^|(((?=x).)\2)+/; 180 | var matches = []; 181 | var match; 182 | if (r.global) { 183 | r.lastIndex = 0; 184 | } else { 185 | // globalize 186 | r = new RegExp( 187 | r.source, 188 | 'g' + 189 | (r.ignoreCase ? 'i' : '') + 190 | (r.multiline ? 'm' : '') + 191 | (r.unicode ? 'u' : '') + 192 | (r.sticky ? 'y' : '') 193 | ); 194 | } 195 | while (match = r.exec(str)) { // eslint-disable-line no-cond-assign 196 | matches.push(match[0]); 197 | if (r.lastIndex === match.index) { 198 | ++r.lastIndex; 199 | } 200 | } 201 | }, benchmarkOptions) 202 | .add('while/XRegExp.exec', function() { 203 | var r = /^|(((?=x).)\2)+/; 204 | var matches = []; 205 | var match; 206 | var pos = 0; 207 | while (match = XRegExp.exec(str, r, pos)) { // eslint-disable-line no-cond-assign 208 | matches.push(match[0]); 209 | pos = match.index + (match[0].length || 1); 210 | } 211 | }, benchmarkOptions) 212 | .add('XRegExp.forEach', function() { 213 | var r = /^|(((?=x).)\2)+/; 214 | var matches = []; 215 | XRegExp.forEach(str, r, function(match) { 216 | matches.push(match[0]); 217 | }); 218 | }, benchmarkOptions) 219 | ); 220 | }()); 221 | 222 | (function() { 223 | var str = Array(30 + 1).join('hello world ') + 'http://xregexp.com/path/to/file?q=1'; 224 | var pattern = '\\b([^:/?\\s]+)://([^/?\\s]+)([^?\\s]*)\\??([^\\s]*)'; 225 | var regexp = new RegExp(pattern); 226 | var xregexp = XRegExp(pattern); 227 | 228 
| suites.push(new Benchmark.Suite('Regex object type', suiteOptions) 229 | .add('RegExp object', function() { 230 | regexp.exec(str); 231 | }, benchmarkOptions) 232 | .add('XRegExp object', function() { 233 | xregexp.exec(str); 234 | }, benchmarkOptions) 235 | ); 236 | 237 | var xregexpNamed4 = 238 | XRegExp('\\b(? [^:/?\\s]+ ) :// # aka protocol \n' + 239 | ' (? [^/?\\s]+ ) # domain name/IP \n' + 240 | ' (? [^?\\s]* ) \\?? # optional path \n' + 241 | ' (? [^\\s]* ) # optional query', 'x'); 242 | var xregexpNamed1 = 243 | XRegExp('\\b(? [^:/?\\s]+ ) :// # aka protocol \n' + 244 | ' ( [^/?\\s]+ ) # domain name/IP \n' + 245 | ' ( [^?\\s]* ) \\?? # optional path \n' + 246 | ' ( [^\\s]* ) # optional query', 'x'); 247 | var xregexpNumbered = 248 | XRegExp('\\b( [^:/?\\s]+ ) :// # aka protocol \n' + 249 | ' ( [^/?\\s]+ ) # domain name/IP \n' + 250 | ' ( [^?\\s]* ) \\?? # optional path \n' + 251 | ' ( [^\\s]* ) # optional query', 'x'); 252 | 253 | suites.push(new Benchmark.Suite('Capturing', suiteOptions) 254 | .add('Numbered capture', function() { 255 | XRegExp.exec(str, xregexpNumbered); 256 | }, benchmarkOptions) 257 | .add('Named capture (one name)', function() { 258 | XRegExp.exec(str, xregexpNamed1); 259 | }, benchmarkOptions) 260 | .add('Named capture (four names)', function() { 261 | XRegExp.exec(str, xregexpNamed4); 262 | }, benchmarkOptions) 263 | ); 264 | }()); 265 | 266 | suites.push(new Benchmark.Suite('Unicode letter construction', suiteOptions) 267 | .add('Incomplete set: /[a-z]/i', function() { 268 | XRegExp('(?i)[a-z]'); 269 | XRegExp.cache.flush('patterns'); 270 | }, benchmarkOptions) 271 | .add('BMP only: /\\p{L}/', function() { 272 | XRegExp('\\p{L}'); 273 | XRegExp.cache.flush('patterns'); 274 | }, benchmarkOptions) 275 | .add('Full Unicode: /\\p{L}/A', (hasAstralSupport ? 
276 | function() { 277 | XRegExp('(?A)\\p{L}'); 278 | XRegExp.cache.flush('patterns'); 279 | } : 280 | function() { 281 | throw new Error('Astral mode unsupported'); 282 | } 283 | ), benchmarkOptions) 284 | ); 285 | 286 | (function() { 287 | var asciiText = 'Now is the time for all good men to come to the aid of the party!'; 288 | var mixedText = 'We are looking for a letter/word followed by an exclamation mark, ☃ ☃ ☃ ☃ ☃ and δοκεῖ δέ μοι καὶ Καρχηδόνα μὴ εἶναι!'; 289 | var unicodeText = 'Зоммерфельд получил ряд важных результатов в рамках «старой квантовой теории», предшествовавшей появлению современной квантовой механики!'; 290 | var unicodeText2 = 'როგორც სამედიცინო ფაკულტეტის ახალგაზრდა სტუდენტი, გევარა მთელს ლათინურ ამერიკაში მოგზაურობდა და იგი სწრაფად!'; 291 | 292 | function test(regex) { 293 | regex.test(asciiText); 294 | regex.test(mixedText); 295 | regex.test(unicodeText); 296 | regex.test(unicodeText2); 297 | } 298 | 299 | var azCaselessChar = XRegExp('(?i)[a-z]!'); 300 | var bmpLetterChar = XRegExp('\\p{L}!'); 301 | var astralLetterChar = hasAstralSupport ? XRegExp('(?A)\\p{L}!') : null; 302 | 303 | suites.push(new Benchmark.Suite('Unicode letter matching', suiteOptions) 304 | .add('a-z caseless', function() { 305 | test(azCaselessChar); 306 | }, benchmarkOptions) 307 | .add('\\p{L}', function() { 308 | test(bmpLetterChar); 309 | }, benchmarkOptions) 310 | .add('\\p{L} astral', (hasAstralSupport ? 311 | function() { 312 | test(astralLetterChar); 313 | } : 314 | function() { 315 | throw new Error('Astral mode unsupported'); 316 | }), benchmarkOptions 317 | ) 318 | ); 319 | 320 | var azCaselessWord = XRegExp('(?i)[a-z]+!'); 321 | var bmpLetterWord = XRegExp('\\p{L}+!'); 322 | var astralLetterWord = hasAstralSupport ? 
XRegExp('(?A)\\p{L}+!') : null; 323 | 324 | suites.push(new Benchmark.Suite('Unicode word matching', suiteOptions) 325 | .add('a-z caseless', function() { 326 | test(azCaselessWord); 327 | }, benchmarkOptions) 328 | .add('\\p{L}', function() { 329 | test(bmpLetterWord); 330 | }, benchmarkOptions) 331 | .add('\\p{L} astral', (hasAstralSupport ? 332 | function() { 333 | test(astralLetterWord); 334 | } : 335 | function() { 336 | throw new Error('Astral mode unsupported'); 337 | }), benchmarkOptions 338 | ) 339 | ); 340 | }()); 341 | }()); 342 | -------------------------------------------------------------------------------- /docs/flags/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | New flags :: XRegExp 6 | 7 | 8 | 9 |
13 |
14 | 25 |
26 | 27 | 28 | 29 | 30 | 31 |
32 | 42 |
43 | 44 |

New flags

45 | 46 |

About flags

47 | 48 |

XRegExp provides four new flags (n, s, x, A), which can be combined with native flags and arranged in any order. Unlike native flags, non-native flags do not show up as properties on regular expression objects.

49 | 50 |
    51 |
  • New flags 52 |
      53 |
    • n — Named capture only
    • 54 |
    • s — Dot matches all (singleline) — Added as a native flag in ES2018, but XRegExp always supports it
    • 55 |
    • x — Free-spacing and line comments (extended)
    • 56 |
    • A — 21-bit Unicode properties (astral) — Requires the Unicode Base addon
    • 57 |
    58 |
  • 59 |
  • Native flags 60 |
      61 |
    • g — All matches, or advance lastIndex after matches (global)
    • 62 |
    • i — Case insensitive (ignoreCase)
    • 63 |
    • m — ^ and $ match at newlines (multiline)
    • 64 |
    • u — Handle surrogate pairs as code points and enable \u{…} and \p{…} (unicode) — Requires native ES6 support
    • 65 |
    • y — Matches must start at lastIndex (sticky) — Requires Firefox 3+ or native ES6 support
    • 66 |
    • d — Include indices for capturing groups on match results (hasIndices) — Requires native ES2022 support
    • 67 |
    68 |
  • 69 |
70 | 71 | 72 |

Named capture only (n)

73 | 74 |

Specifies that the only captures are explicitly named groups of the form (?<name>…). This allows unnamed (…) parentheses to act as noncapturing groups without the syntactic clumsiness of the expression (?:…).

75 | 76 |

Annotations

77 |
    78 |
  • Rationale: Backreference capturing adds performance overhead and is needed far less often than simple grouping. The n flag frees the (…) syntax from its often-undesired capturing side effect, while still allowing explicitly-named capturing groups.
  • 79 |
  • Compatibility: No known problems; the n flag is illegal in native JavaScript regular expressions.
  • 80 |
  • Prior art: The n flag comes from .NET, where it's called "explicit capture."
  • 81 |
82 | 83 | 84 |

Dot matches all (s)

85 | 86 | 89 | 90 | 93 | 94 |

Usually, a dot does not match newlines. However, a mode in which dots match any code unit (including newlines) can be as useful as one where dots don't. The s flag allows the mode to be selected on a per-regex basis. Escaped dots (\.) and dots within character classes ([.]) are always equivalent to literal dots. The newline code points are as follows:

95 | 96 |
    97 |
  • U+000A — Line feed — \n
  • 98 |
  • U+000D — Carriage return — \r
  • 99 |
  • U+2028 — Line separator
  • 100 |
  • U+2029 — Paragraph separator
  • 101 |
102 | 103 |

Annotations

104 |
    105 |
  • Rationale: All popular Perl-style regular expression flavors except JavaScript (prior to ES2018) include a flag that allows dots to match newlines. Without this mode, matching any single code unit requires, e.g., [\s\S], [\0-\uFFFF], [^] (JavaScript only; doesn't work in some browsers without XRegExp), or god forbid (.|\s) (which requires unnecessary backtracking).
  • 106 |
  • Compatibility: No known problems; the s flag is illegal in native JavaScript regular expressions prior to ES2018.
  • 107 |
  • Prior art: The s flag comes from Perl.
  • 108 |
109 | 110 |
111 |

When using XRegExp's Unicode Properties addon, you can match any code point without using the s flag via \p{Any}.

112 |
113 | 114 | 115 |

Free-spacing and line comments (x)

116 | 117 |

This flag has two complementary effects. First, it causes all whitespace recognized natively by \s to be ignored, so you can free-format the regex pattern for readability. Second, it allows comments with a leading #. Specifically, it turns whitespace into an "ignore me" metacharacter, and # into an "ignore me and everything else up to the next newline" metacharacter. They aren't taken as metacharacters within character classes (which means that classes are not free-format even with x, following precedent from most other regex libraries that support x), and as with other metacharacters, you can escape whitespace and # that you want to be taken literally. Of course, you can always use \s to match whitespace.

118 | 119 |
120 |

It might be better to think of whitespace and comments as do-nothing (rather than ignore-me) metacharacters. This distinction is important with something like \12 3, which with the x flag is taken as \12 followed by 3, and not \123. However, quantifiers following whitespace or comments apply to the preceding token, so x + is equivalent to x+.

121 |
122 | 123 |

The ignored whitespace characters are those matched natively by \s. ES3 whitespace is based on Unicode 2.1.0 or later. ES5 whitespace is based on Unicode 3.0.0 or later, plus U+FEFF. Following are the code points that should be matched by \s according to ES5 and Unicode 4.0.1:

124 | 125 |
    126 |
  • U+0009 — Tab — \t
  • 127 |
  • U+000A — Line feed — \n
  • 128 |
  • U+000B — Vertical tab — \v
  • 129 |
  • U+000C — Form feed — \f
  • 130 |
  • U+000D — Carriage return — \r
  • 131 |
  • U+0020 — Space
  • 132 |
  • U+00A0 — No-break space
  • 133 |
  • U+1680 — Ogham space mark
  • 134 |
  • U+180E — Mongolian vowel separator
  • 135 |
  • U+2000 — En quad
  • 136 |
  • U+2001 — Em quad
  • 137 |
  • U+2002 — En space
  • 138 |
  • U+2003 — Em space
  • 139 |
  • U+2004 — Three-per-em space
  • 140 |
  • U+2005 — Four-per-em space
  • 141 |
  • U+2006 — Six-per-em space
  • 142 |
  • U+2007 — Figure space
  • 143 |
  • U+2008 — Punctuation space
  • 144 |
  • U+2009 — Thin space
  • 145 |
  • U+200A — Hair space
  • 146 |
  • U+2028 — Line separator
  • 147 |
  • U+2029 — Paragraph separator
  • 148 |
  • U+202F — Narrow no-break space
  • 149 |
  • U+205F — Medium mathematical space
  • 150 |
  • U+3000 — Ideographic space
  • 151 |
  • U+FEFF — Zero width no-break space
  • 152 |
153 | 154 |

Annotations

155 |
    156 |
  • Rationale: Regular expressions are notoriously hard to read; adding whitespace and comments makes regular expressions easier to read.
  • 157 |
  • Compatibility: No known problems; the x flag is illegal in native JavaScript regular expressions.
  • 158 |
  • Prior art: The x flag comes from Perl, and was originally inspired by Jeffrey Friedl's pretty-printing of complex regexes.
  • 159 |
160 | 161 |
162 |

Unicode 1.1.5–4.0.0 assigned code point U+200B (ZWSP) to the Zs (Space separator) category, which means that some browsers or regex engines might include this additional code point in those matched by \s, etc. Unicode 4.0.1 moved ZWSP to the Cf (Format) category.

163 | 164 |

Unicode 1.1.5 assigned code point U+FEFF (ZWNBSP) to the Zs category. Unicode 2.0.14 moved ZWNBSP to the Cf category. ES5 explicitly includes ZWNBSP in its list of whitespace characters, even though this does not match any version of the Unicode standard since 1996.

165 | 166 |

U+180E (Mongolian vowel separator) was introduced in Unicode 3.0.0, which assigned it the Cf category. Unicode 4.0.0 moved it into the Zs category, and Unicode 6.3.0 moved it back to the Cf category.

167 |
168 | 169 |
170 |

JavaScript's \s is similar but not equivalent to \p{Z} (the Separator category) from regex libraries that support Unicode categories, including XRegExp's own Unicode Categories addon. The difference is that \s includes code points U+0009–U+000D and U+FEFF, which are not assigned the Separator category in the Unicode character database.

171 | 172 |

JavaScript's \s is nearly equivalent to \p{White_Space} from the Unicode Properties addon. The differences are: 1. \p{White_Space} does not include U+FEFF (ZWNBSP), and 2. \p{White_Space} includes U+0085 (NEL), which is not assigned the Separator category in the Unicode character database.

173 | 174 |

Aside: Not all JavaScript regex syntax is Unicode-aware. According to JavaScript specs, \s, \S, ., ^, and $ use Unicode-based interpretations of whitespace and newline, while \d, \D, \w, \W, \b, and \B use ASCII-only interpretations of digit, word character, and word boundary. Some browsers and browser versions get aspects of these details wrong.

175 | 176 |

For more details, see JavaScript, Regex, and Unicode.

177 |
178 | 179 | 180 |

21-bit Unicode properties (A)

181 | 182 |

Requires the Unicode Base addon.

183 | 184 |

By default, \p{…} and \P{…} support the Basic Multilingual Plane (i.e. code points up to U+FFFF). You can opt-in to full 21-bit Unicode support (with code points up to U+10FFFF) on a per-regex basis by using flag A. In XRegExp, this is called astral mode. You can automatically add flag A for all new regexes by running XRegExp.install('astral'). When in astral mode, \p{…} and \P{…} always match a full code point rather than a code unit, using surrogate pairs for code points above U+FFFF.

185 | 186 |
// Using flag A to match astral code points
187 | XRegExp('^\\p{S}$').test('💩'); // -> false
188 | XRegExp('^\\p{S}$', 'A').test('💩'); // -> true
189 | XRegExp('(?A)^\\p{S}$').test('💩'); // -> true
190 | // Using surrogate pair U+D83D U+DCA9 to represent U+1F4A9 (pile of poo)
191 | XRegExp('(?A)^\\p{S}$').test('\uD83D\uDCA9'); // -> true
192 | 
193 | // Implicit flag A
194 | XRegExp.install('astral');
195 | XRegExp('^\\p{S}$').test('💩'); // -> true
196 | 
197 | 198 |

Important: Opting in to astral mode disables the use of \p{…} and \P{…} within character classes. In astral mode, use e.g. (\p{L}|[0-9_])+ instead of [\p{L}0-9_]+.

199 | 200 |

Annotations

201 |
    202 |
  • Rationale: Astral code point matching uses surrogate pairs and is somewhat slower than BMP-only matching. Enabling astral code point matching on a per-regex basis can therefore be useful.
  • 203 |
  • Compatibility: No known problems; the A flag is illegal in native JavaScript regular expressions.
  • 204 |
  • Prior art: None.
  • 205 |
206 | 207 | 208 | 209 | 210 | 211 |
212 |
213 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /docs/syntax/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | New syntax :: XRegExp 6 | 7 | 8 | 9 | 13 |
14 | 25 |
26 | 27 | 28 | 29 | 30 | 31 |
32 |
33 |

Table of contents

34 | 42 |
43 |
44 | 45 |

New syntax

46 | 47 |

Named capture

48 | 49 |

XRegExp includes comprehensive support for named capture. Following are the details of XRegExp's named capture syntax:

50 | 51 |
    52 |
  • Capture: (?<name>…)
  • 53 |
  • Backreference in regex: \k<name>
  • 54 |
  • Backreference in replacement text: $<name>
  • 55 |
  • Backreference stored at: result.groups.name
  • 56 |
  • Backreference numbering: Sequential (i.e., left to right for both named and unnamed capturing groups)
  • 57 |
  • Multiple groups with same name: SyntaxError
  • 58 |
59 | 60 |

Notes

61 |
    62 |
  • See additional details and compare to named capture in other regex flavors here: Named capture comparison.
  • 63 |
  • JavaScript added native support for named capture in ES2018. XRegExp support predates this, and it extends this support into pre-ES2018 browsers.
  • 64 |
  • Capture names can use a wide range of Unicode characters (see the definition of RegExpIdentifierName).
  • 65 |
66 | 67 |

Example

68 |
const repeatedWords = XRegExp.tag('gi')`\b(?<word>[a-z]+)\s+\k<word>\b`;
 69 | // Alternatively: XRegExp('\\b(?<word>[a-z]+)\\s+\\k<word>\\b', 'gi');
 70 | 
 71 | // Check for repeated words
 72 | repeatedWords.test('The the test data');
 73 | // -> true
 74 | 
 75 | // Remove any repeated words
 76 | const withoutRepeated = XRegExp.replace('The the test data', repeatedWords, '${word}');
 77 | // -> 'The test data'
 78 | 
 79 | const url = XRegExp(`^(?<scheme> [^:/?]+ ) ://   # aka protocol
 80 |                       (?<host>   [^/?]+  )       # domain name/IP
 81 |                       (?<path>   [^?]*   ) \\??  # optional path
 82 |                       (?<query>  .*      )       # optional query`, 'x');
 83 | 
 84 | // Get the URL parts
 85 | const parts = XRegExp.exec('https://google.com/path/to/file?q=1', url);
 86 | // parts -> ['https://google.com/path/to/file?q=1', 'https', 'google.com', '/path/to/file', 'q=1']
 87 | // parts.groups.scheme -> 'https'
 88 | // parts.groups.host   -> 'google.com'
 89 | // parts.groups.path   -> '/path/to/file'
 90 | // parts.groups.query  -> 'q=1'
 91 | 
 92 | // Named backreferences are available in replacement functions as properties of the last argument
 93 | XRegExp.replace('https://google.com/path/to/file?q=1', url, (match, ...args) => {
 94 |   const groups = args.pop();
 95 |   return match.replace(groups.host, 'xregexp.com');
 96 | });
 97 | // -> 'https://xregexp.com/path/to/file?q=1'
 98 | 
99 | 100 |

Regexes that use named capture work with all native methods. However, you need to use XRegExp.exec and XRegExp.replace for access to named backreferences, otherwise only numbered backreferences are available.

101 | 102 |

Annotations

103 |
    104 |
  • Rationale: Named capture can help make regular expressions and related code self-documenting, and thereby easier to read and use.
  • 105 |
  • Compatibility: The named capture syntax is illegal in pre-ES2018 native JavaScript regular expressions and hence does not cause problems. Backreferences to undefined named groups throw a SyntaxError.
  • 106 |
  • Compatibility with deprecated features: XRegExp's named capture functionality does not support the lastMatch property of the global RegExp object or the RegExp.prototype.compile method, since those features were deprecated in JavaScript 1.5.
  • 107 |
  • Prior art: Comes from Python (feature) and .NET (syntax).
  • 108 |
109 | 110 | 111 |

Inline comments

112 | 113 |

Inline comments use the syntax (?#comment). They are an alternative to the line comments allowed in free-spacing mode.

114 | 115 |

Comments are a do-nothing (rather than ignore-me) metasequence. This distinction is important with something like \1(?#comment)2, which is taken as \1 followed by 2, and not \12. However, quantifiers following comments apply to the preceding token, so x(?#comment)+ is equivalent to x+.

116 | 117 |

Example

118 |
const regex = XRegExp('^(?#month)\\d{1,2}/(?#day)\\d{1,2}/(?#year)(\\d{2}){1,2}', 'n');
119 | const isDate = regex.test('04/20/2008'); // -> true
120 | 
121 | // Can still be useful when combined with free-spacing, because inline comments
122 | // don't need to end with \n
123 | const regex = XRegExp('^ \\d{1,2}      (?#month)' +
124 |                       '/ \\d{1,2}      (?#day  )' +
125 |                       '/ (\\d{2}){1,2} (?#year )', 'nx');
126 | 
127 | 128 |

Annotations

129 |
    130 |
  • Rationale: Comments make regular expressions more readable.
  • 131 |
  • Compatibility: No known problems with this syntax; it is illegal in native JavaScript regular expressions.
  • 132 |
  • Prior art: The syntax comes from Perl. It is also available in .NET, PCRE, Python, Ruby, and Tcl, among other regular expression flavors.
  • 133 |
134 | 135 | 136 |

Leading mode modifier

137 | 138 |

A mode modifier uses the syntax (?imnsuxA), where imnsuxA is any combination of XRegExp flags except g, y, or d. Mode modifiers provide an alternate way to enable the specified flags. XRegExp allows the use of a single mode modifier at the very beginning of a pattern only.

139 | 140 |

Example

141 |
const regex = XRegExp('(?im)^[a-z]+$');
142 | regex.ignoreCase; // -> true
143 | regex.multiline; // -> true
144 | 
145 | 146 |

When creating a regex, it's okay to include flags in a mode modifier that are also provided via the separate flags argument. For instance, XRegExp('(?s).+', 's') is valid.

147 | 148 |

Flags g, y, and d cannot be included in a mode modifier, or an error is thrown. This is because g, y, and d, unlike all other flags, have no impact on the meaning of a regex. Rather, they change how particular methods choose to apply the regex. XRegExp methods provide e.g. scope, sticky, and pos arguments that allow you to use and change such functionality on a per-run rather than per-regex basis. Additionally, consider that it makes sense to apply all other flags to a particular subsection of a regex, whereas flags g, y, and d only make sense when applied to the regex as a whole. Allowing g, y, and d in a mode modifier might therefore create future compatibility problems.

149 | 150 |

The use of unknown flags in a mode modifier causes an error to be thrown. However, XRegExp addons can add new flags that are then automatically valid within mode modifiers.

151 | 152 |

Annotations

153 |
    154 |
  • Rationale: Mode modifiers allow you to enable flags in situations where a regex pattern can be provided as a string only. They can also improve readability, since flags are read first rather than after the pattern.
  • 155 |
  • Compatibility: No known problems with this syntax; it is illegal in native JavaScript regular expressions.
  • 156 |
  • Compatibility with other regex flavors: Some regex flavors support the use of multiple mode modifiers anywhere in a pattern, and allow extended syntax for unsetting flags via (?-i), simultaneously setting and unsetting flags via (?i-m), and enabling flags for subpatterns only via (?i:…). XRegExp does not support these extended options.
  • 157 |
  • Prior art: The syntax comes from Perl. It is also available in .NET, Java, PCRE, Python, Ruby, and Tcl, among other regular expression flavors.
  • 158 |
159 | 160 | 161 |

Stricter error handling

162 | 163 |

XRegExp makes any escaped letters or numbers a SyntaxError unless they form a valid and complete metasequence or backreference. This helps to catch errors early, and makes it safe for future versions of ES or XRegExp to introduce new escape sequences. It also means that octal escapes are always an error in XRegExp. ES3/5 do not allow octal escapes, but browsers support them anyway for backward compatibility, which often leads to unintended behavior.

164 | 165 |

XRegExp requires all backreferences, whether written as \n, \k<n>, or \k<name>, to appear to the right of the opening parenthesis of the group they reference.

166 | 167 |

XRegExp never allows \n-style backreferences to be followed by literal numbers. To match backreference 1 followed by a literal 2 character, you can use, e.g., (a)\k<1>2, (?x)(a)\1 2, or (a)\1(?#)2.

168 | 169 | 170 |

Unicode

171 | 172 |

XRegExp supports matching Unicode categories, scripts, and other properties via addon scripts. Such tokens are matched using \p{…}, \P{…}, and \p{^…}. See XRegExp Unicode addons for more details.

173 | 174 |

XRegExp additionally supports the \u{N…} syntax for matching individual code points. In ES6 this is supported natively, but only when using the u flag. XRegExp supports this syntax for code points 0–FFFF even when not using the u flag, and it supports the complete Unicode range 0–10FFFF when using u.

175 | 176 | 177 |

Replacement text

178 | 179 |

XRegExp's replacement text syntax is used by the XRegExp.replace function. It adds $0 as a synonym of $& (to refer to the entire match), and adds $<n> and ${n} for backreferences to named and numbered capturing groups (in addition to $1, etc.). When the braces syntax is used for numbered backreferences, it allows numbers with three or more digits (not possible natively) and allows separating a backreference from an immediately-following digit (not always possible natively). XRegExp uses stricter replacement text error handling than native JavaScript, to help you catch errors earlier (e.g., the use of a $ character that isn't part of a valid metasequence causes an error to be thrown).

180 | 181 |

Following are the special tokens that can be used in XRegExp replacement strings:

182 | 183 |
    184 |
  • $$ - Inserts a literal $ character.
  • 185 |
  • $&, $0 - Inserts the matched substring.
  • 186 |
  • $` - Inserts the string that precedes the matched substring (left context).
  • 187 |
  • $' - Inserts the string that follows the matched substring (right context).
  • 188 |
  • $n, $nn - Where n/nn are digits referencing an existing capturing group, inserts 189 | backreference n/nn.
  • 190 |
  • $<n>, ${n} - Where n is a name or any number of digits that reference an existent capturing 191 | group, inserts backreference n.
  • 192 |
193 | 194 |

XRegExp behavior for $<n> and ${n}:

195 | 196 |
    197 |
  • Backreference to numbered capture, if n is an integer. Use 0 for the entire match. Any number of leading zeros may be used.
  • 198 |
  • Backreference to named capture n, if it exists. Does not overlap with numbered capture since XRegExp does not allow named capture to use a bare integer as the name.
  • 199 |
  • If the name or number does not refer to an existing capturing group, it's an error.
  • 200 |
201 | 202 |

XRegExp behavior for $n and $nn:

203 | 204 |
    205 |
  • Backreferences without curly braces end after 1 or 2 digits. Use ${…} for more digits.
  • 206 |
  • $1 is an error if there are no capturing groups.
  • 207 |
    • $10 is an error if there are fewer than 10 capturing groups. Use ${1}0 instead.
  • 208 |
  • $01 is equivalent to $1 if a capturing group exists, otherwise it's an error.
  • 209 |
  • $0 (not followed by 1-9) and $00 are the entire match.
  • 210 |
211 | 212 |

For comparison, following is JavaScript's native behavior for $n and $nn:

213 | 214 |
    215 |
  • Backreferences end after 1 or 2 digits. Cannot use backreference to capturing group 100+.
  • 216 |
  • $1 is a literal $1 if there are no capturing groups.
  • 217 |
    • $10 is $1 followed by a literal 0 if there are fewer than 10 capturing groups.
  • 218 |
  • $01 is equivalent to $1 if a capturing group exists, otherwise it's a literal $01.
  • 219 |
  • $0 is a literal $0.
  • 220 |
221 | 222 | 223 | 224 | 225 | 226 |
227 |
228 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /tools/output/properties.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | { 3 | 'name': 'ASCII', 4 | 'bmp': '\0-\x7F' 5 | }, 6 | { 7 | 'name': 'Alphabetic', 8 | 'bmp': 'A-Za-z\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0345\u0370-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u052F\u0531-\u0556\u0559\u0560-\u0588\u05B0-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u05D0-\u05EA\u05EF-\u05F2\u0610-\u061A\u0620-\u0657\u0659-\u065F\u066E-\u06D3\u06D5-\u06DC\u06E1-\u06E8\u06ED-\u06EF\u06FA-\u06FC\u06FF\u0710-\u073F\u074D-\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0817\u081A-\u082C\u0840-\u0858\u0860-\u086A\u0870-\u0887\u0889-\u088E\u08A0-\u08C9\u08D4-\u08DF\u08E3-\u08E9\u08F0-\u093B\u093D-\u094C\u094E-\u0950\u0955-\u0963\u0971-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD-\u09C4\u09C7\u09C8\u09CB\u09CC\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09F0\u09F1\u09FC\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3E-\u0A42\u0A47\u0A48\u0A4B\u0A4C\u0A51\u0A59-\u0A5C\u0A5E\u0A70-\u0A75\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD-\u0AC5\u0AC7-\u0AC9\u0ACB\u0ACC\u0AD0\u0AE0-\u0AE3\u0AF9-\u0AFC\u0B01-\u0B03\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D-\u0B44\u0B47\u0B48\u0B4B\u0B4C\u0B56\u0B57\u0B5C\u0B5D\u0B5F-\u0B63\u0B71\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD0\u0BD7\u0C00-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D-\u0C44\u0C46-\u0C48\u0C4A-\u0C4C\u0C55\u0C56\u0C58-\u0C5A\u0C5D\u0C60-\u0C63\u0C80-\u0C
83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCC\u0CD5\u0CD6\u0CDD\u0CDE\u0CE0-\u0CE3\u0CF1\u0CF2\u0D00-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D-\u0D44\u0D46-\u0D48\u0D4A-\u0D4C\u0D4E\u0D54-\u0D57\u0D5F-\u0D63\u0D7A-\u0D7F\u0D81-\u0D83\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E01-\u0E3A\u0E40-\u0E46\u0E4D\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EB9\u0EBB-\u0EBD\u0EC0-\u0EC4\u0EC6\u0ECD\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F71-\u0F81\u0F88-\u0F97\u0F99-\u0FBC\u1000-\u1036\u1038\u103B-\u103F\u1050-\u108F\u109A-\u109D\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F5\u13F8-\u13FD\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16EE-\u16F8\u1700-\u1713\u171F-\u1733\u1740-\u1753\u1760-\u176C\u176E-\u1770\u1772\u1773\u1780-\u17B3\u17B6-\u17C8\u17D7\u17DC\u1820-\u1878\u1880-\u18AA\u18B0-\u18F5\u1900-\u191E\u1920-\u192B\u1930-\u1938\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A1B\u1A20-\u1A5E\u1A61-\u1A74\u1AA7\u1ABF\u1AC0\u1ACC-\u1ACE\u1B00-\u1B33\u1B35-\u1B43\u1B45-\u1B4C\u1B80-\u1BA9\u1BAC-\u1BAF\u1BBA-\u1BE5\u1BE7-\u1BF1\u1C00-\u1C36\u1C4D-\u1C4F\u1C5A-\u1C7D\u1C80-\u1C88\u1C90-\u1CBA\u1CBD-\u1CBF\u1CE9-\u1CEC\u1CEE-\u1CF3\u1CF5\u1CF6\u1CFA\u1D00-\u1DBF\u1DE7-\u1DF4\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2160-\u2188\u24B6-\u24E9\u2C00-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\
u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2DE0-\u2DFF\u2E2F\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BF\u31F0-\u31FF\u3400-\u4DBF\u4E00-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA674-\uA67B\uA67F-\uA6EF\uA717-\uA71F\uA722-\uA788\uA78B-\uA7CA\uA7D0\uA7D1\uA7D3\uA7D5-\uA7D9\uA7F2-\uA805\uA807-\uA827\uA840-\uA873\uA880-\uA8C3\uA8C5\uA8F2-\uA8F7\uA8FB\uA8FD-\uA8FF\uA90A-\uA92A\uA930-\uA952\uA960-\uA97C\uA980-\uA9B2\uA9B4-\uA9BF\uA9CF\uA9E0-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA36\uAA40-\uAA4D\uAA60-\uAA76\uAA7A-\uAABE\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEF\uAAF2-\uAAF5\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uAB30-\uAB5A\uAB5C-\uAB69\uAB70-\uABEA\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC', 9 | 'astral': 
'\uD800[\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1F\uDF2D-\uDF4A\uDF50-\uDF7A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]|\uD801[\uDC00-\uDC9D\uDCB0-\uDCD3\uDCD8-\uDCFB\uDD00-\uDD27\uDD30-\uDD63\uDD70-\uDD7A\uDD7C-\uDD8A\uDD8C-\uDD92\uDD94\uDD95\uDD97-\uDDA1\uDDA3-\uDDB1\uDDB3-\uDDB9\uDDBB\uDDBC\uDE00-\uDF36\uDF40-\uDF55\uDF60-\uDF67\uDF80-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD802[\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDC60-\uDC76\uDC80-\uDC9E\uDCE0-\uDCF2\uDCF4\uDCF5\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00-\uDE03\uDE05\uDE06\uDE0C-\uDE13\uDE15-\uDE17\uDE19-\uDE35\uDE60-\uDE7C\uDE80-\uDE9C\uDEC0-\uDEC7\uDEC9-\uDEE4\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72\uDF80-\uDF91]|\uD803[\uDC00-\uDC48\uDC80-\uDCB2\uDCC0-\uDCF2\uDD00-\uDD27\uDE80-\uDEA9\uDEAB\uDEAC\uDEB0\uDEB1\uDF00-\uDF1C\uDF27\uDF30-\uDF45\uDF70-\uDF81\uDFB0-\uDFC4\uDFE0-\uDFF6]|\uD804[\uDC00-\uDC45\uDC71-\uDC75\uDC82-\uDCB8\uDCC2\uDCD0-\uDCE8\uDD00-\uDD32\uDD44-\uDD47\uDD50-\uDD72\uDD76\uDD80-\uDDBF\uDDC1-\uDDC4\uDDCE\uDDCF\uDDDA\uDDDC\uDE00-\uDE11\uDE13-\uDE34\uDE37\uDE3E\uDE80-\uDE86\uDE88\uDE8A-\uDE8D\uDE8F-\uDE9D\uDE9F-\uDEA8\uDEB0-\uDEE8\uDF00-\uDF03\uDF05-\uDF0C\uDF0F\uDF10\uDF13-\uDF28\uDF2A-\uDF30\uDF32\uDF33\uDF35-\uDF39\uDF3D-\uDF44\uDF47\uDF48\uDF4B\uDF4C\uDF50\uDF57\uDF5D-\uDF63]|\uD805[\uDC00-\uDC41\uDC43-\uDC45\uDC47-\uDC4A\uDC5F-\uDC61\uDC80-\uDCC1\uDCC4\uDCC5\uDCC7\uDD80-\uDDB5\uDDB8-\uDDBE\uDDD8-\uDDDD\uDE00-\uDE3E\uDE40\uDE44\uDE80-\uDEB5\uDEB8\uDF00-\uDF1A\uDF1D-\uDF2A\uDF40-\uDF46]|\uD806[\uDC00-\uDC38\uDCA0-\uDCDF\uDCFF-\uDD06\uDD09\uDD0C-\uDD13\uDD15\uDD16\uDD18-\uDD35\uDD37\uDD38\uDD3B\uDD3C\uDD3F-\uDD42\uDDA0-\uDDA7\uDDAA-\uDDD7\uDDDA-\uDDDF\uDDE1\uDDE3\uDDE4\uDE00-\uDE32\uDE35-\uDE3E\uDE50-\uDE97\uDE9D\uDEB0-\uDEF8]|\uD807[\uDC00-\uDC08\uDC0A-\uDC36\uDC38-\uDC3E\uDC40\uDC72-\uDC8F\uDC92-\uDCA7\uDCA9-\uDCB6\uDD00-\uDD06\uDD08\uDD09\uDD0B-\u
DD36\uDD3A\uDD3C\uDD3D\uDD3F-\uDD41\uDD43\uDD46\uDD47\uDD60-\uDD65\uDD67\uDD68\uDD6A-\uDD8E\uDD90\uDD91\uDD93-\uDD96\uDD98\uDEE0-\uDEF6\uDFB0]|\uD808[\uDC00-\uDF99]|\uD809[\uDC00-\uDC6E\uDC80-\uDD43]|\uD80B[\uDF90-\uDFF0]|[\uD80C\uD81C-\uD820\uD822\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879\uD880-\uD883][\uDC00-\uDFFF]|\uD80D[\uDC00-\uDC2E]|\uD811[\uDC00-\uDE46]|\uD81A[\uDC00-\uDE38\uDE40-\uDE5E\uDE70-\uDEBE\uDED0-\uDEED\uDF00-\uDF2F\uDF40-\uDF43\uDF63-\uDF77\uDF7D-\uDF8F]|\uD81B[\uDE40-\uDE7F\uDF00-\uDF4A\uDF4F-\uDF87\uDF8F-\uDF9F\uDFE0\uDFE1\uDFE3\uDFF0\uDFF1]|\uD821[\uDC00-\uDFF7]|\uD823[\uDC00-\uDCD5\uDD00-\uDD08]|\uD82B[\uDFF0-\uDFF3\uDFF5-\uDFFB\uDFFD\uDFFE]|\uD82C[\uDC00-\uDD22\uDD50-\uDD52\uDD64-\uDD67\uDD70-\uDEFB]|\uD82F[\uDC00-\uDC6A\uDC70-\uDC7C\uDC80-\uDC88\uDC90-\uDC99\uDC9E]|\uD835[\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB]|\uD837[\uDF00-\uDF1E]|\uD838[\uDC00-\uDC06\uDC08-\uDC18\uDC1B-\uDC21\uDC23\uDC24\uDC26-\uDC2A\uDD00-\uDD2C\uDD37-\uDD3D\uDD4E\uDE90-\uDEAD\uDEC0-\uDEEB]|\uD839[\uDFE0-\uDFE6\uDFE8-\uDFEB\uDFED\uDFEE\uDFF0-\uDFFE]|\uD83A[\uDC00-\uDCC4\uDD00-\uDD43\uDD47\uDD4B]|\uD83B[\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]|\uD83C[\uDD30-\uDD49\uDD50-\uDD69\uDD70-\uDD89]|\uD869[\uDC00-\uDEDF\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF38\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0]|\uD87E[\uDC00-\uDE1D]
|\uD884[\uDC00-\uDF4A]' 10 | }, 11 | { 12 | 'name': 'Any', 13 | 'isBmpLast': true, 14 | 'bmp': '\0-\uFFFF', 15 | 'astral': '[\uD800-\uDBFF][\uDC00-\uDFFF]' 16 | }, 17 | { 18 | 'name': 'Default_Ignorable_Code_Point', 19 | 'bmp': '\xAD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180B-\u180F\u200B-\u200F\u202A-\u202E\u2060-\u206F\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8', 20 | 'astral': '\uD82F[\uDCA0-\uDCA3]|\uD834[\uDD73-\uDD7A]|[\uDB40-\uDB43][\uDC00-\uDFFF]' 21 | }, 22 | { 23 | 'name': 'Lowercase', 24 | 'bmp': 'a-z\xAA\xB5\xBA\xDF-\xF6\xF8-\xFF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02B8\u02C0\u02C1\u02E0-\u02E4\u0345\u0371\u0373\u0377\u037A-\u037D\u0390\u03AC-\u03CE\u03D0\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\
u04CA\u04CC\u04CE\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0560-\u0588\u10D0-\u10FA\u10FD-\u10FF\u13F8-\u13FD\u1C80-\u1C88\u1D00-\u1DBF\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6\u1FC7\u1FD0-\u1FD3\u1FD6\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6\u1FF7\u2071\u207F\u2090-\u209C\u210A\u210E\u210F\u2113\u212F\u2134\u2139\u213C\u213D\u2146-\u2149\u214E\u2170-\u217F\u2184\u24D0-\u24E9\u2C30-\u2C5F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7D\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA
64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B-\uA69D\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7BB\uA7BD\uA7BF\uA7C1\uA7C3\uA7C8\uA7CA\uA7D1\uA7D3\uA7D5\uA7D7\uA7D9\uA7F6\uA7F8-\uA7FA\uAB30-\uAB5A\uAB5C-\uAB68\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A', 25 | 'astral': '\uD801[\uDC28-\uDC4F\uDCD8-\uDCFB\uDD97-\uDDA1\uDDA3-\uDDB1\uDDB3-\uDDB9\uDDBB\uDDBC\uDF80\uDF83-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD803[\uDCC0-\uDCF2]|\uD806[\uDCC0-\uDCDF]|\uD81B[\uDE60-\uDE7F]|\uD835[\uDC1A-\uDC33\uDC4E-\uDC54\uDC56-\uDC67\uDC82-\uDC9B\uDCB6-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDCCF\uDCEA-\uDD03\uDD1E-\uDD37\uDD52-\uDD6B\uDD86-\uDD9F\uDDBA-\uDDD3\uDDEE-\uDE07\uDE22-\uDE3B\uDE56-\uDE6F\uDE8A-\uDEA5\uDEC2-\uDEDA\uDEDC-\uDEE1\uDEFC-\uDF14\uDF16-\uDF1B\uDF36-\uDF4E\uDF50-\uDF55\uDF70-\uDF88\uDF8A-\uDF8F\uDFAA-\uDFC2\uDFC4-\uDFC9\uDFCB]|\uD837[\uDF00-\uDF09\uDF0B-\uDF1E]|\uD83A[\uDD22-\uDD43]' 26 | }, 27 | { 28 | 'name': 'Noncharacter_Code_Point', 29 | 'bmp': '\uFDD0-\uFDEF\uFFFE\uFFFF', 30 | 'astral': '[\uD83F\uD87F\uD8BF\uD8FF\uD93F\uD97F\uD9BF\uD9FF\uDA3F\uDA7F\uDABF\uDAFF\uDB3F\uDB7F\uDBBF\uDBFF][\uDFFE\uDFFF]' 31 | }, 32 | { 33 | 'name': 'Uppercase', 34 | 'bmp': 
'A-Z\xC0-\xD6\xD8-\xDE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E\u038F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1C90-\u1CBA\u1CBD-\u1CBF\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A
\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E\u213F\u2145\u2160-\u216F\u2183\u24B6-\u24CF\u2C00-\u2C2F\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\uA7BA\uA7BC\uA7BE\uA7C0\uA7C2\uA7C4-\uA7C7\uA7C9\
uA7D0\uA7D6\uA7D8\uA7F5\uFF21-\uFF3A', 35 | 'astral': '\uD801[\uDC00-\uDC27\uDCB0-\uDCD3\uDD70-\uDD7A\uDD7C-\uDD8A\uDD8C-\uDD92\uDD94\uDD95]|\uD803[\uDC80-\uDCB2]|\uD806[\uDCA0-\uDCBF]|\uD81B[\uDE40-\uDE5F]|\uD835[\uDC00-\uDC19\uDC34-\uDC4D\uDC68-\uDC81\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB5\uDCD0-\uDCE9\uDD04\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD38\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD6C-\uDD85\uDDA0-\uDDB9\uDDD4-\uDDED\uDE08-\uDE21\uDE3C-\uDE55\uDE70-\uDE89\uDEA8-\uDEC0\uDEE2-\uDEFA\uDF1C-\uDF34\uDF56-\uDF6E\uDF90-\uDFA8\uDFCA]|\uD83A[\uDD00-\uDD21]|\uD83C[\uDD30-\uDD49\uDD50-\uDD69\uDD70-\uDD89]' 36 | }, 37 | { 38 | 'name': 'White_Space', 39 | 'bmp': '\t-\r \x85\xA0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000' 40 | } 41 | ]; 42 | --------------------------------------------------------------------------------