├── coverage └── .gitkeep ├── .prettierrc.js ├── index.mjs ├── browser.ts ├── .gitignore ├── .npmignore ├── tsconfig.json ├── webpack.config.js ├── jest.config.js ├── .github └── workflows │ └── test-and-cov.yml ├── .eslintrc.js ├── CHANGELOG.md ├── LICENSE ├── package.json ├── README.md ├── lib ├── index.d.ts └── index.js.map ├── __tests__ └── index.test.ts ├── dist └── reregexp.min.js └── src └── index.ts /coverage/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | semi: true, 3 | trailingComma: "all", 4 | singleQuote: true, 5 | tabWidth: 2 6 | }; -------------------------------------------------------------------------------- /index.mjs: -------------------------------------------------------------------------------- 1 | import mod from './lib/index.js'; 2 | export default mod.default; 3 | export const { CharsetHelper, parserRule, regexpRule } = mod; 4 | -------------------------------------------------------------------------------- /browser.ts: -------------------------------------------------------------------------------- 1 | import ReRegExp from './src/index'; 2 | const global: typeof window & { 3 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 4 | [index: string]: any; 5 | } = window; 6 | global['ReRegExp'] = ReRegExp; 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # node modules 2 | node_modules/ 3 | 4 | # vscode 5 | .vscode/* 6 | 7 | # build 8 | /local 9 | /coverage 10 | .husky/ 11 | 12 | # system temp files 13 | .DS_Store* 14 | 15 | # use pnpm 16 | package-lock.json 17 | .yarn.lock 18 | .pnpm-store/ 
-------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | # dir 2 | /src/ 3 | /local/ 4 | /__tests__/ 5 | /dist/ 6 | /node_modules/ 7 | /coverage 8 | 9 | # files 10 | browser.ts 11 | 12 | # system temp file 13 | .DS_Store 14 | 15 | # vscode config 16 | .vscode 17 | 18 | # package 19 | yarn.lock 20 | package-lock.json 21 | 22 | # config files 23 | .travis.yml 24 | webpack.config.js 25 | jest.config.js 26 | .prettierrc.js 27 | .eslintrc.js 28 | 29 | # husky 30 | .husky/ 31 | 32 | # git action 33 | .github/ -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "moduleResolution":"node", 5 | "target": "es5", 6 | "noImplicitAny": true, 7 | "removeComments": true, 8 | "preserveConstEnums": true, 9 | "outDir": "./lib", 10 | "sourceMap": true, 11 | "declaration": true, 12 | "lib": [ 13 | "es2017", 14 | "dom" 15 | ], 16 | "baseUrl": ".", 17 | }, 18 | "include": [ 19 | "src/**/*" 20 | ], 21 | "exclude": [ 22 | "node_modules", 23 | "lib/*", 24 | "**/*.spec.ts", 25 | "src/local" 26 | ] 27 | } -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | 3 | module.exports = { 4 | entry: './browser.ts', 5 | module: { 6 | rules: [ 7 | { 8 | test: /\.ts$/, 9 | use: { 10 | loader: 'ts-loader', 11 | options: { 12 | transpileOnly: true, 13 | }, 14 | }, 15 | exclude: /node_modules|__tests__|lib|dist/, 16 | }, 17 | ], 18 | }, 19 | resolve: { 20 | extensions: ['.ts', '.js'], 21 | }, 22 | output: { 23 | filename: 'reregexp.min.js', 24 | path: path.resolve(__dirname, 'dist'), 25 | }, 26 | }; 27 | 
-------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | const { defaults: tsjPreset } = require('ts-jest/presets'); 2 | module.exports = { 3 | testEnvironment: 'node', 4 | transform: Object.assign({}, tsjPreset.transform), 5 | verbose: true, 6 | testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$', 7 | testPathIgnorePatterns: [ 8 | '/lib/', 9 | '/dist/', 10 | '/node_modules/', 11 | '/local/', 12 | ], 13 | coverageDirectory: '/coverage', 14 | collectCoverage: true, 15 | collectCoverageFrom: ['src/**/*.{ts,js}', '!/node_modules/'], 16 | moduleFileExtensions: ['ts', 'js'], 17 | moduleNameMapper: { 18 | '^@/(.*)': '/src/$1', 19 | }, 20 | rootDir: '.', 21 | }; 22 | -------------------------------------------------------------------------------- /.github/workflows/test-and-cov.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node-version: [16.x, 18.x] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Use Node.js ${{ matrix.node-version }} 20 | uses: actions/setup-node@v3 21 | with: 22 | node-version: ${{ matrix.node-version }} 23 | - name: Install dependencies 24 | run: npm install -g pnpm@8 && pnpm i 25 | - name: Run tests 26 | run: pnpm test 27 | - name: Run coverage 28 | uses: coverallsapp/github-action@v2 29 | with: 30 | github-token: ${{ secrets.GITHUB_TOKEN }} 31 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | parser: '@typescript-eslint/parser', 3 | parserOptions: { 4 | ecmaVersion: 2020, 5 | sourceType: 'module', 6 | }, 7 | env: { 8 | 
jest: true, 9 | browser: true, 10 | }, 11 | extends: [ 12 | 'plugin:@typescript-eslint/recommended', 13 | 'prettier/@typescript-eslint', 14 | 'plugin:prettier/recommended', 15 | ], 16 | rules: { 17 | '@typescript-eslint/no-unused-vars': [ 18 | 'error', 19 | { argsIgnorePattern: '^_', varsIgnorePattern: '^_' }, 20 | ], 21 | 'no-console': 'warn', 22 | }, 23 | overrides: [ 24 | { 25 | files: [ 26 | 'jest.config.js', 27 | 'webpack.config.js', 28 | '.prettierrc.js', 29 | '.eslintrc.js', 30 | ], 31 | env: { 32 | node: true, 33 | }, 34 | extends: ['plugin:prettier/recommended'], 35 | rules: { 36 | '@typescript-eslint/no-var-requires': 'off', 37 | }, 38 | }, 39 | ], 40 | }; 41 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | The changelog of the reregexp library. 4 | 5 | ## [1.6.1] - 2023-05-21 6 | 7 | ### Added 8 | 9 | - Add ESM module export support. 10 | 11 | ## [1.6.0] - 2022-05-09 12 | 13 | ### Added 14 | 15 | - Add the `charactersOfAny` config. You can set it globally with the static property `ReRegExp.charactersOfAny`, or per instance through a `ParserConf`, e.g. `new ReRegExp(context, { charactersOfAny })`. It lets you define which characters can be generated by a `.` character class. 16 | 17 | ## [1.5.0] - 2021-05-19 18 | 19 | ### Added 20 | 21 | - Support unicode property class syntax, e.g. `\p{Letter}`; more details are shown in the README. 22 | 23 | ## [1.4.0] - 2021-05-16 24 | 25 | ### Added 26 | 27 | - Add the `capture` config field, for when you care about the regexp group capture data. Also add tests for this feature. 28 | 29 | ### Changed 30 | 31 | - Optimize some regexp rules of the parser. 32 | - Change the default exported library name from 'RegexpParser' to `ReRegExp` in the browser. 33 | - Make the README clearer. 34 | - Upgrade the versions of TypeScript and other tooling dependencies. 
35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 jxz_211 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "reregexp", 3 | "version": "1.6.1", 4 | "description": "Generate a random string match a given regular expression, suitable for mocking strings.", 5 | "main": "./lib/index.js", 6 | "typings": "./lib/index.d.ts", 7 | "author": "jxz_211@163.com", 8 | "exports": { 9 | ".": { 10 | "require": "./lib/index.js", 11 | "import": "./index.mjs", 12 | "types": "./lib/index.d.ts" 13 | } 14 | }, 15 | "keywords": [ 16 | "regex", 17 | "regexp", 18 | "random", 19 | "generate", 20 | "mock", 21 | "string", 22 | "toString", 23 | "reverse" 24 | ], 25 | "license": "MIT", 26 | "devDependencies": { 27 | "@babel/eslint-parser": "^7.12.1", 28 | "@types/jest": "^26.0.15", 29 | "@types/node": "^14.14.6", 30 | "@typescript-eslint/eslint-plugin": "^4.33.0", 31 | "@typescript-eslint/parser": "^4.33.0", 32 | "coveralls": "^3.1.0", 33 | "eslint": "^7.32.0", 34 | "eslint-config-prettier": "^6.15.0", 35 | "eslint-plugin-prettier": "^3.1.4", 36 | "jest": "^26.6.3", 37 | "prettier": "^2.8.8", 38 | "ts-jest": "^26.5.6", 39 | "ts-loader": "^8.0.7", 40 | "ts-node": "^9.0.0", 41 | "tsconfig-paths": "^3.6.0", 42 | "typescript": "^4.9.5", 43 | "webpack": "^4.26.0", 44 | "webpack-cli": "^3.1.2" 45 | }, 46 | "scripts": { 47 | "test": "jest --silent", 48 | "local": "ts-node -r tsconfig-paths/register local/test.ts", 49 | "build:browser": "webpack --progress --mode='production'", 50 | "build": "npm run lint && tsc && npm run build:browser", 51 | "prepublish": "npm test && npm run build", 52 | "lint": "eslint 'src/**/*.{ts,js}'" 53 | }, 54 | "repository": { 55 | "type": "git", 56 | "url": "https://github.com/suchjs/reregexp" 57 | }, 58 | "bugs": { 59 | "url": "https://github.com/suchjs/reregexp/issues" 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # reregexp 2 | 3 | [![npm version](https://badge.fury.io/js/reregexp.svg)](https://badge.fury.io/js/reregexp)  [![Build Status](https://travis-ci.org/suchjs/reregexp.svg?branch=master)](https://travis-ci.org/suchjs/reregexp) 4 | [![Coverage Status](https://coveralls.io/repos/github/suchjs/reregexp/badge.svg?branch=master)](https://coveralls.io/github/suchjs/reregexp?branch=master) 5 | 6 | Generate a string that matches a given regular expression; this is useful if you want to mock strings from a regexp rule. It strictly abides by the standard JavaScript regex rules, but you still need to pay attention to the [Special cases](#special-cases). 7 | 8 | ## Goals 9 | 10 | - Support named capture groups, e.g. `(?<name>\w)\k<name>`, and also allow overriding them by exposing a config field `namedGroupConf`. 11 | 12 | - Support the unicode property class `\p{Lu}` by setting the static `UPCFactory` handle; see the example for more details. 13 | 14 | - Support the `u` flag, so you can use unicode ranges. 15 | 16 | - Allow you to get the capture group values. 
17 | 18 | ## Installation 19 | 20 | ```bash 21 | # npm 22 | npm install --save reregexp 23 | # or yarn 24 | yarn add reregexp 25 | ``` 26 | 27 | ## Usage 28 | 29 | ```javascript 30 | // Commonjs module 31 | const ReRegExp = require('reregexp').default; 32 | 33 | // ESM module 34 | // since v1.6.1 35 | import ReRegExp from 'reregexp'; 36 | 37 | // before v1.6.1 38 | import re from 'reregexp'; 39 | const ReRegExp = re.default; 40 | 41 | // For the first parameter of the constructor, 42 | // you can use a regex literal or a RegExp string. 43 | // If you need to use features that are not well supported by all browsers, 44 | // such as named groups, you should always choose a RegExp string. 45 | 46 | // Example 1: use a group reference 47 | const r1 = new ReRegExp(/([a-z0-9]{3})_\1/); 48 | r1.build(); // => 'a2z_a2z' '13d_13d' 49 | 50 | // Example 2: use a named group 51 | const r2 = new ReRegExp(/(?<named>\w{1,2})_\1_\k<named>/); 52 | r2.build(); // => 'b5_b5_b5' '9_9_9' 53 | 54 | // Example 3: use a named group with the `namedGroupConf` config 55 | // it will use a string from the config instead of the string that would be generated by the named group; 56 | // of course, it will trigger an error if the string in the config does not match the rule of the named group. 57 | const r3 = new ReRegExp('/(a)\\1(?<named>b)\\k<named>(?<override>\\w+)/', { 58 | namedGroupConf: { 59 | override: ['cc', 'dd'], 60 | }, 61 | }); 62 | r3.build(); // => "aabbcc" "aabbdd" 63 | 64 | // Example 4: use a character set 65 | const r4 = new ReRegExp(/[^\w\W]+/); 66 | r4.build(); // will throw an error, because [^\w\W] matches nothing. 67 | 68 | // Example 5: also a character set with the negation operator 69 | const r5 = new ReRegExp(/[^a-zA-Z0-9_\W]/); 70 | r5.build(); // will throw an error, this is the same as [^\w\W] 71 | 72 | // Example 6: with the `i` flag, ignore case. 73 | const r6 = new ReRegExp(/[a-z]{3}/i); 74 | r6.build(); // => 'bZD' 'Poe' 75 | 76 | // Example 7: with the `u` flag, e.g. make some Chinese characters. 
77 | const r7 = new ReRegExp('/[\\u{4e00}-\\u{9fcc}]{5,10}/u'); 78 | r7.build(); // => '偤豄酌菵呑', '孜垟与醽奚衜踆猠' 79 | 80 | // Example 8: set a global `maxRepeat` for quantifiers such as '*' and '+'. 81 | ReRegExp.maxRepeat = 10; 82 | const r8 = new ReRegExp(/a*/); 83 | r8.build(); // => 'aaaaaaa', 'a' will be repeated at most 10 times. 84 | 85 | // Example 9: use `maxRepeat` in the constructor config, it will override the global `maxRepeat`. 86 | const r9 = new ReRegExp(/a*/, { 87 | maxRepeat: 20, 88 | }); 89 | r9.build(); // => 'aaaaaaaaaaaaaa', 'a' will be repeated at most 20 times 90 | 91 | // Example 10: use the `extractSetAverage` config for character sets. 92 | const r10 = new ReRegExp(/[\Wa-z]/, { 93 | // \W is expanded to all the characters that match \W, so a-z no longer has the same chance as \W 94 | extractSetAverage: true, 95 | }); 96 | 97 | // Example 11: use the `capture` config if you care about the capture data 98 | const r11 = new ReRegExp(/(aa?)b(?<named>\w)/, { 99 | capture: true, // if you care about the group capture data, set the `capture` config to true 100 | }); 101 | r11.build(); // => 'abc' 102 | console.log(r11.$1); // => 'a' 103 | console.log(r11.$2); // => 'c' 104 | console.log(r11.groups); // => {named: 'c'} 105 | 106 | // Example 12: use the unicode property class by setting the `UPCFactory` 107 | ReRegExp.UPCFactory = (data: UPCData) => { 108 | /* 109 | UPCData: { 110 | negate: boolean; // if the symbol is 'P' 111 | short: boolean; // take '\pL' as a short form of '\p{Letter}' 112 | key?: string; // set if there is a unicode property name, such as `Script` 113 | value: string; // unicode property value, binary or non-binary 114 | } 115 | */ 116 | return { 117 | generate(){ 118 | return 'x'; // return an object that has a `generate` method. 119 | } 120 | } 121 | }; 122 | const r12 = new ReRegExp('/\\p{Lu}/u'); 123 | console.log(r12.build()); // => 'x', which is handled by the `UPCFactory` method. 
124 | ``` 125 | 126 | ## Config 127 | 128 | ```typescript 129 | // The meaning of the config fields can be seen in the examples. 130 | { 131 | maxRepeat?: number; 132 | namedGroupConf?: { 133 | [index: string]: string[]|boolean; 134 | }; 135 | extractSetAverage?: boolean; 136 | capture?: boolean; 137 | } 138 | ``` 139 | 140 | ## Supported flags 141 | 142 | - `i` ignore case, `/[a-z]/i` is the same as `/[a-zA-Z]/` 143 | 144 | - `u` unicode flag 145 | 146 | - `s` dot all flag 147 | 148 | The flags `g` `m` `y` will be ignored. 149 | 150 | ## Methods 151 | 152 | `.build()` 153 | 154 | Build a string that matches the regexp. 155 | 156 | `.info()` 157 | 158 | Get the regexp's parsed queues, flags, and lastRule after named captures are removed. 159 | 160 | ```javascript 161 | { 162 | rule: '', 163 | context: '', 164 | flags: [], 165 | lastRule: '', 166 | queues: [], 167 | } 168 | ``` 169 | 170 | ## Build precautions,do not use any regexp anchors. 171 | 172 | 1. `^` `$` the start and end anchors will be ignored. 173 | 2. `(?=)` `(?!)` `(?<=)` `(? 
{ 4 | [index: string]: T; 5 | } 6 | export type $N = `$${1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9}`; 7 | export type Flag = 'i' | 'm' | 'g' | 'u' | 'y' | 's' | 'd'; 8 | export type FlagsHash = { 9 | [key in Flag]?: boolean; 10 | }; 11 | export type FlagsBinary = { 12 | [key in Flag]: number; 13 | }; 14 | export type NamedGroupConf = NormalObject; 15 | export interface FeaturesConfig { 16 | unicode?: boolean; 17 | namedCapture?: boolean; 18 | upc?: boolean; 19 | } 20 | export interface ParserConf { 21 | maxRepeat?: number; 22 | namedGroupConf?: NamedGroupConf>; 23 | extractSetAverage?: boolean; 24 | charactersOfAny?: CodePointRanges | CodePointRangeItem | ((flags?: FlagsHash) => string); 25 | capture?: boolean; 26 | features?: FeaturesConfig; 27 | } 28 | export interface BuildConfData extends ParserConf { 29 | flags: FlagsHash; 30 | namedGroupData: NormalObject; 31 | captureGroupData: NormalObject; 32 | } 33 | export type Result = Pick & { 34 | queues: RegexpPart[]; 35 | }; 36 | export type UPCData = { 37 | negate: boolean; 38 | short: boolean; 39 | key?: string; 40 | value: string; 41 | }; 42 | export type UPCFactory = (data: UPCData) => UPCInstance | never; 43 | export interface UPCInstance { 44 | generate(): string; 45 | } 46 | export declare class CharsetHelper { 47 | static readonly points: CodePointData; 48 | static readonly lens: CodePointData; 49 | static readonly bigCharPoint: [number, number]; 50 | static readonly bigCharTotal: number; 51 | static charsetOfAll(): CodePointResult; 52 | static charsetOfDotall(): CodePointResult; 53 | static charsetOfNegated(type: CharsetCacheType): CodePointResult; 54 | static charsetOf(type: CharsetType): CodePointResult; 55 | static getCharsetInfo(type: CharsetType | CharsetNegatedType | '.', flags?: FlagsHash): CodePointResult; 56 | static make(type: CharsetType | CharsetNegatedType | '.', flags?: FlagsHash): string; 57 | static makeOne(result: CodePointResult): string; 58 | protected static readonly cache: CharsetCache; 59 | 
protected constructor(); 60 | } 61 | export declare const parserRule: RegExp; 62 | export declare const regexpRule: RegExp; 63 | export default class ReRegExp { 64 | readonly rule: string | RegExp; 65 | private config; 66 | static maxRepeat: number; 67 | static features: FeaturesConfig; 68 | static UPCFactory?: UPCFactory; 69 | static charactersOfAny: ParserConf['charactersOfAny']; 70 | readonly context: string; 71 | readonly flags: Flag[]; 72 | readonly lastRule: string; 73 | groups?: NormalObject; 74 | $1: string; 75 | $2: string; 76 | $3: string; 77 | $4: string; 78 | $5: string; 79 | $6: string; 80 | $7: string; 81 | $8: string; 82 | $9: string; 83 | private queues; 84 | private ruleInput; 85 | private flagsHash; 86 | private totalFlagBinary; 87 | private rootQueues; 88 | private hasLookaround; 89 | private hasNullRoot; 90 | private anyCharacterHandle; 91 | private anyCharacterHandleDone; 92 | constructor(rule: string | RegExp, config?: ParserConf); 93 | build(): string | never; 94 | info(): Result; 95 | private parse; 96 | private checkFlags; 97 | private hasFlag; 98 | getFlagsHash(): FlagsHash; 99 | } 100 | export type CharsetType = 'd' | 'w' | 's'; 101 | export type CharsetNegatedType = 'D' | 'W' | 'S'; 102 | export type CharsetWordType = 'b' | 'B'; 103 | export type CharsetAllType = CharsetType | CharsetNegatedType | CharsetWordType; 104 | export type CharsetCacheType = CharsetNegatedType | typeof SYMBOL_DOTALL | typeof SYMBOL_ALL; 105 | export type CharsetCache = { 106 | [key in CharsetCacheType]?: CodePointResult; 107 | }; 108 | export type CodePointRangeItem = [number, number] | [number]; 109 | export type CodePointRanges = Array; 110 | export type CodePointData = { 111 | [key in CharsetType]: T; 112 | }; 113 | export interface CodePointResult { 114 | ranges: CodePointRanges; 115 | totals: number[]; 116 | } 117 | export interface NumberRange { 118 | min: number; 119 | max: number; 120 | } 121 | export declare abstract class RegexpPart { 122 | input: 
string; 123 | queues: RegexpPart[]; 124 | codePoint: number; 125 | abstract readonly type: string; 126 | protected parserInstance: ReRegExp; 127 | protected min: number; 128 | protected max: number; 129 | protected dataConf: Partial; 130 | protected buildForTimes: boolean; 131 | protected curParent: RegexpPart; 132 | protected matchNothing: boolean; 133 | protected completed: boolean; 134 | constructor(input?: string); 135 | get parser(): ReRegExp; 136 | set parser(parser: ReRegExp); 137 | get count(): number; 138 | get parent(): RegexpPart; 139 | set parent(value: RegexpPart); 140 | set linkParent(value: RegexpPart); 141 | get isComplete(): boolean; 142 | set isComplete(value: boolean); 143 | get isMatchNothing(): boolean; 144 | set isMatchNothing(value: boolean); 145 | setRange(options: NumberRange): void; 146 | add(target: RegexpPart | RegexpPart[]): void; 147 | pop(): RegexpPart; 148 | build(conf: BuildConfData): string | never; 149 | untilEnd(_context: string): number | void; 150 | setDataConf(_conf: BuildConfData, _result: string): void; 151 | isAncestorOf(target: RegexpPart): boolean; 152 | getRuleInput(_parseReference?: boolean): string; 153 | protected buildRuleInputFromQueues(): string; 154 | protected prebuild(conf: BuildConfData): string | never; 155 | protected getCodePointCount(): number; 156 | } 157 | export declare abstract class RegexpEmpty extends RegexpPart { 158 | constructor(input?: string); 159 | } 160 | export declare abstract class RegexpOrigin extends RegexpPart { 161 | protected prebuild(): string; 162 | } 163 | export declare class RegexpReference extends RegexpPart { 164 | name: string; 165 | readonly type = "reference"; 166 | ref: RegexpGroup | null; 167 | index: number; 168 | constructor(input: string, name?: string); 169 | protected prebuild(conf: BuildConfData): string; 170 | } 171 | export declare class RegexpSpecial extends RegexpEmpty { 172 | readonly special: string; 173 | readonly type = "special"; 174 | constructor(special: 
string); 175 | } 176 | /** Lookaround node (type "lookaround"). NOTE(review): looktype presumably stores the lookaround prefix - confirm in src/index.ts. */ export declare class RegexpLookaround extends RegexpEmpty { 177 | readonly type = "lookaround"; 178 | readonly looktype: string; 179 | constructor(input: string); 180 | getRuleInput(): string; 181 | } 182 | /** Node typed "any"; carries an optional handle callback returning a string, and genDiyCharactersHandle produces such a callback from a ParserConf plus the flags hash. */ export declare class RegexpAny extends RegexpPart { 183 | handle?: () => string; 184 | readonly type = "any"; 185 | constructor(handle?: () => string); 186 | static genDiyCharactersHandle(conf: ParserConf & { 187 | flags: FlagsHash; 188 | }): () => string; 189 | protected prebuild(conf: BuildConfData): string; 190 | } 191 | export declare class RegexpNull extends RegexpPart { 192 | readonly type = "null"; 193 | constructor(); 194 | protected prebuild(): string; 195 | } 196 | export declare class RegexpBackspace extends RegexpPart { 197 | readonly type = "backspace"; 198 | constructor(); 199 | protected prebuild(): string; 200 | } 201 | export declare class RegexpBegin extends RegexpEmpty { 202 | readonly type = "begin"; 203 | } 204 | export declare class RegexpControl extends RegexpPart { 205 | readonly type = "control"; 206 | constructor(input: string); 207 | protected prebuild(): string; 208 | } 209 | export declare class RegexpCharset extends RegexpPart { 210 | readonly type = "charset"; 211 | readonly charset: CharsetAllType; 212 | constructor(input: string); 213 | protected prebuild(conf: BuildConfData): string; 214 | protected getCodePointCount(): number; 215 | } 216 | export declare class RegexpPrint extends RegexpPart { 217 | readonly type = "print"; 218 | protected prebuild(): string; 219 | } 220 | export declare class RegexpAnchor extends RegexpEmpty { 221 | readonly type = "anchor"; 222 | anchor: string; 223 | constructor(input: string); 224 | } 225 | export declare class RegexpChar extends RegexpOrigin { 226 | readonly type = "char"; 227 | constructor(input: string); 228 | } 229 | export declare class RegexpTranslateChar extends RegexpOrigin { 230 | readonly type = "translate"; 231 | constructor(input: string); 232 | protected
prebuild(): string; 233 | } 234 | export declare class RegexpOctal extends RegexpPart { 235 | readonly type = "octal"; 236 | constructor(input: string); 237 | protected prebuild(): string; 238 | } 239 | /** Node typed "refornumber"; prebuild() is declared to return never. NOTE(review): looks like a parse-time placeholder that is resolved before build - confirm in src/index.ts. */ export declare class RegexpRefOrNumber extends RegexpPart { 240 | readonly type = "refornumber"; 241 | constructor(input: string); 242 | protected prebuild(): never; 243 | } 244 | /** Abstract quantifier node (type "times"); subclasses supply rule and parse(). NOTE(review): the bundled parser appears to treat a positive untilEnd() result as the number of characters consumed and a non-positive one as "not a quantifier" - confirm. */ export declare abstract class RegexpTimes extends RegexpPart { 245 | readonly type = "times"; 246 | protected readonly maxNum: number; 247 | protected greedy: boolean; 248 | protected abstract readonly rule: RegExp; 249 | protected minRepeat: number; 250 | protected maxRepeat: number; 251 | constructor(); 252 | set target(target: RegexpPart); 253 | untilEnd(context: string): number; 254 | abstract parse(): void; 255 | } 256 | export declare class RegexpTimesMulti extends RegexpTimes { 257 | protected rule: RegExp; 258 | parse(): void; 259 | } 260 | export declare class RegexpTimesQuantifiers extends RegexpTimes { 261 | protected readonly maxNum: number; 262 | protected rule: RegExp; 263 | constructor(maxNum?: number); 264 | parse(): void; 265 | } 266 | export declare class RegexpSet extends RegexpPart { 267 | readonly type = "set"; 268 | reverse: boolean; 269 | private isMatchAnything; 270 | private codePointResult; 271 | constructor(); 272 | set parser(parser: ReRegExp); 273 | get parser(): ReRegExp; 274 | get isComplete(): boolean; 275 | set isComplete(value: boolean); 276 | getRuleInput(): string; 277 | protected prebuild(conf: BuildConfData): string; 278 | protected makeCodePointResult(): void; 279 | } 280 | /** Range inside a set (type "range"). In the bundled build a reversed range is rejected with an "invalid range" error, prebuild() returns a random character whose code point lies between the two endpoints (inclusive), and getCodePointCount() is end - start + 1. */ export declare class RegexpRange extends RegexpPart { 281 | readonly type = "range"; 282 | constructor(); 283 | add(target: RegexpPart): void | never; 284 | getRuleInput(): string; 285 | protected prebuild(): string; 286 | protected getCodePointCount(): number; 287 | } 288 | /** Base for hex escapes (type "hexcode"). In the bundled build untilEnd(context) tests rule against context; on a match it stores the parsed code point, throws when it exceeds 0x10ffff, sets input to the escape text and returns the matched length; otherwise it returns 0. */ export declare abstract class RegexpHexCode extends RegexpOrigin { 289 | readonly type = "hexcode"; 290 |
protected abstract rule: RegExp; 291 | protected abstract codeType: string; 292 | untilEnd(context: string): number; 293 | } 294 | /** Hex escape with codeType "u" whose rule accepts exactly four hex digits (per the bundled build). */ export declare class RegexpUnicode extends RegexpHexCode { 295 | protected rule: RegExp; 296 | protected codeType: string; 297 | } 298 | /** Hex escape with codeType "u" whose rule accepts either a braced form with one to six significant hex digits or four hex digits (per the bundled build). */ export declare class RegexpUnicodeAll extends RegexpHexCode { 299 | protected rule: RegExp; 300 | protected codeType: string; 301 | } 302 | /** Hex escape with codeType "x" whose rule accepts exactly two hex digits (per the bundled build). */ export declare class RegexpASCII extends RegexpHexCode { 303 | protected rule: RegExp; 304 | protected codeType: string; 305 | } 306 | /** Unicode property escape. In the bundled build untilEnd() parses the property text into data (negate is true when the symbol is "P"), obtains a generator from ReRegExp.UPCFactory and returns the consumed length, or 0 when the rule does not match; prebuild() returns generator.generate(). */ export declare class RegexpUnicodeCategory extends RegexpPart { 307 | private readonly symbol; 308 | type: string; 309 | protected data: UPCData; 310 | protected rule: RegExp; 311 | protected generator: UPCInstance; 312 | constructor(symbol: string); 313 | untilEnd(context: string): number | never; 314 | protected prebuild(): string; 315 | } 316 | /** One alternative of a group (type "group-item"). In the bundled build prebuild() concatenates the build output of its queued parts and skips anchor parts with a console warning; getRuleInput() likewise omits anchors. */ export declare class RegexpGroupItem extends RegexpPart { 317 | index: number; 318 | readonly type = "group-item"; 319 | constructor(index: number); 320 | getRuleInput(parseReference?: boolean): string; 321 | prebuild(conf: BuildConfData): string; 322 | private isEndLimitChar; 323 | } 324 | /** Group node (type "group"). In the bundled build addNewGroup() creates a RegexpGroupItem indexed by the current queue length and makes it current, addItem() parents a part to the current item, and setting isComplete to true marks the group as matching nothing when every item matches nothing. */ export declare class RegexpGroup extends RegexpPart { 325 | readonly type = "group"; 326 | captureIndex: number; 327 | captureName: string; 328 | queues: RegexpGroupItem[]; 329 | isRoot: boolean; 330 | private curGroupItem; 331 | private curRule; 332 | constructor(); 333 | get isComplete(): boolean; 334 | set isComplete(value: boolean); 335 | addNewGroup(): RegexpGroupItem; 336 | addRootItem(target: RegexpPart[]): void; 337 | addItem(target: RegexpPart): void; 338 | getRuleInput(parseReference?: boolean): string; 339 | protected buildRule(flags: FlagsHash): RegExp | null; 340 | protected prebuild(conf: BuildConfData): string; 341 | } 342 | export {}; 343 | -------------------------------------------------------------------------------- /__tests__/index.test.ts:
-------------------------------------------------------------------------------- 1 | import ReRegExp, { ParserConf, CharsetHelper, UPCData } from '../src/index'; 2 | type Rule = RegExp | string; 3 | /** Returns a thunk that constructs a ReRegExp from `rule`. The thunk must be invoked to exercise the parser, so pair it with expect(...).toThrow() or expect(...).not.toThrow(); asserting on the thunk itself (e.g. toBeTruthy) checks nothing. */ const validParser = (rule: Rule) => { 4 | return () => { 5 | try { 6 | new ReRegExp(rule); 7 | } catch (e) { 8 | throw e; 9 | } 10 | return true; 11 | }; 12 | }; 13 | /** True when the string built from `rule` is matched by `rule` itself. */ const validMatch = (rule: RegExp) => { 14 | return rule.test(new ReRegExp(rule).build()); 15 | }; 16 | /** Builds one string from `rule` with the given parser config. */ const validValue = (rule: Rule, conf: ParserConf = {}) => { 17 | const re = new ReRegExp(rule, conf); 18 | return re.build(); 19 | }; 20 | /** True when the parser's lastRule equals the rule source (for string rules: the text with the leading delimiter and the trailing delimiter plus flags removed). */ const validInput = (rule: Rule): boolean => { 21 | return ( 22 | new ReRegExp(rule).lastRule === 23 | (rule instanceof RegExp 24 | ? rule.source 25 | : rule.replace(/^\//, '').replace(/\/[imguys]*$/, '')) 26 | ); 27 | }; 28 | /** True when RUNTIMES consecutive builds of `rule` all produce a member of `values`. */ const mustIn = ( 29 | values: string[], 30 | rule: Rule, 31 | conf: ParserConf = {}, 32 | ): boolean => { 33 | for (let i = 0; i < RUNTIMES; i++) { 34 | const value = validValue(rule, conf); 35 | if (!values.includes(value)) return false; 36 | } 37 | return true; 38 | }; 39 | const RUNTIMES = 1e3; 40 | /** Calls `fn` RUNTIMES times and returns how many calls returned a truthy value. */ const run = (() => { 41 | let matchedTimes: number; 42 | let i: number; 43 | return (fn: () => boolean): number => { 44 | matchedTimes = 0; 45 | for (i = 0; i < RUNTIMES; i++) { 46 | if (fn()) { 47 | matchedTimes++; 48 | } 49 | } 50 | return matchedTimes; 51 | }; 52 | })(); 53 | describe('Test regexp parser', () => { 54 | // patterns 55 | test('test patterns', () => { 56 | expect(validParser('//')).toThrow(); 57 | expect(validParser('/a/ii')).toThrow(); 58 | expect(validParser('/(/')).toThrow(); 59 | expect(validParser('/)/')).toThrow(); 60 | expect(validParser('/[/')).toThrow(); 61 | expect(validParser('/]/')).not.toThrow(); 62 | expect(validParser('/?/')).toThrow(); 63 | expect(validParser('/(?)/')).toThrow(); 64 | expect(validParser('/(|?)/')).toThrow(); 65 | expect(validParser('/^?/')).toThrow(); 66 | expect(validParser('/[/]/')).not.toThrow(); 67
| expect(validParser('/a//')).toThrow(); 68 | expect(validParser('/(?abc)\\k/')).toThrow(); 69 | expect(validParser('/a(?=b)/')).not.toThrow(); 70 | expect(validInput('/a(?=b)/')).toBeTruthy(); 71 | expect(validParser('/(abc()()))/')).toThrow(); 72 | expect(validParser('/\\u{fg}/u')).toThrow(); 73 | expect(validParser('/\\xfg/u')).toThrow(); 74 | expect(validMatch(/\\ufg/)).toBeTruthy(); 75 | expect(validMatch(/\\xfg/)).toBeTruthy(); 76 | expect(validInput(/\xff\u{00aa}/u)).toBeTruthy(); 77 | }); 78 | 79 | // valid times 80 | test('test times quantifier', () => { 81 | expect(validParser('/a*+/')).toThrow(); 82 | expect(validParser('/a**/')).toThrow(); 83 | expect(validParser('/a++/')).toThrow(); 84 | expect(validParser('/a+*/')).toThrow(); 85 | expect(validParser('/a?*/')).toThrow(); 86 | expect(validParser('/a?+/')).toThrow(); 87 | expect(validParser('/a*?/')).not.toThrow(); 88 | expect(validParser('/a*?*/')).toThrow(); 89 | expect(validParser('/a+?/')).not.toThrow(); 90 | expect(validParser('/a+?+/')).toThrow(); 91 | expect(validParser('/a??/')).not.toThrow(); 92 | expect(validParser('/a???/')).toThrow(); 93 | expect(validParser('/a{3}?/')).not.toThrow(); 94 | expect(validParser('/a{3,}?/')).not.toThrow(); 95 | expect(validParser('/a{3}+/')).toThrow(); 96 | expect(validParser('/a{3}*/')).toThrow(); 97 | expect(validParser('/a{3}*/')).toThrow(); 98 | expect(validParser('/a{3}??/')).toThrow(); 99 | expect(validMatch(/a{3,5}/)).toBeTruthy(); 100 | expect(validMatch(/a*/)).toBeTruthy(); 101 | expect(validMatch(/a+/)).toBeTruthy(); 102 | expect(validMatch(/a*?/)).toBeTruthy(); 103 | expect(validMatch(/a+?/)).toBeTruthy(); 104 | expect(validMatch(/a??/)).toBeTruthy(); 105 | // malformed quantifiers are built as literal text 106 | expect(validValue(/a{ 3}/) === 'a{ 3}').toBeTruthy(); 107 | expect(validValue(/a{3, }/) === 'a{3, }').toBeTruthy(); 108 | expect(validValue(/a{3, 5}/) === 'a{3, 5}').toBeTruthy(); 109 | expect(validParser('/a{5,3}/')).toThrow(); 110 | }); 111 | // normal regexp rules 112 |
test('test string match', () => { 113 | expect(validMatch(/a/)).toBeTruthy(); 114 | expect(validMatch(/a{3}/)).toBeTruthy(); 115 | expect(validMatch(/a./)).toBeTruthy(); 116 | expect(validMatch(/[abc]/)).toBeTruthy(); 117 | expect(validMatch(/[\w]/)).toBeTruthy(); 118 | expect(validMatch(/[^\w]/)).toBeTruthy(); 119 | expect(validMatch(/(a|b|cc|\d+)/)).toBeTruthy(); 120 | expect(validMatch(/a(zz)|b(dd)|c(ef)|d(gg)/)).toBeTruthy(); 121 | expect(validMatch(/(a)\89/)).toBeTruthy(); 122 | expect(validMatch(/\1(a)/)).toBeTruthy(); 123 | expect(validMatch(/\12(a)/)).toBeTruthy(); 124 | expect(validMatch(/\377(a)/)).toBeTruthy(); 125 | expect(validMatch(/\8(a)(b)(c)(d)(e)(f)(g)(h)(i)/)).toBeTruthy(); 126 | }); 127 | // regexp set 128 | test('test regexp set', () => { 129 | // normal set 130 | const r1 = /[a-z]/; 131 | expect(validMatch(r1)).toBeTruthy(); 132 | // empty set, matches nothing, so build() throws 133 | const r2 = /a([])b\1/; 134 | expect(() => validValue(r2)).toThrow(); 135 | // match everything 136 | const r3 = /^[^]$/; 137 | expect(validMatch(r3)).toBeTruthy(); 138 | // match nothing 139 | const r4 = /[^\w\W]/; 140 | expect(() => validValue(r4)).toThrow(); 141 | // matches nothing, same as r4 142 | const r5 = /[^a-zA-Z0-9_\W]/; 143 | expect(() => validValue(r5)).toThrow(); 144 | // invalid set range 145 | const r6 = '/[z-a]/'; 146 | expect(validParser(r6)).toThrow(); 147 | // octal 148 | const r7 = '/[\\177-\\200]/'; 149 | expect(validValue(r7)).toBeTruthy(); 150 | expect(validValue(/[\8]/)).toBeTruthy(); 151 | // control character 152 | const r8 = /[\ca]/; 153 | expect(validValue(r8)).toBeTruthy(); 154 | const r9 = /[\c0]/; 155 | expect(validValue(r9)).toBeTruthy(); 156 | expect(validParser('/[\\c0]/u')).toThrow(); 157 | // hex 158 | const r10 = /[\x61-\x99]/; 159 | expect(validValue(r10)).toBeTruthy(); 160 | // backspace 161 | expect(validValue(/[\b]/)).toEqual('\u{0008}'); 162 | // empty set without a group: also matches nothing 163 | expect(() => validValue(/a[]b/)).toThrow(); 164 | // set with charset 165 |
expect(validMatch(/[a-\s]/)).toBeTruthy(); 166 | expect(validMatch(/[^-a]/)).toBeTruthy(); 167 | expect(validMatch(/[a-]/)).toBeTruthy(); 168 | expect(validMatch(/[a-z]]/)).toBeTruthy(); 169 | // translate char 170 | expect(validMatch(/[\xza-z]/)).toBeTruthy(); 171 | // mixed reverse set 172 | expect(validMatch(/[^a-z\d]/)).toBeTruthy(); 173 | // character class 174 | expect(/[\w]/.test(validValue(/[\w]/))).toBeTruthy(); 175 | // special set 176 | expect(validMatch(/[[abc]/)).toBeTruthy(); 177 | // special range 178 | expect(validMatch(/[{-}]/)).toBeTruthy(); 179 | // ignore \b\B 180 | expect(validMatch(/[^a\bc]/)).toBeTruthy(); 181 | }); 182 | 183 | // test u flag 184 | test('test unicode flag', () => { 185 | const r1 = /\u{0061}/; 186 | const r2 = /\u{61}/u; 187 | expect(validMatch(r1)).toBeTruthy(); 188 | expect(mustIn(['a', '\\u{61}'], r2)).toBeTruthy(); 189 | // unicode set 190 | const r3 = /[\u{0061}-\u{0099}]/u; 191 | expect(validMatch(r3)).toBeTruthy(); 192 | // unicode any 193 | const r4 = /./u; 194 | expect(validMatch(r4)).toBeTruthy(); 195 | // unicode range whose end exceeds 0x10ffff 196 | expect(validParser('/[\\u{010}-\\u{110000}]/u')).toThrow(); 197 | // invalid control character 198 | expect(validParser('/\\c1/u')).toThrow(); 199 | // valid control character 200 | expect(validMatch(/\ca/u)).toBeTruthy(); 201 | expect(validMatch(/\c1/)).toBeTruthy(); 202 | expect(validParser('/\\u{110000}/u')).toThrow(); 203 | expect(run(() => validMatch(/[^a-z01\W]/u)) === RUNTIMES).toBeTruthy(); 204 | expect(validMatch(/[^a-z01\W]/)).toBeTruthy(); 205 | }); 206 | // test i flag 207 | test('test ignore case', () => { 208 | const r1 = /[a-z]/i; 209 | expect( 210 | Array.from({ 211 | length: 30, 212 | }).some(() => /[A-Z]/.test(validValue(r1))), 213 | ).toBeTruthy(); 214 | }); 215 | // groups 216 | test('test groups', () => { 217 | // group capture null 218 | const r1 = /\1(a)/; 219 | expect(validMatch(r1)).toBeTruthy(); 220 | expect(validInput(r1)).toBeTruthy(); 221 |
expect(validValue(r1)).toEqual('a'); 222 | // named group capture (NOTE(review): the group-name syntax appears to have been stripped from the regex literals in this dump; namedGroupConf below refers to a group called ga - confirm against the real file) 223 | const r2 = /(?a)\k/; 224 | expect(validMatch(r2)).toBeTruthy(); 225 | expect(validValue(r2)).toEqual('aa'); 226 | expect(() => 227 | validValue(r2, { 228 | namedGroupConf: { 229 | ga: ['b'], 230 | }, 231 | }), 232 | ).toThrow(); 233 | expect( 234 | validValue(r2, { 235 | namedGroupConf: { 236 | ga: undefined, 237 | }, 238 | }), 239 | ).toEqual('aa'); 240 | // special named group match 241 | const r3 = /(ef)(?a|b|c|d+\1?)\\k/; 242 | expect( 243 | r3.test( 244 | validValue(r3, { 245 | namedGroupConf: { 246 | a_b_c_d: { 247 | a: false, 248 | b: false, 249 | c: false, 250 | d: ['ddef', 'ddd'], 251 | }, 252 | }, 253 | }), 254 | ), 255 | ).toBeTruthy(); 256 | // take as normal 257 | expect( 258 | r3.test( 259 | validValue(r3, { 260 | namedGroupConf: { 261 | a_b_c_d: ['a', 'b'], 262 | }, 263 | }), 264 | ), 265 | ).toBeTruthy(); 266 | // validate the override 267 | expect(() => 268 | validValue(r3, { 269 | namedGroupConf: { 270 | a_b_c_d: { 271 | a: false, 272 | b: false, 273 | c: false, 274 | d: ['ee'], // 'ee' does not match 'd+(ef)?'
275 | }, 276 | }, 277 | }), 278 | ).toThrow(); 279 | // not match 280 | const rr3 = /(?a|b|c|d+)\\k/; 281 | expect( 282 | rr3.test( 283 | validValue(rr3, { 284 | namedGroupConf: { 285 | a_b_c: { 286 | a: false, 287 | b: false, 288 | c: true, 289 | }, 290 | }, 291 | }), 292 | ), 293 | ).toBeTruthy(); 294 | // no group will match 295 | expect(() => 296 | validValue(r3, { 297 | namedGroupConf: { 298 | a_b_c_d: { 299 | a: false, 300 | b: false, 301 | c: false, 302 | d: false, 303 | }, 304 | }, 305 | }), 306 | ).toThrow(); 307 | // nested 308 | const r4 = /((a)b)\1\2/; 309 | expect(validValue(r4)).toEqual('ababa'); 310 | expect(validInput(r4)).toBeTruthy(); 311 | // nested group item 312 | const r5 = /((a)b|c)\1\2/; 313 | expect(mustIn(['ababa', 'cc'], r5)).toBeTruthy(); 314 | // non-capturing group 315 | const r6 = /(?:a)(b)\1/; 316 | expect(validMatch(r6)).toBeTruthy(); 317 | expect(validInput(r6)).toBeTruthy(); 318 | // root group 319 | const r7 = /(:)a|b|c/; 320 | const r8 = /(:)a|b$|c/; 321 | const r9 = /($)/; 322 | expect(mustIn([':a', 'b', 'c'], r7)).toBeTruthy(); 323 | expect(mustIn([':a', 'b', 'c'], r8)).toBeTruthy(); 324 | expect(validValue(r9)).toEqual(''); 325 | expect(validInput(r9)).toBeFalsy(); 326 | // group ref item 327 | const r10 = /(d)(a|b|c|\1)/; 328 | expect(mustIn(['da', 'db', 'dc', 'dd'], r10)).toBeTruthy(); 329 | expect(validInput(r10)).toBeTruthy(); 330 | // with null 331 | const r11 = /abc\0/; 332 | expect(validValue(r11)).toBeTruthy(); 333 | // test references, octals and numbers 334 | // \012 is an octal, matches \n 335 | const r12 = /(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\012/; 336 | expect(validMatch(r12)).toBeTruthy(); 337 | expect(validValue(r12)).toEqual(`abcdefghijkl\n`); 338 | // \12 is a reference, match the group 'l' 339 | const r13 = /(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\12/; 340 | expect(validMatch(r13)).toBeTruthy(); 341 | // \12 is an octal code point, match \n 342 | const r14 = /(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12/; 343 |
expect(validMatch(r14)).toBeTruthy(); 344 | // \12 is a reference, but matches nothing 345 | const r15 = /(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12(l)/; 346 | expect(validMatch(r15)).toBeTruthy(); 347 | // not octal 348 | const r16 = /(a)\08/; 349 | expect(validMatch(r16)).toBeTruthy(); 350 | // with anchors inside a group 351 | expect(validMatch(/(^a|b$|c$)/)).toBeTruthy(); 352 | // named group with conf 353 | const v1 = new ReRegExp('/a{1}b{2}(d{3})\\1(?[a-z]{2})/', { 354 | namedGroupConf: { 355 | namecap: ['aa', 'bb'], 356 | }, 357 | }); 358 | const v1values = ['abbddddddaa', 'abbddddddbb']; 359 | for (let i = 0, j = 10; i < j; i++) { 360 | expect(v1values.includes(v1.build())).toBeTruthy(); 361 | } 362 | // normal named group 363 | const v2: string = validValue('/(?haha)\\k/'); 364 | expect(v2 === 'hahahaha').toBeTruthy(); 365 | // reference 366 | expect(validMatch(/(abc\1)/)).toBeTruthy(); 367 | expect(validMatch(/(?abc\k)/)).toBeTruthy(); 368 | // build() does not support lookarounds 369 | expect(() => validMatch(/(abc(?=def))/)).toThrow(); 370 | // special reference 371 | expect(validValue(/(a)bc\81/) === 'abc81').toBeTruthy(); 372 | // octal 373 | expect(validValue(/(a)bc\051/) === 'abc\u{29}').toBeTruthy(); 374 | expect(validMatch(/(a)(bc|\81)/)).toBeTruthy(); 375 | }); 376 | 377 | // build 378 | test('test not supported', () => { 379 | const r1 = /^abc$/; 380 | expect(validMatch(r1)).toBeTruthy(); 381 | const r2 = /^a|b$/; 382 | expect(validMatch(r2)).toBeTruthy(); 383 | const r3 = /a$|^b/; 384 | expect(validMatch(r3)).toBeTruthy(); 385 | }); 386 | 387 | // flags 388 | test('test flags', () => { 389 | const r1 = /a/i; 390 | expect(mustIn(['a', 'A'], r1)).toBeTruthy(); 391 | const r2 = /a/gimsuy; 392 | expect(mustIn(['a', 'A'], r2)).toBeTruthy(); 393 | expect(validParser('/a/imguysi')).toThrow(); 394 | }); 395 | 396 | // special 397 | test('test special', () => { 398 | // .
399 | expect(validMatch(/./)).toBeTruthy(); 400 | expect(/./s.test(validValue(/./s))).toBeTruthy(); 401 | // \b 402 | expect(validValue(/a\bb/)).toEqual('ab'); 403 | // outside a set, '-' and ']' are literal characters 404 | expect(validValue(/a-z]/)).toEqual('a-z]'); 405 | // \w \d 406 | expect(validMatch(/\w/)).toBeTruthy(); 407 | expect(validMatch(/./u)).toBeTruthy(); 408 | expect(validMatch(/\W/)).toBeTruthy(); 409 | expect(validMatch(/\d/)).toBeTruthy(); 410 | expect(validMatch(/\D/)).toBeTruthy(); 411 | expect(validMatch(/\s/)).toBeTruthy(); 412 | expect(validMatch(/\S/)).toBeTruthy(); 413 | // expect(validMatch(/\t/)).toBeTruthy(); 414 | expect(validMatch(/\r/)).toBeTruthy(); 415 | expect(validMatch(/\n/)).toBeTruthy(); 416 | expect(validMatch(/\f/)).toBeTruthy(); 417 | expect(validMatch(/\v/)).toBeTruthy(); 418 | }); 419 | // test max repeat times 420 | test('test max repeat', () => { 421 | ReRegExp.maxRepeat = 10; 422 | expect( 423 | Array.from({ 424 | length: 10, 425 | }).every(() => { 426 | return validValue(/a*/).length <= 10; 427 | }), 428 | ).toBeTruthy(); 429 | expect( 430 | Array.from({ 431 | length: 10, 432 | }).every(() => { 433 | return ( 434 | validValue(/a*/, { 435 | maxRepeat: 20, 436 | }).length <= 20 437 | ); 438 | }), 439 | ).toBeTruthy(); 440 | }); 441 | // test capture 442 | test('test capture config', () => { 443 | // use capture 444 | const r1 = new ReRegExp(/(aa?)b(?\d)/, { 445 | capture: true, 446 | }); 447 | for (let i = 0; i < 100; i++) { 448 | const value = r1.build(); 449 | let index = 2; 450 | if (value.startsWith('aa')) { 451 | index = 3; 452 | expect(r1.$1).toEqual('aa'); 453 | } else { 454 | expect(r1.$1).toEqual('a'); 455 | } 456 | expect(r1.groups).not.toBeUndefined(); 457 | expect(r1.groups.num).toEqual(value.charAt(index)); 458 | } 459 | // no capture 460 | const r2 = new ReRegExp(/(aa?)b(?\d)/); 461 | r2.build(); 462 | expect(r2.$1).toBeUndefined(); 463 | expect(r2.groups).toBeUndefined(); 464 | }); 465 | // test unicode category 466 | test('test unicode category',
() => { 467 | // without u flag 468 | const r1 = new ReRegExp('/\\p{Letter}{2}/'); 469 | expect(r1.build()).toEqual('p{Letter}}'); 470 | // with u flag, but no factory set 471 | expect(() => { 472 | const _r2 = new ReRegExp('/\\p{Letter}{2}/u'); 473 | }).toThrowError(); 474 | // set the factory 475 | expect(() => { 476 | ReRegExp.UPCFactory = function (data: UPCData) { 477 | if (data.negate) { 478 | return { 479 | generate() { 480 | return '_'; 481 | }, 482 | }; 483 | } 484 | if (data.value === 'Letter' || data.value === 'L') { 485 | return { 486 | generate() { 487 | return 'a'; 488 | }, 489 | }; 490 | } 491 | return { 492 | generate() { 493 | return '1'; 494 | }, 495 | }; 496 | }; 497 | // Letter 498 | const r2 = new ReRegExp('/\\p{Letter}{2}/u'); 499 | expect(r2.build()).toEqual('aa'); 500 | // 'L' with short syntax 501 | const r3 = new ReRegExp('/\\pLl{2}/u'); 502 | expect(r3.build()).toEqual('all'); 503 | // value is 'Ll', so should return '11' 504 | const r4 = new ReRegExp('/\\p{Ll}{2}/u'); 505 | expect(r4.build()).toEqual('11'); 506 | // negated form (\P) 507 | const r5 = new ReRegExp('/\\P{Letter}{2}/u'); 508 | expect(r5.build()).toEqual('__'); 509 | // delete the factory 510 | delete ReRegExp.UPCFactory; 511 | }).not.toThrowError(); 512 | }); 513 | // test last info 514 | test('test parser info', () => { 515 | const r1 = new ReRegExp(/a(b(c))d/i); 516 | const info = r1.info(); 517 | expect(info.lastRule).toEqual('a(b(c))d'); 518 | expect(info.flags.includes('i')).toBeTruthy(); 519 | expect(r1.info().lastRule === info.lastRule).toBeTruthy(); 520 | }); 521 | // test extractSetAverage 522 | test('test average', () => { 523 | // r1 524 | const r1 = new ReRegExp(/[\Wa]/, { 525 | extractSetAverage: true, 526 | }); 527 | expect(run(() => /\W/.test(r1.build())) > (RUNTIMES * 2) / 3).toBeTruthy(); 528 | // r2 529 | const r2 = new ReRegExp(/[\Wa]/); 530 | expect(run(() => /\W/.test(r2.build())) < (RUNTIMES * 2) / 3).toBeTruthy(); 531 | // r3 532 | const r3 = new
ReRegExp(/[a-z,]/, { 533 | extractSetAverage: true, 534 | }); 535 | expect( 536 | run(() => /[a-z]/.test(r3.build())) > (RUNTIMES * 2) / 3, 537 | ).toBeTruthy(); 538 | }); 539 | // test others 540 | test('other conditions', () => { 541 | // CharsetHelper.make('.') with each combination of the s and u flags 542 | expect( 543 | run(() => /^.$/.test(CharsetHelper.make('.'))) === RUNTIMES, 544 | ).toBeTruthy(); 545 | expect( 546 | run(() => 547 | /^.$/s.test( 548 | CharsetHelper.make('.', { 549 | s: true, 550 | }), 551 | ), 552 | ) === RUNTIMES, 553 | ).toBeTruthy(); 554 | expect( 555 | run(() => 556 | /^.$/u.test( 557 | CharsetHelper.make('.', { 558 | u: true, 559 | }), 560 | ), 561 | ) === RUNTIMES, 562 | ).toBeTruthy(); 563 | expect( 564 | run(() => 565 | /^.$/su.test( 566 | CharsetHelper.make('.', { 567 | s: true, 568 | u: true, 569 | }), 570 | ), 571 | ) === RUNTIMES, 572 | ).toBeTruthy(); 573 | }); 574 | // test diy any characters 575 | test('diy any chacaters', () => { 576 | // error character ranges 577 | expect( 578 | () => 579 | new ReRegExp('.', { 580 | charactersOfAny: [], 581 | }), 582 | ).toThrow(); 583 | // global config 584 | ReRegExp.charactersOfAny = () => '.'; 585 | const re = new ReRegExp('.'); 586 | for (let i = 0; i < 100; i++) { 587 | expect(re.build() === '.').toBeTruthy(); 588 | } 589 | ReRegExp.charactersOfAny = undefined; 590 | // use charset helper characters 591 | const re0 = new ReRegExp('.', { 592 | charactersOfAny: CharsetHelper.points.d, 593 | }); 594 | for (let i = 0; i < 100; i++) { 595 | expect(/^\w$/.test(re0.build())).toBeTruthy(); 596 | } 597 | // flat array with a single code point 598 | const re1 = new ReRegExp('.', { 599 | charactersOfAny: [97], 600 | }); 601 | for (let i = 0; i < 100; i++) { 602 | expect(re1.build() === 'a').toBeTruthy(); 603 | } 604 | // flat array with two code points 605 | const re2 = new ReRegExp('.', { 606 | charactersOfAny: [97, 98], 607 | }); 608 | for (let i = 0; i < 100; i++) { 609 | expect(['a', 'b'].includes(re2.build())).toBeTruthy(); 610 | } 611 | // nested array holding one [97, 98] entry 612 | const re3 = new ReRegExp('.', { 613
| charactersOfAny: [[97, 98]], 614 | }); 615 | for (let i = 0; i < 100; i++) { 616 | expect(['a', 'b'].includes(re3.build())).toBeTruthy(); 617 | } 618 | // nested array mixing a two-number entry and a one-number entry 619 | const re4 = new ReRegExp('..', { 620 | charactersOfAny: [[97, 98], [99]], 621 | }); 622 | for (let i = 0; i < 100; i++) { 623 | expect( 624 | ['aa', 'ab', 'ac', 'ba', 'bb', 'bc', 'ca', 'cb', 'cc'].includes( 625 | re4.build(), 626 | ), 627 | ).toBeTruthy(); 628 | } 629 | // handler function 630 | const re5 = new ReRegExp('/./', { 631 | charactersOfAny: () => 'a', 632 | }); 633 | for (let i = 0; i < 100; i++) { 634 | expect(re5.build() === 'a').toBeTruthy(); 635 | } 636 | // handler function that reads the flags 637 | const re6 = new ReRegExp('/./s', { 638 | charactersOfAny: (flags) => (flags.s ? 'b' : 'a'), 639 | }); 640 | for (let i = 0; i < 100; i++) { 641 | expect(re6.build() === 'b').toBeTruthy(); 642 | } 643 | }); 644 | }); 645 | -------------------------------------------------------------------------------- /dist/reregexp.min.js: -------------------------------------------------------------------------------- 1 | !function(e){var t={};function n(r){if(t[r])return t[r].exports;var o=t[r]={i:r,l:!1,exports:{}};return e[r].call(o.exports,o,o.exports,n),o.l=!0,o.exports}n.m=e,n.c=t,n.d=function(e,t,r){n.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:r})},n.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},n.t=function(e,t){if(1&t&&(e=n(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var r=Object.create(null);if(n.r(r),Object.defineProperty(r,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)n.d(r,o,function(t){return e[t]}.bind(null,o));return r},n.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return n.d(t,"a",t),t},n.o=function(e,t){return
Object.prototype.hasOwnProperty.call(e,t)},n.p="",n(n.s=0)}([function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0});var r=n(1);window.ReRegExp=r.default},function(e,t,n){"use strict";var r,o=this&&this.__extends||(r=function(e,t){return(r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(e,t)},function(e,t){if("function"!=typeof t&&null!==t)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");function n(){this.constructor=e}r(e,t),e.prototype=null===t?Object.create(t):(n.prototype=t.prototype,new n)}),i=this&&this.__assign||function(){return(i=Object.assign||function(e){for(var t,n=1,r=arguments.length;n=.5},s=function(e,t){return e===t?e:e+Math.floor(Math.random()*(t+1-e))},c=function(e){for(var t=p(e),n=s(1,t),r=e.length,o=0;r>1;){var i=Math.floor(r/2),a=e[o+i-1],u=e[o+i];if(n>=a&&n<=u){o+=i-(n===a?1:0);break}n>u?(o+=i+1,r-=i+1):r-=i}return{rand:n,index:o}},p=function(e){return e[e.length-1]},l=function(){function e(){}return e.charsetOfAll=function(){return e.charsetOfNegated("ALL")},e.charsetOfDotall=function(){return e.charsetOfNegated("DOTALL")},e.charsetOfNegated=function(t){var n=e.points,r=e.cache;if(r[t])return r[t];var o=0,i=[],a=[],u=0,s=function(e,t){var n=t-e+1;n<=0||(i.push(n>1?[e,t]:[e]),u+=n,a.push(u))};if("DOTALL"===t)s(o,56319),s(57344,65535);else{for(var c="ALL"===t?[[10],[13],[8232,8233]]:n[t.toLowerCase()],l="S"===t,h=c.length-(l?1:0),f=0;o<=56319&&h>f;){var g=c[f++],d=g[0],y=g[1];s(o,d-1),o=(y||d)+1}if(o<56319&&s(o,56319),l){var v=p(c)[0];s(57344,v-1),s(v+1,65535)}else s(57344,65535)}return r[t]={ranges:i,totals:a}},e.charsetOf=function(t){var n=e.lens;return{ranges:e.points[t],totals:n[t]}},e.getCharsetInfo=function(t,n){var r;void 0===n&&(n={});var 
o=e;return["w","d","s"].includes(t)?r=o.charsetOf(t):(r="."===t?n.s?o.charsetOfDotall():o.charsetOfAll():o.charsetOfNegated(t),n.u&&(r={ranges:r.ranges.concat([o.bigCharPoint]),totals:r.totals.concat(o.bigCharTotal)})),r},e.make=function(t,n){return void 0===n&&(n={}),e.makeOne(e.getCharsetInfo(t,n))},e.makeOne=function(e){var t=e.totals,n=e.ranges,r=c(t),o=r.rand,i=r.index,a=n[i][0]+(o-(t[i-1]||0))-1;return String.fromCodePoint(a)},e.points={d:[[48,57]],w:[[48,57],[65,90],[95],[97,122]],s:[[9,13],[32],[160],[5760],[8192,8202],[8232,8233],[8239],[8287],[12288],[65279]]},e.lens={d:[10],w:[10,36,37,63],s:[5,6,7,8,18,20,21,22,23,24]},e.bigCharPoint=[65536,1114111],e.bigCharTotal=1048576,e.cache={},e}();t.CharsetHelper=l;var h=l,f={beginWith:"^",endWith:"$",matchAny:".",groupBegin:"(",groupEnd:")",groupSplitor:"|",setBegin:"[",setEnd:"]",rangeSplitor:"-",multipleBegin:"{",multipleEnd:"}",multipleSplitor:",",translate:"\\",leastOne:"+",multiple:"*",optional:"?",setNotIn:"^",delimiter:"/"},g={i:1,u:2,s:4,g:8,m:16,y:32,d:64},d=Object.keys(g).join("");t.parserRule=new RegExp("^\\/(?:\\\\.|\\[[^\\]]*\\]|[^\\/])+?/[".concat(d,"]*"));t.regexpRule=new RegExp("^\\/".concat("((?:\\\\.|\\[[^\\]]*\\]|[^\\/])+?)","\\/([").concat(d,"]*)$"));var y=new RegExp("^".concat("((?:\\\\.|\\[[^\\]]*\\]|[^\\/])+?)","$")),v=/^(0[0-7]{0,2}|[1-3][0-7]{0,2}|[4-7][0-7]?)/,m=function(){function e(e,n){if(void 0===n&&(n={}),this.rule=e,this.config=n,this.context="",this.flags=[],this.lastRule="",this.queues=[],this.ruleInput="",this.flagsHash={},this.totalFlagBinary=0,this.rootQueues=[],this.hasLookaround=!1,this.hasNullRoot=null,this.anyCharacterHandleDone=!1,e instanceof RegExp)this.rule=e.toString(),this.context=e.source,this.flags=e.flags.split("");else{if(!t.regexpRule.test(e)&&!y.test(e))throw new Error("wrong regexp:".concat(e));this.rule=e,this.context=RegExp.$1,this.flags=RegExp.$2?RegExp.$2.split(""):[]}this.checkFlags(),this.parse(),this.lastRule=this.ruleInput}return 
e.prototype.build=function(){if(this.hasLookaround)throw new Error("the build method does not support lookarounds.");var e=this.rootQueues,t="",n=i(i({},this.config),{flags:this.flagsHash,namedGroupData:{},captureGroupData:{}}),r="the regexp has null expression, will match nothing";if(!0===this.hasNullRoot)throw new Error(r);if(this.hasNullRoot=e.some((function(e){return!!e.isMatchNothing||(t+=e.build(n),!1)})),this.hasNullRoot)throw new Error(r);if(this.config.capture){for(var o=n.captureGroupData,a=n.namedGroupData,u=1;u<=9;u++)this["$".concat(u)]=o[u]||"";Object.keys(a).length>0&&(this.groups=a)}return t},e.prototype.info=function(){var e=this;return{rule:e.rule,context:e.context,lastRule:e.lastRule,flags:e.flags,queues:e.queues}},e.prototype.parse=function(){for(var t=this,n=this.context,r=f,o=0,u=n.length,s=[new I],c=[],l=[],h=[],g={},d={},y=/^(\?(?:<(.+?)>|:))/,m=/^(\?(?:7)re=new A;else{pe=ce.length>=3&&+ce.charAt(2)<=7?ce.slice(1,3):ce.charAt(1);re=new T("\\0".concat(pe)),o+=pe.length}else if(o+=ce.length-1,+ce<=h.length){re=new w("\\".concat(ce));var le=h[+ce-1];d[ce]=le,le.isAncestorOf(te)?re.ref=null:re.ref=le}else re=new $("\\".concat(ce)),b.push(re)}else if("k"===ie&&/^<([^>]+?)>/.test(n.slice(o))){var he=RegExp.$1;if(!g[he])throw new Error("Invalid named capture referenced:".concat(he));o+=he.length+2;le=g[he];re=new w("\\".concat(le.captureIndex),he),le.isAncestorOf(te)?re.ref=null:re.ref=le}else re=new F(ae);break;case r.groupBegin:var fe=m.test(ee);if(fe){var ge=RegExp.$1;re=new E(ge),oe=new C("lookaroundBegin"),this.hasLookaround=!0,o+=ge.length}else re=new Z,oe=new C("groupBegin");if(!fe){if(re=re,y.test(ee)){var de=RegExp.$1,ye=RegExp.$2;"?:"===de||(re.captureIndex=++D,re.captureName=ye,g[ye]=re),o+=de.length}else re.captureIndex=++D;re.captureIndex>0&&h.push(re)}break;case r.groupEnd:if(!x.length)throw new Error("unmatched ".concat(V,',you mean "\\').concat(V,'"?'));var ve=x.pop(),me=("group"===ve.type?c:l).pop();me.isComplete=!0,(oe=new 
C("".concat(ve.type,"End"))).parent=me;break;case r.groupSplitor:var Re=p(c);if(Re)Re.addNewGroup();else{var xe=new Z;xe.isRoot=!0,xe.addRootItem(s.slice(1)),s.splice(1,0,xe),c.push(xe)}oe=new C("groupSplitor");break;case r.setBegin:/^\\b]/.test(ee)?(re=new O,o+=3):(W=new G,"^"===ee.charAt(0)&&(W.reverse=!0,o+=1),_(W),J(W),(oe=new C("setBegin")).parent=W);break;case r.setEnd:W?(W.isComplete=!0,(oe=new C("setEnd")).parent=W,W=null):re=new S(V);break;case r.rangeSplitor:if(W)if(ne.codePoint<0)re=new S(V);else ee.charAt(0)===r.setEnd?(W.isComplete=!0,W=null,o+=1):(W.pop(),(Y=new L).parent=W,s.pop().parent=Y,_(Y,ne),(oe=new C("rangeSplitor")).parent=Y);else re=new S(V);break;case r.multipleBegin:case r.optional:case r.multiple:case r.leastOne:var be=(re=V===r.multipleBegin?new M:new(B.bind.apply(B,a([void 0],this.config.maxRepeat?[this.config.maxRepeat]:[],!1)))).untilEnd(n.slice(o-1));if(be>0){var we=ne instanceof C?ne.special:ne.type,Ce="[".concat(ne.input,"]nothing to repeat[index:").concat(o,"]:").concat(n.slice(o-1,o-1+be));if(K(we,ne))throw new Error(Ce);o+=be-1,re.target="groupEnd"===we||"setEnd"===we?ne.parent:ne}else re=new S(V);break;case r.matchAny:re=new P(t.anyCharacterHandleDone?t.anyCharacterHandle:(t.anyCharacterHandleDone=!0,t.anyCharacterHandle=P.genDiyCharactersHandle(i(i({},t.config),{flags:t.flagsHash}))));break;case r.beginWith:case r.endWith:re=new q(V);break;case r.delimiter:throw new Error('unexpected pattern end delimiter:"/'.concat(ee,'"'));default:re=new S(V)}if(re){var Ee=re;if(_(Ee),Y){if(re.codePoint<0){var Pe=s.splice(-4,4),Ae=Pe[1],Oe=Pe[3],Ie=new S("-");W.pop(),[Ae,Ie,Oe].map((function(e){return e.parent=W,_(e),e}))}else re.parent=Y;Y=null}else W?Ee.parent=W:J(Ee)}oe&&(re&&(oe.parent=re),_(oe))}}if(b.length){var ke=function(e,t,n){for(var r=0,o=!1,i=e.length;r1&&"group"===s[1].type&&!0===s[1].isRoot&&(s[1].isComplete=!0);var je=[],qe="";s.every((function(e){if(!e.isComplete)throw new Error("the regexp segment ".concat(e.type," is not 
completed:").concat(e.input));return null===e.parent&&(je.push(e),qe+=e.getRuleInput()),!0})),this.ruleInput=qe,this.rootQueues=je,this.queues=s},e.prototype.checkFlags=function(){var e,t=this.flags,n=t.length;if(0!==n){if(n>Object.keys(g).length)throw new Error("The rule may has repeated or unrecognized flags, please check."));for(var r=t[0],o=g[r],i=((e={})[r]=!0,e),a=1,u=t.length;ae[0]?-1:t[0]===e[0]?t[1]>e[1]?1:-1:1}));for(var i=[],a=0,u=0,s=o.length;uo.codePoint)throw new Error("invalid range:".concat(r.getRuleInput(),"-").concat(o.getRuleInput()))}},t.prototype.getRuleInput=function(){var e=this.queues,t=e[0],n=e[1];return t.getRuleInput()+"-"+n.getRuleInput()},t.prototype.prebuild=function(){var e=this.queues,t=e[0],n=e[1],r=t.codePoint,o=n.codePoint;return String.fromCodePoint(s(r,o))},t.prototype.getCodePointCount=function(){var e=this.queues,t=e[0],n=e[1],r=t.codePoint;return n.codePoint-r+1},t}(R);t.RegexpRange=L;var D=function(e){function t(){var t=null!==e&&e.apply(this,arguments)||this;return t.type="hexcode",t}return o(t,e),t.prototype.untilEnd=function(e){var t=this.rule,n=this.codeType;if(t.test(e)){var r=RegExp.$1,o=RegExp.$2||r;if(this.codePoint=Number("0x".concat(o)),this.codePoint>1114111)throw new Error("invalid unicode code point:\\u{".concat(o,"},can not great than 0x10ffff"));return this.input="\\".concat(n).concat(r),r.length}return 0},t}(b);t.RegexpHexCode=D;var H=function(e){function t(){var t=null!==e&&e.apply(this,arguments)||this;return t.rule=/^([0-9A-Fa-f]{4})/,t.codeType="u",t}return o(t,e),t}(D);t.RegexpUnicode=H;var U=function(e){function t(){var t=null!==e&&e.apply(this,arguments)||this;return t.rule=/^({(0*[0-9A-Fa-f]{1,6})}|[0-9A-Fa-f]{4})/,t.codeType="u",t}return o(t,e),t}(D);t.RegexpUnicodeAll=U;var Q=function(e){function t(){var t=null!==e&&e.apply(this,arguments)||this;return t.rule=/^([0-9A-Fa-f]{2})/,t.codeType="x",t}return o(t,e),t}(D);t.RegexpASCII=Q;var z=function(e){function t(t){var n=e.call(this)||this;return 
n.symbol=t,n.type="unicode-category",n.rule=/^([A-Z]|\{(?:(?:([a-zA-Z_]+)=)?([A-Za-z_]+))})/,n}return o(t,e),t.prototype.untilEnd=function(e){if(this.rule.test(e)){var t=RegExp.$1,n=RegExp.$2,r=RegExp.$3,o=this.symbol,i="P"===o,a=void 0;r?(a={short:!1,negate:i,value:r},n&&(a.key=n)):a={short:!0,negate:i,value:t},this.data=a;var u=m.UPCFactory;return this.generator=u(a),this.input="\\".concat(o).concat(t),t.length}return 0},t.prototype.prebuild=function(){return this.generator.generate()},t}(R);t.RegexpUnicodeCategory=z;var W=function(e){function t(t){var n=e.call(this)||this;return n.index=t,n.type="group-item",n}return o(t,e),t.prototype.getRuleInput=function(e){var t=this;return void 0===e&&(e=!1),this.queues.reduce((function(n,r){return n+(e&&"reference"===r.type&&null!==r.ref?r.ref.getRuleInput(e):t.isEndLimitChar(r)?"":r.getRuleInput(e))}),"")},t.prototype.prebuild=function(e){var t=this;return this.queues.reduce((function(n,r){var o;return t.isEndLimitChar(r)?(console.warn("the ^ and $ of the regexp will ignore"),o=""):o=r.build(e),n+o}),"")},t.prototype.isEndLimitChar=function(e){return"anchor"===e.type},t}(R);t.RegexpGroupItem=W;var Z=function(e){function t(){var t=e.call(this)||this;return t.type="group",t.captureIndex=0,t.captureName="",t.queues=[],t.isRoot=!1,t.curGroupItem=null,t.curRule=null,t.isComplete=!1,t.buildForTimes=!0,t.addNewGroup(),t}return o(t,e),Object.defineProperty(t.prototype,"isComplete",{get:function(){return this.completed},set:function(e){this.completed=e,!0===e&&(this.isMatchNothing=this.queues.every((function(e){return e.isMatchNothing})))},enumerable:!1,configurable:!0}),t.prototype.addNewGroup=function(){var e=this.queues,t=new W(e.length);return this.curGroupItem=t,t.parent=this,t},t.prototype.addRootItem=function(e){var t=this;e.map((function(e){null===e.parent&&(e.parent=t.curGroupItem)})),this.addNewGroup()},t.prototype.addItem=function(e){e.parent=this.curGroupItem},t.prototype.getRuleInput=function(e){void 0===e&&(e=!1);var 
t=this.queues,n=this.captureIndex,r=this.isRoot,o="";return 0!==n||r||(o="?:"+o),o+=t.map((function(t){return t.getRuleInput(e)})).join("|"),r?o:"(".concat(o,")")},t.prototype.buildRule=function(e){if(this.curRule)return this.curRule;var t=this.getRuleInput(!0),n=Object.keys(e).join("");return this.curRule=new Function("","return /^".concat(t,"$/").concat(n))()},t.prototype.prebuild=function(e){var t,n=this.queues,r=this.captureIndex,o=this.captureName,i="",a=e.flags,u=e.namedGroupConf,c=n.length,p=[],l=[],h=[],f=[];if(o&&o.includes("_")&&u){var g=o.split("_");if(g.length===c){var d=!1;if("object"==typeof u[o]){var y=u[o];g.forEach((function(e,t){if("boolean"==typeof y[e]&&!1===y[e]);else{d=!0;var r=n[t];Array.isArray(y[e])?(l.push(r),h.push(y[e])):p.push(r)}}))}if(!d)throw new Error("the specified named group '".concat(o,"' are all filtered by the config."));var v=l.length;v&&(x=s(0,v+p.length-1)) { 3 | return Math.random() >= 0.5; 4 | }; 5 | // make a random number between min to max 6 | const makeRandom = (min: number, max: number): number => { 7 | if (min === max) { 8 | return min; 9 | } else { 10 | return min + Math.floor(Math.random() * (max + 1 - min)); 11 | } 12 | }; 13 | // make a random from totals array 14 | const getRandomTotalIndex = ( 15 | totals: number[], 16 | ): { 17 | rand: number; 18 | index: number; 19 | } => { 20 | const total = getLastItem(totals); 21 | const rand = makeRandom(1, total); 22 | let nums = totals.length; 23 | let index = 0; 24 | while (nums > 1) { 25 | const avg = Math.floor(nums / 2); 26 | const prev = totals[index + avg - 1]; 27 | const next = totals[index + avg]; 28 | if (rand >= prev && rand <= next) { 29 | // find 30 | index += avg - (rand === prev ? 
1 : 0); 31 | break; 32 | } else { 33 | if (rand > next) { 34 | // in the right side 35 | index += avg + 1; 36 | nums -= avg + 1; 37 | } else { 38 | // in the left side,keep the index 39 | nums -= avg; 40 | } 41 | } 42 | } 43 | return { 44 | rand, 45 | index, 46 | }; 47 | }; 48 | // get an array's last item 49 | const getLastItem = (arr: T[]) => { 50 | return arr[arr.length - 1]; 51 | }; 52 | 53 | const SYMBOL_DOTALL = 'DOTALL'; 54 | const SYMBOL_ALL = 'ALL'; 55 | 56 | // normal object 57 | export interface NormalObject { 58 | [index: string]: T; 59 | } 60 | // regular expression flags 61 | export type $N = `$${1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9}`; 62 | export type Flag = 'i' | 'm' | 'g' | 'u' | 'y' | 's' | 'd'; 63 | export type FlagsHash = { 64 | [key in Flag]?: boolean; 65 | }; 66 | export type FlagsBinary = { 67 | [key in Flag]: number; 68 | }; 69 | export type NamedGroupConf = NormalObject; 70 | 71 | export interface FeaturesConfig { 72 | unicode?: boolean; 73 | namedCapture?: boolean; 74 | upc?: boolean; 75 | } 76 | export interface ParserConf { 77 | maxRepeat?: number; 78 | namedGroupConf?: NamedGroupConf>; 79 | extractSetAverage?: boolean; 80 | charactersOfAny?: 81 | | CodePointRanges 82 | | CodePointRangeItem 83 | | ((flags?: FlagsHash) => string); 84 | capture?: boolean; 85 | features?: FeaturesConfig; 86 | } 87 | export interface BuildConfData extends ParserConf { 88 | flags: FlagsHash; 89 | namedGroupData: NormalObject; 90 | captureGroupData: NormalObject; 91 | } 92 | 93 | export type Result = Pick< 94 | ReRegExp, 95 | 'rule' | 'lastRule' | 'context' | 'flags' 96 | > & { 97 | queues: RegexpPart[]; 98 | }; 99 | 100 | export type UPCData = { 101 | negate: boolean; 102 | short: boolean; 103 | key?: string; 104 | value: string; 105 | }; 106 | 107 | export type UPCFactory = (data: UPCData) => UPCInstance | never; 108 | 109 | export interface UPCInstance { 110 | generate(): string; 111 | } 112 | 113 | export class CharsetHelper { 114 | public static readonly 
points: CodePointData = { 115 | // 0-9 116 | d: [[48, 57]], 117 | // 0-9, A-Z, _, a-z 118 | w: [[48, 57], [65, 90], [95], [97, 122]], 119 | /** 120 | * Whitespaces, see the wiki below: 121 | * https://en.wikipedia.org/wiki/Whitespace_character 122 | * https://github.com/microsoft/ChakraCore/issues/2120 [0x18e0] not in \s 123 | */ 124 | s: [ 125 | [0x0009, 0x000d], 126 | [0x0020], 127 | [0x00a0], 128 | [0x1680], 129 | [0x2000, 0x200a], 130 | [0x2028, 0x2029], 131 | [0x202f], 132 | [0x205f], 133 | [0x3000], 134 | [0xfeff], 135 | ], 136 | }; 137 | // the count of the code point ranges 138 | // should match the 'points' field. 139 | public static readonly lens: CodePointData = { 140 | // 0-9, total is 10 141 | d: [10], 142 | // 0-9:10, A-Z:26 => 26+10, _: 1 => 26+10+1, a-z: 26 => 26+10+1+26 143 | w: [10, 36, 37, 63], 144 | // 0x9-0xd: 5, 0x20: 1 => 5+1 ...see the 's' in static field 'points' 145 | s: [5, 6, 7, 8, 18, 20, 21, 22, 23, 24], 146 | }; 147 | // big code point character 148 | public static readonly bigCharPoint: [number, number] = [0x10000, 0x10ffff]; 149 | public static readonly bigCharTotal: number = 0x10ffff - 0x10000 + 1; 150 | // match '.' 151 | public static charsetOfAll(): CodePointResult { 152 | return CharsetHelper.charsetOfNegated(SYMBOL_ALL); 153 | } 154 | // match '.' 
with s flag 155 | public static charsetOfDotall(): CodePointResult { 156 | return CharsetHelper.charsetOfNegated(SYMBOL_DOTALL); 157 | } 158 | // get negated charset 159 | public static charsetOfNegated(type: CharsetCacheType): CodePointResult { 160 | const { points, cache } = CharsetHelper; 161 | if (cache[type]) { 162 | return cache[type]; 163 | } else { 164 | let start = 0x0000; 165 | const max = 0xdbff; 166 | const nextStart = 0xe000; 167 | const nextMax = 0xffff; 168 | const ranges: CodePointRanges = []; 169 | const totals: number[] = []; 170 | let total = 0; 171 | const add = (begin: number, end: number) => { 172 | const num = end - begin + 1; 173 | if (num <= 0) { 174 | return; 175 | } else { 176 | ranges.push(num > 1 ? [begin, end] : [begin]); 177 | } 178 | total += num; 179 | totals.push(total); 180 | }; 181 | if (type === SYMBOL_DOTALL) { 182 | add(start, max); 183 | add(nextStart, nextMax); 184 | } else { 185 | // should exclude 186 | const excepts = 187 | type === SYMBOL_ALL 188 | ? [[0x000a], [0x000d], [0x2028, 0x2029]] 189 | : points[type.toLowerCase() as CharsetType]; 190 | const isNegaWhitespace = type === 'S'; 191 | const count = excepts.length - (isNegaWhitespace ? 
1 : 0); 192 | let looped = 0; 193 | while (start <= max && count > looped) { 194 | const [begin, end] = excepts[looped++]; 195 | add(start, begin - 1); 196 | start = (end || begin) + 1; 197 | } 198 | if (start < max) { 199 | add(start, max); 200 | } 201 | if (isNegaWhitespace) { 202 | // 0xfeff 203 | const last = getLastItem(excepts)[0]; 204 | add(nextStart, last - 1); 205 | add(last + 1, nextMax); 206 | } else { 207 | add(nextStart, nextMax); 208 | } 209 | } 210 | return (cache[type] = { 211 | ranges, 212 | totals, 213 | }); 214 | } 215 | } 216 | // charset of type 's'|'d'|'w' 217 | public static charsetOf(type: CharsetType): CodePointResult { 218 | const { lens, points } = CharsetHelper; 219 | return { 220 | ranges: points[type], 221 | totals: lens[type], 222 | }; 223 | } 224 | // get charset ranges 225 | public static getCharsetInfo( 226 | type: CharsetType | CharsetNegatedType | '.', 227 | flags: FlagsHash = {}, 228 | ): CodePointResult { 229 | let last: CodePointResult; 230 | const helper = CharsetHelper; 231 | if (['w', 'd', 's'].includes(type)) { 232 | last = helper.charsetOf(type as CharsetType); 233 | } else { 234 | if (type === '.') { 235 | if (flags.s) { 236 | last = helper.charsetOfDotall(); 237 | } else { 238 | last = helper.charsetOfAll(); 239 | } 240 | } else { 241 | last = helper.charsetOfNegated(type as CharsetNegatedType); 242 | } 243 | if (flags.u) { 244 | last = { 245 | ranges: last.ranges.concat([helper.bigCharPoint]), 246 | totals: last.totals.concat(helper.bigCharTotal), 247 | }; 248 | } 249 | } 250 | return last; 251 | } 252 | // make the type 253 | public static make( 254 | type: CharsetType | CharsetNegatedType | '.', 255 | flags: FlagsHash = {}, 256 | ): string { 257 | return CharsetHelper.makeOne(CharsetHelper.getCharsetInfo(type, flags)); 258 | } 259 | // make one character 260 | public static makeOne(result: CodePointResult): string { 261 | const { totals, ranges } = result; 262 | const { rand, index } = getRandomTotalIndex(totals); 
263 | const codePoint = ranges[index][0] + (rand - (totals[index - 1] || 0)) - 1; 264 | return String.fromCodePoint(codePoint); 265 | } 266 | protected static readonly cache: CharsetCache = {}; 267 | // contructor 268 | protected constructor() { 269 | // do nothing, no methods, no properties 270 | } 271 | } 272 | const charH = CharsetHelper; 273 | const symbols: NormalObject = { 274 | beginWith: '^', 275 | endWith: '$', 276 | matchAny: '.', 277 | groupBegin: '(', 278 | groupEnd: ')', 279 | groupSplitor: '|', 280 | setBegin: '[', 281 | setEnd: ']', 282 | rangeSplitor: '-', 283 | multipleBegin: '{', 284 | multipleEnd: '}', 285 | multipleSplitor: ',', 286 | translate: '\\', 287 | leastOne: '+', 288 | multiple: '*', 289 | optional: '?', 290 | setNotIn: '^', 291 | delimiter: '/', 292 | }; 293 | const flagsBinary: FlagsBinary = { 294 | i: 0b0000001, 295 | u: 0b0000010, 296 | s: 0b0000100, 297 | g: 0b0001000, 298 | m: 0b0010000, 299 | y: 0b0100000, 300 | d: 0b1000000, 301 | }; 302 | const flagItems = Object.keys(flagsBinary).join(''); 303 | export const parserRule = new RegExp( 304 | `^\\/(?:\\\\.|\\[[^\\]]*\\]|[^\\/])+?\/[${flagItems}]*`, 305 | ); 306 | const regexpRuleContext = `((?:\\\\.|\\[[^\\]]*\\]|[^\\/])+?)`; 307 | export const regexpRule = new RegExp( 308 | `^\\/${regexpRuleContext}\\/([${flagItems}]*)$`, 309 | ); 310 | const regexpNoFlagsRule = new RegExp(`^${regexpRuleContext}$`); 311 | const octalRule = /^(0[0-7]{0,2}|[1-3][0-7]{0,2}|[4-7][0-7]?)/; 312 | /** 313 | * 314 | * 315 | * @export 316 | * @class 317 | */ 318 | export default class ReRegExp { 319 | // static maxRepeat config 320 | public static maxRepeat = 5; 321 | // static features 322 | public static features: FeaturesConfig = { 323 | unicode: true, 324 | namedCapture: true, 325 | upc: true, 326 | }; 327 | // static handle for unicode categories 328 | public static UPCFactory?: UPCFactory; 329 | // diy RegexpAny characters 330 | public static charactersOfAny: ParserConf['charactersOfAny']; 331 | // 
regexp input, without flags 332 | public readonly context: string = ''; 333 | // flags 334 | public readonly flags: Flag[] = []; 335 | // last rule, without named group 336 | public readonly lastRule: string = ''; 337 | // capture data, named or unnamed group data 338 | public groups?: NormalObject; 339 | public $1: string; 340 | public $2: string; 341 | public $3: string; 342 | public $4: string; 343 | public $5: string; 344 | public $6: string; 345 | public $7: string; 346 | public $8: string; 347 | public $9: string; 348 | // private fields 349 | private queues: RegexpPart[] = []; 350 | private ruleInput = ''; 351 | private flagsHash: FlagsHash = {}; 352 | private totalFlagBinary = 0; 353 | private rootQueues: RegexpPart[] = []; 354 | private hasLookaround = false; 355 | private hasNullRoot: boolean = null; 356 | // any character handle 357 | private anyCharacterHandle: () => string; 358 | private anyCharacterHandleDone = false; 359 | // constructor 360 | constructor( 361 | public readonly rule: string | RegExp, 362 | private config: ParserConf = {}, 363 | ) { 364 | if (rule instanceof RegExp) { 365 | this.rule = rule.toString(); 366 | this.context = rule.source; 367 | this.flags = rule.flags.split('') as Flag[]; 368 | } else { 369 | if (regexpRule.test(rule) || regexpNoFlagsRule.test(rule)) { 370 | this.rule = rule; 371 | this.context = RegExp.$1; 372 | this.flags = RegExp.$2 ? 
(RegExp.$2.split('') as Flag[]) : []; 373 | } else { 374 | throw new Error(`wrong regexp:${rule}`); 375 | } 376 | } 377 | this.checkFlags(); 378 | this.parse(); 379 | this.lastRule = this.ruleInput; 380 | } 381 | 382 | // build 383 | public build(): string | never { 384 | if (this.hasLookaround) { 385 | throw new Error('the build method does not support lookarounds.'); 386 | } 387 | const { rootQueues } = this; 388 | let result = ''; 389 | const conf: BuildConfData = { 390 | ...this.config, 391 | flags: this.flagsHash, 392 | namedGroupData: {}, 393 | captureGroupData: {}, 394 | }; 395 | const nullRootErr = 'the regexp has null expression, will match nothing'; 396 | if (this.hasNullRoot === true) { 397 | throw new Error(nullRootErr); 398 | } else { 399 | this.hasNullRoot = rootQueues.some((queue) => { 400 | // make sure detect 'isMatchNothing' before 'build()' 401 | if (queue.isMatchNothing) { 402 | return true; 403 | } 404 | result += queue.build(conf); 405 | return false; 406 | }); 407 | if (this.hasNullRoot) throw new Error(nullRootErr); 408 | } 409 | if (this.config.capture) { 410 | // if `capture` is setted, set the instance $1-$9 values and group values 411 | const { captureGroupData, namedGroupData } = conf; 412 | // group index data 413 | for (let i = 1; i <= 9; i++) { 414 | this[`$${i}` as $N] = captureGroupData[i] || ''; 415 | } 416 | // named group data 417 | if (Object.keys(namedGroupData).length > 0) { 418 | this.groups = namedGroupData; 419 | } 420 | } 421 | return result; 422 | } 423 | // get all info 424 | public info(): Result { 425 | const { rule, context, lastRule, flags, queues } = this; 426 | return { 427 | rule, 428 | context, 429 | lastRule, 430 | flags, 431 | queues, 432 | }; 433 | } 434 | // parse 435 | private parse() { 436 | const { context } = this; 437 | const s = symbols; 438 | let i = 0; 439 | const j: number = context.length; 440 | const queues: RegexpPart[] = [new RegexpBegin()]; 441 | const groups: RegexpGroup[] = []; 442 | const 
lookarounds: RegexpLookaround[] = []; 443 | const captureGroups: RegexpGroup[] = []; 444 | const namedCaptures: { [index: string]: RegexpGroup } = {}; 445 | const refGroups: { [index: string]: RegexpGroup | null } = {}; 446 | const captureRule = /^(\?(?:<(.+?)>|:))/; 447 | const lookaroundRule = /^(\?(?: { 452 | args.forEach((queue: RegexpPart) => (queue.parser = this)); 453 | queues.push(...args); 454 | }; 455 | let groupCaptureIndex = 0; 456 | let curSet: RegexpSet = null; 457 | let curRange: RegexpRange = null; 458 | const addToGroupOrLookaround = (cur: RegexpPart) => { 459 | const curQueue = 460 | getLastItem(nestQueues) || 461 | getLastItem(groups) || 462 | getLastItem(lookarounds); 463 | if (['group', 'lookaround'].includes(cur.type)) { 464 | const lists = cur.type === 'group' ? groups : lookarounds; 465 | (lists as RegexpPart[]).push(cur); 466 | nestQueues.push(cur); 467 | } 468 | if (curQueue) { 469 | if (curQueue.type === 'group') { 470 | (curQueue as RegexpGroup).addItem(cur); 471 | } else { 472 | cur.parent = curQueue; 473 | } 474 | } 475 | }; 476 | const isWrongRepeat = (type: string, prev: RegexpCharset): boolean => { 477 | const denyTypes = [ 478 | 'groupBegin', 479 | 'groupSplitor', 480 | 'times', 481 | 'begin', 482 | 'anchor', 483 | ]; 484 | return ( 485 | denyTypes.includes(type) || 486 | (type === 'charset' && prev.charset.toLowerCase() === 'b') 487 | ); 488 | }; 489 | // any character handle 490 | const genAnyCharacterHandle = () => { 491 | return this.anyCharacterHandleDone 492 | ? 
this.anyCharacterHandle 493 | : (() => { 494 | this.anyCharacterHandleDone = true; 495 | return (this.anyCharacterHandle = RegexpAny.genDiyCharactersHandle({ 496 | ...this.config, 497 | flags: this.flagsHash, 498 | })); 499 | })(); 500 | }; 501 | // /()/ 502 | while (i < j) { 503 | // current character 504 | const char: string = context.charAt(i++); 505 | // when in set, ignore these special chars 506 | if ( 507 | (curRange || curSet) && 508 | [ 509 | '[', 510 | '(', 511 | ')', 512 | '|', 513 | '*', 514 | '?', 515 | '+', 516 | '{', 517 | '.', 518 | '}', 519 | '^', 520 | '$', 521 | '/', 522 | ].includes(char) 523 | ) { 524 | const newChar = new RegexpChar(char); 525 | if (curRange) { 526 | newChar.parent = curRange; 527 | curRange = null; 528 | } else { 529 | newChar.parent = curSet; 530 | } 531 | addToQueue(newChar); 532 | continue; 533 | } 534 | // match more 535 | const nextAll: string = context.slice(i); 536 | const lastGroup = getLastItem(groups); 537 | const lastQueue = getLastItem(queues); 538 | let target = null; 539 | let special: RegexpPart = null; 540 | switch (char) { 541 | // match translate first,match "\*" 542 | case s.translate: 543 | // move one char 544 | const next = context.charAt(i++); 545 | const input = char + next; 546 | if (next === 'u' || next === 'x') { 547 | // unicode,ascii,if has u flag,can also match ${x{4}|x{6}} 548 | target = 549 | next === 'x' 550 | ? new RegexpASCII() 551 | : hasFlagU 552 | ? 
new RegexpUnicodeAll() 553 | : new RegexpUnicode(); 554 | const matchedNum: number = target.untilEnd(context.slice(i)); 555 | if (matchedNum === 0) { 556 | if (hasFlagU) { 557 | throw new Error(`invalid unicode code point:${context}`); 558 | } 559 | // not regular unicode,"\uzyaa" 560 | target = new RegexpTranslateChar(`\\${next}`); 561 | } else { 562 | // is unicode,move matchedNum steps 563 | i += matchedNum; 564 | } 565 | } else if (next === 'c') { 566 | // control char 567 | // https://github.com/tc39/ecma262/pull/864 568 | // https://github.com/Microsoft/ChakraCore/commit/4374e4407b12fa18e9795ce1ca0869affccb85fe 569 | const code = context.charAt(i); 570 | if (hasFlagU) { 571 | if (/[a-zA-Z]/.test(code)) { 572 | target = new RegexpControl(code); 573 | i++; 574 | } else { 575 | throw new Error( 576 | `invalid unicode escape,unexpect control character[${i}]:\\c${code}`, 577 | ); 578 | } 579 | } else { 580 | if (/[A-Za-z]/.test(code)) { 581 | target = new RegexpControl(code); 582 | i++; 583 | } else { 584 | // treat it with \ and character c 585 | target = new RegexpChar('\\'); 586 | i--; 587 | } 588 | } 589 | } else if (next === 'p' || next === 'P') { 590 | // unicode categories/script/block 591 | if (hasFlagU) { 592 | // must have `u` flag 593 | if (typeof ReRegExp.UPCFactory !== 'function') { 594 | throw new Error( 595 | `You must set the ReRegExp.UPCFactory before you use the unicode category.`, 596 | ); 597 | } 598 | target = new RegexpUnicodeCategory(next); 599 | const matchedNum: number = target.untilEnd(context.slice(i)); 600 | if (matchedNum === 0) { 601 | throw new Error( 602 | `Invalid unicode category syntax at ${i}: \\${next}`, 603 | ); 604 | } else { 605 | i += matchedNum; 606 | } 607 | } else { 608 | // take it as a translate `p` or `P` 609 | target = new RegexpTranslateChar(`\\${next}`); 610 | } 611 | } else if (['d', 'D', 'w', 'W', 's', 'S', 'b', 'B'].includes(next)) { 612 | // charsets 613 | target = new RegexpCharset(input); 614 | } else if 
(['t', 'r', 'n', 'f', 'v'].includes(next)) { 615 | // print chars 616 | target = new RegexpPrint(input); 617 | } else if (/^(\d+)/.test(nextAll)) { 618 | const no = RegExp.$1; 619 | if (curSet) { 620 | // in set, "\" + \d will parse as octal,max 0377 621 | if (octalRule.test(no)) { 622 | const octal = RegExp.$1; 623 | target = new RegexpOctal(`\\${octal}`); 624 | i += octal.length - 1; 625 | } else { 626 | target = new RegexpTranslateChar(`\\${no.charAt(0)}`); 627 | } 628 | } else { 629 | // reference 630 | if (no.charAt(0) === '0') { 631 | if (no.length === 1) { 632 | // \0 633 | target = new RegexpNull(); 634 | } else { 635 | if (+no.charAt(1) > 7) { 636 | // \08 \09 637 | target = new RegexpNull(); 638 | } else { 639 | // \01 640 | const octal = 641 | no.length >= 3 && +no.charAt(2) <= 7 642 | ? no.slice(1, 3) 643 | : no.charAt(1); 644 | target = new RegexpOctal(`\\0${octal}`); 645 | i += octal.length; 646 | } 647 | } 648 | } else { 649 | i += no.length - 1; 650 | if (+no <= captureGroups.length) { 651 | target = new RegexpReference(`\\${no}`); 652 | const refGroup = captureGroups[+no - 1]; 653 | refGroups[no] = refGroup; 654 | if (refGroup.isAncestorOf(lastGroup)) { 655 | target.ref = null; 656 | } else { 657 | target.ref = refGroup; 658 | } 659 | } else { 660 | // may be reference, or octal number, or digits 661 | target = new RegexpRefOrNumber(`\\${no}`); 662 | refOrNumbers.push(target); 663 | } 664 | } 665 | } 666 | } else if (next === 'k' && /^<([^>]+?)>/.test(context.slice(i))) { 667 | const name = RegExp.$1; 668 | if (!namedCaptures[name]) { 669 | throw new Error(`Invalid named capture referenced:${name}`); 670 | } else { 671 | i += name.length + 2; 672 | const refGroup = namedCaptures[name]; 673 | target = new RegexpReference(`\\${refGroup.captureIndex}`, name); 674 | if (refGroup.isAncestorOf(lastGroup)) { 675 | target.ref = null; 676 | } else { 677 | target.ref = refGroup; 678 | } 679 | } 680 | } else { 681 | // charsets 682 | target = new 
RegexpTranslateChar(input); 683 | } 684 | break; 685 | // match group begin "(" 686 | case s.groupBegin: 687 | const isLookaround = lookaroundRule.test(nextAll); 688 | if (isLookaround) { 689 | const lookType = RegExp.$1; 690 | target = new RegexpLookaround(lookType); 691 | special = new RegexpSpecial('lookaroundBegin'); 692 | this.hasLookaround = true; 693 | i += lookType.length; 694 | } else { 695 | target = new RegexpGroup(); 696 | special = new RegexpSpecial('groupBegin'); 697 | } 698 | if (!isLookaround) { 699 | target = target as RegexpGroup; 700 | // get capture info 701 | if (captureRule.test(nextAll)) { 702 | const { $1: all, $2: captureName } = RegExp; 703 | if (all === '?:') { 704 | // do nothing, captureIndex = 0 by default 705 | } else { 706 | // named group 707 | target.captureIndex = ++groupCaptureIndex; 708 | target.captureName = captureName; 709 | namedCaptures[captureName] = target; 710 | } 711 | i += all.length; 712 | } else { 713 | target.captureIndex = ++groupCaptureIndex; 714 | } 715 | if (target.captureIndex > 0) { 716 | captureGroups.push(target); 717 | } 718 | } 719 | break; 720 | // match group end ")" 721 | case s.groupEnd: 722 | if (nestQueues.length) { 723 | const curNest = nestQueues.pop(); 724 | const last = ( 725 | curNest.type === 'group' ? 
groups : lookarounds 726 | ).pop(); 727 | last.isComplete = true; 728 | special = new RegexpSpecial(`${curNest.type}End`); 729 | special.parent = last; 730 | } else { 731 | throw new Error(`unmatched ${char},you mean "\\${char}"?`); 732 | } 733 | break; 734 | // match group splitor "|" 735 | case s.groupSplitor: 736 | const group = getLastItem(groups); 737 | if (!group) { 738 | const rootGroup = new RegexpGroup(); 739 | rootGroup.isRoot = true; 740 | rootGroup.addRootItem(queues.slice(1)); 741 | queues.splice(1, 0, rootGroup); 742 | groups.push(rootGroup); 743 | } else { 744 | group.addNewGroup(); 745 | } 746 | special = new RegexpSpecial('groupSplitor'); 747 | break; 748 | // match set begin "[" 749 | case s.setBegin: 750 | if (/^\\b]/.test(nextAll)) { 751 | target = new RegexpBackspace(); 752 | i += 3; 753 | } else { 754 | curSet = new RegexpSet(); 755 | if (nextAll.charAt(0) === '^') { 756 | curSet.reverse = true; 757 | i += 1; 758 | } 759 | addToQueue(curSet); 760 | addToGroupOrLookaround(curSet); 761 | special = new RegexpSpecial('setBegin'); 762 | special.parent = curSet; 763 | } 764 | break; 765 | // match set end "]" 766 | case s.setEnd: 767 | if (curSet) { 768 | curSet.isComplete = true; 769 | special = new RegexpSpecial('setEnd'); 770 | special.parent = curSet; 771 | curSet = null; 772 | } else { 773 | target = new RegexpChar(char); 774 | } 775 | break; 776 | // match range splitor "-" 777 | case s.rangeSplitor: 778 | if (curSet) { 779 | if (lastQueue.codePoint < 0) { 780 | // such as [-aaa] [^-a] [\s-a] 781 | target = new RegexpChar(char); 782 | } else { 783 | const nextChar = nextAll.charAt(0); 784 | if (nextChar === s.setEnd) { 785 | curSet.isComplete = true; 786 | curSet = null; 787 | i += 1; 788 | } else { 789 | curSet.pop(); 790 | curRange = new RegexpRange(); 791 | curRange.parent = curSet; 792 | queues.pop().parent = curRange; 793 | addToQueue(curRange, lastQueue); 794 | special = new RegexpSpecial('rangeSplitor'); 795 | special.parent = curRange; 
796 | } 797 | } 798 | } else { 799 | target = new RegexpChar(char); 800 | } 801 | break; 802 | // match times 803 | case s.multipleBegin: 804 | case s.optional: 805 | case s.multiple: 806 | case s.leastOne: 807 | target = 808 | char === s.multipleBegin 809 | ? new RegexpTimesMulti() 810 | : new RegexpTimesQuantifiers( 811 | ...(this.config.maxRepeat ? [this.config.maxRepeat] : []), 812 | ); 813 | const num = target.untilEnd(context.slice(i - 1)); 814 | if (num > 0) { 815 | const type = 816 | lastQueue instanceof RegexpSpecial 817 | ? lastQueue.special 818 | : lastQueue.type; 819 | const error = `[${ 820 | lastQueue.input 821 | }]nothing to repeat[index:${i}]:${context.slice( 822 | i - 1, 823 | i - 1 + num, 824 | )}`; 825 | if (isWrongRepeat(type, lastQueue as RegexpCharset)) { 826 | throw new Error(error); 827 | } else { 828 | i += num - 1; 829 | if (type === 'groupEnd' || type === 'setEnd') { 830 | target.target = lastQueue.parent; 831 | } else { 832 | target.target = lastQueue; 833 | } 834 | } 835 | } else { 836 | target = new RegexpChar(char); 837 | } 838 | break; 839 | // match any . 
840 | case s.matchAny: 841 | target = new RegexpAny(genAnyCharacterHandle()); 842 | break; 843 | // match ^$ 844 | case s.beginWith: 845 | case s.endWith: 846 | target = new RegexpAnchor(char); 847 | break; 848 | // match / 849 | case s.delimiter: 850 | throw new Error(`unexpected pattern end delimiter:"/${nextAll}"`); 851 | // default 852 | default: 853 | target = new RegexpChar(char); 854 | } 855 | // push target to queues 856 | if (target) { 857 | const cur = target as RegexpPart; 858 | addToQueue(cur); 859 | if (curRange) { 860 | if (target.codePoint < 0) { 861 | // not char,translateChar,control char,or octal 862 | const [, first, , second] = queues.splice(-4, 4); 863 | const middle = new RegexpChar('-'); 864 | curSet.pop(); 865 | [first, middle, second].map((item: RegexpPart) => { 866 | item.parent = curSet; 867 | addToQueue(item); 868 | return item; 869 | }); 870 | } else { 871 | target.parent = curRange; 872 | } 873 | curRange = null; 874 | } else if (curSet) { 875 | cur.parent = curSet; 876 | } else { 877 | addToGroupOrLookaround(cur); 878 | } 879 | } 880 | // add special 881 | if (special) { 882 | if (target) { 883 | special.parent = target; 884 | } 885 | addToQueue(special); 886 | } 887 | } 888 | // parse reference or number 889 | if (refOrNumbers.length) { 890 | const replace = ( 891 | lists: RegexpPart[], 892 | search: RegexpPart, 893 | rep: RegexpPart[], 894 | ) => { 895 | let idx = 0; 896 | let finded = false; 897 | for (const len = lists.length; idx < len; idx++) { 898 | if (search === lists[idx]) { 899 | finded = true; 900 | break; 901 | } 902 | } 903 | if (finded) { 904 | lists.splice(idx, 1, ...rep); 905 | } 906 | }; 907 | const refLen = captureGroups.length; 908 | refOrNumbers.map((item: RegexpRefOrNumber) => { 909 | const strNum = item.input.slice(1); 910 | const total = strNum.length; 911 | let matchLen = 0; 912 | let instance: RegexpPart; 913 | if (strNum.charAt(0) !== '0' && +strNum <= refLen) { 914 | instance = new 
RegexpReference(item.input); 915 | (instance as RegexpReference).ref = null; 916 | matchLen = total; 917 | } else { 918 | if (/^([1-3][0-7]{0,2}|[4-7][0-7]?)/.test(strNum)) { 919 | const octal = RegExp.$1; 920 | instance = new RegexpOctal(`\\${octal}`); 921 | matchLen += octal.length; 922 | } else { 923 | instance = new RegexpTranslateChar(`\\${strNum.charAt(0)}`); 924 | matchLen += 1; 925 | } 926 | } 927 | instance.linkParent = item.parent; 928 | const res: RegexpPart[] = [instance]; 929 | while (matchLen < total) { 930 | const curChar = new RegexpChar(strNum.charAt(matchLen++)); 931 | curChar.linkParent = item.parent; 932 | res.push(curChar); 933 | } 934 | if (item.parent) { 935 | replace(item.parent.queues, item, res); 936 | } 937 | replace(queues, item, res); 938 | }); 939 | } 940 | // if root group,set completed when parse end 941 | if ( 942 | queues.length > 1 && 943 | queues[1].type === 'group' && 944 | (queues[1] as RegexpGroup).isRoot === true 945 | ) { 946 | queues[1].isComplete = true; 947 | } 948 | // check the queues whether if completed and saved the root queues 949 | const rootQueues: RegexpPart[] = []; 950 | let ruleInput = ''; 951 | queues.every((queue) => { 952 | if (!queue.isComplete) { 953 | throw new Error( 954 | `the regexp segment ${queue.type} is not completed:${queue.input}`, 955 | ); 956 | } 957 | if (queue.parent === null) { 958 | rootQueues.push(queue); 959 | ruleInput += queue.getRuleInput(); 960 | } 961 | return true; 962 | }); 963 | this.ruleInput = ruleInput; 964 | this.rootQueues = rootQueues; 965 | this.queues = queues; 966 | } 967 | // check if has repeat flags 968 | private checkFlags(): void | never { 969 | const { flags } = this; 970 | const len = flags.length; 971 | if (len === 0) { 972 | return; 973 | } 974 | if (len > Object.keys(flagsBinary).length) { 975 | throw new Error( 976 | `The rule may has repeated or unrecognized flags, please check.`, 979 | ); 980 | } 981 | const first = flags[0]; 982 | let totalFlagBinary = 
flagsBinary[first]; 983 | const flagsHash: FlagsHash = { 984 | [first]: true, 985 | }; 986 | for (let i = 1, j = flags.length; i < j; i++) { 987 | const flag = flags[i]; 988 | const binary = flagsBinary[flag]; 989 | if ((totalFlagBinary & binary) === 0) { 990 | totalFlagBinary += binary; 991 | flagsHash[flag] = true; 992 | } else { 993 | throw new Error(`wrong flag[${i}]:${flag}`); 994 | } 995 | } 996 | this.flagsHash = flagsHash; 997 | this.totalFlagBinary = totalFlagBinary; 998 | if (flagsHash.y || flagsHash.m || flagsHash.g) { 999 | // eslint-disable-next-line no-console 1000 | console.warn( 1001 | `the flags of 'g','m','y' will ignore,but you can set flags such as 'i','u','s'`, 1002 | ); 1003 | } 1004 | } 1005 | // check if has the flag 1006 | private hasFlag(flag: Flag) { 1007 | const { totalFlagBinary } = this; 1008 | const binary = flagsBinary[flag]; 1009 | return binary && (binary & totalFlagBinary) !== 0; 1010 | } 1011 | // flags hash 1012 | public getFlagsHash(): FlagsHash { 1013 | return this.flagsHash; 1014 | } 1015 | } 1016 | // make charset 1017 | export type CharsetType = 'd' | 'w' | 's'; 1018 | export type CharsetNegatedType = 'D' | 'W' | 'S'; 1019 | export type CharsetWordType = 'b' | 'B'; 1020 | export type CharsetAllType = CharsetType | CharsetNegatedType | CharsetWordType; 1021 | export type CharsetCacheType = 1022 | | CharsetNegatedType 1023 | | typeof SYMBOL_DOTALL 1024 | | typeof SYMBOL_ALL; 1025 | export type CharsetCache = { 1026 | [key in CharsetCacheType]?: CodePointResult; 1027 | }; 1028 | export type CodePointRangeItem = [number, number] | [number]; 1029 | export type CodePointRanges = Array; 1030 | export type CodePointData = { 1031 | [key in CharsetType]: T; 1032 | }; 1033 | export interface CodePointResult { 1034 | ranges: CodePointRanges; 1035 | totals: number[]; 1036 | } 1037 | export interface NumberRange { 1038 | min: number; 1039 | max: number; 1040 | } 1041 | 1042 | /** 1043 | * 1044 | * 1045 | * @export 1046 | * @abstract 1047 
/**
 * Base class of every node produced while parsing a regular expression.
 *
 * A part stores its raw `input`, its child parts (`queues`), a repeat range
 * (`min`/`max`) and knows how to `build` a random string for itself.
 * Sub classes customise the generated text by overriding `prebuild`.
 *
 * @export
 * @abstract
 * @class RegexpPart
 */
export abstract class RegexpPart {
  // child parts, in source order
  public queues: RegexpPart[] = [];
  // code point of a single-character part, -1 when the part has none
  public codePoint = -1;
  public abstract readonly type: string;
  protected parserInstance: ReRegExp;
  // repeat range applied by `build`
  protected min = 1;
  protected max = 1;
  protected dataConf: Partial<BuildConfData> = {};
  // true: call `prebuild` once per repetition; false: repeat a single result
  protected buildForTimes = false;
  protected curParent: RegexpPart = null;
  protected matchNothing = false;
  protected completed = true;
  constructor(public input: string = '') {}
  // set/get the ref parser
  get parser(): ReRegExp {
    return this.parserInstance;
  }
  set parser(parser: ReRegExp) {
    this.parserInstance = parser;
  }
  // get all possible count
  get count(): number {
    return this.getCodePointCount();
  }
  // parent getter and setter
  get parent(): RegexpPart {
    return this.curParent;
  }
  // setting the parent also appends this part to the parent's queues,
  // except for 'special' parts which are linked but never queued
  set parent(value: RegexpPart) {
    this.curParent = value;
    if (this.type !== 'special') {
      value.add(this);
    }
  }
  // set linked parent: records the parent without adding to its queues
  set linkParent(value: RegexpPart) {
    this.curParent = value;
  }
  // isComplete getter and setter
  get isComplete(): boolean {
    return this.completed;
  }
  set isComplete(value: boolean) {
    this.completed = value;
  }
  // isMatchNothing getter and setter, the value is propagated to all ancestors
  get isMatchNothing(): boolean {
    return this.matchNothing;
  }
  set isMatchNothing(value: boolean) {
    this.matchNothing = value;
    if (this.parent) {
      this.parent.isMatchNothing = value;
    }
  }
  /**
   * Copy the given repeat range onto this part.
   * @param options the `min` and `max` repeat counts
   */
  public setRange(options: NumberRange): void {
    Object.keys(options).forEach((key: keyof NumberRange) => {
      this[key] = options[key];
    });
  }
  /**
   * Append one or more parts to the queues.
   * @param target the part(s) to append
   */
  public add(target: RegexpPart | RegexpPart[]): void {
    this.queues = this.queues.concat(target);
  }
  // remove and return the last queued part
  public pop(): RegexpPart {
    return this.queues.pop();
  }
  /**
   * Generate a random string for this part.
   *
   * A repeat count is picked uniformly in [min, max]. When the `i` flag is
   * set, the case of each generated piece is changed according to the
   * results of `isOptional()` (defined elsewhere in this file, presumably a
   * random boolean). The result is passed to `setDataConf` before returning.
   *
   * @param conf the build configuration shared by the whole build
   * @returns the generated string, '' when the part repeats zero times
   */
  public build(conf: BuildConfData): string | never {
    const { min, max } = this;
    let result = '';
    if (min === 0 && max === 0) {
      // do nothing
    } else {
      let total = min + Math.floor(Math.random() * (max - min + 1));
      if (total !== 0) {
        const makeOnce = () => {
          let cur = this.prebuild(conf);
          if (conf.flags && conf.flags.i) {
            cur = isOptional()
              ? isOptional()
                ? cur.toLowerCase()
                : cur.toUpperCase()
              : cur;
          }
          return cur;
        };
        if (!this.buildForTimes) {
          result = makeOnce().repeat(total);
        } else {
          while (total--) {
            result += makeOnce();
          }
        }
      }
    }
    this.dataConf = conf;
    this.setDataConf(conf, result);
    return result;
  }
  // parse until end
  public untilEnd(_context: string): number | void {
    // will override by sub class
  }
  // set data conf
  public setDataConf(_conf: BuildConfData, _result: string): void {
    // will override by sub class
  }

  /**
   * Check if this part is the target itself or one of its ancestors.
   * @param target the part whose parent chain is walked
   */
  public isAncestorOf(target: RegexpPart): boolean {
    do {
      if (target === this) {
        return true;
      }
    } while ((target = target?.parent));
    return false;
  }
  // get the rule source of this part: the joined rule of the queues when
  // there are any, otherwise the raw input
  public getRuleInput(_parseReference?: boolean): string {
    if (this.queues.length) {
      return this.buildRuleInputFromQueues();
    } else {
      return this.input;
    }
  }
  // build rule input from queues.
  protected buildRuleInputFromQueues(): string {
    return this.queues.reduce((result: string, next: RegexpPart) => {
      return result + next.getRuleInput();
    }, '');
  }
  // build from regex part: one repetition, the concatenated build of the queues
  protected prebuild(conf: BuildConfData): string | never {
    if (this.queues.length) {
      return this.queues.reduce((res, cur: RegexpPart) => {
        return res + cur.build(conf);
      }, '');
    } else {
      return '';
    }
  }

  // codePointCount: how many distinct characters the part can produce
  protected getCodePointCount(): number {
    return 1;
  }
}

/**
 * A part that never produces text: its repeat range is fixed to {0,0}.
 */
export abstract class RegexpEmpty extends RegexpPart {
  constructor(input?: string) {
    super(input);
    this.min = 0;
    this.max = 0;
  }
}

/**
 * A part whose generated text is its raw input.
 */
export abstract class RegexpOrigin extends RegexpPart {
  protected prebuild(): string {
    return this.input;
  }
}

/**
 * A back reference. It replays the text already generated for the
 * referenced capture group, or '' when nothing has been captured yet.
 */
export class RegexpReference extends RegexpPart {
  public readonly type = 'reference';
  public ref: RegexpGroup | null = null;
  public index: number;
  constructor(input: string, public name: string = '') {
    super(input);
    // the input without its first character, as a number
    this.index = Number(input.slice(1));
  }
  protected prebuild(conf: BuildConfData): string {
    const { ref } = this;
    if (ref === null) {
      return '';
    }
    const { captureIndex } = ref as RegexpGroup;
    const { captureGroupData } = conf;
    // go through Object.prototype so a data object without its own
    // `hasOwnProperty` method can not break the lookup
    return Object.prototype.hasOwnProperty.call(captureGroupData, captureIndex)
      ? captureGroupData[captureIndex]
      : '';
  }
}

/**
 * A special marker part, it is never queued into its parent
 * (see the `parent` setter of RegexpPart).
 */
export class RegexpSpecial extends RegexpEmpty {
  public readonly type = 'special';
  constructor(public readonly special: string) {
    super();
  }
}

/**
 * A lookaround assertion. It generates nothing and is only reproduced
 * in the rule input.
 */
export class RegexpLookaround extends RegexpEmpty {
  public readonly type = 'lookaround';
  public readonly looktype: string;
  constructor(input: string) {
    super();
    this.looktype = input;
    this.isComplete = false;
  }
  public getRuleInput(): string {
    return '(' + this.looktype + this.buildRuleInputFromQueues() + ')';
  }
}

/**
 * The `.` part.
 */
export class RegexpAny extends RegexpPart {
  public readonly type = 'any';
  constructor(public handle?: () => string) {
    super('.');
    this.buildForTimes = true;
  }
  /**
   * Generate a build handle from the user defined `charactersOfAny`
   * (taken from the conf, falling back to `ReRegExp.charactersOfAny`).
   *
   * - a single range item or an array of range items: pick a character
   *   from those ranges
   * - a function: call it with the flags
   *
   * NOTE: for any other value no handle is returned (undefined).
   *
   * @throws Error when an empty array is given
   */
  public static genDiyCharactersHandle(
    conf: ParserConf & { flags: FlagsHash },
  ): () => string {
    const charactersOfAny = conf.charactersOfAny || ReRegExp.charactersOfAny;
    if (Array.isArray(charactersOfAny)) {
      if (charactersOfAny.length === 0) {
        throw new Error(
          "The user defined 'charactersOfAny' should not be an empty range array.",
        );
      }
      let allCharacters: CodePointRanges;
      if (!Array.isArray(charactersOfAny[0])) {
        // a single range item, wrap it into a ranges array
        allCharacters = [charactersOfAny as CodePointRangeItem];
      } else {
        allCharacters = charactersOfAny as CodePointRanges;
      }
      // running totals of the range sizes
      const totals: number[] = [];
      let total = 0;
      for (const [start, end = start] of allCharacters) {
        total += end - start + 1;
        totals.push(total);
      }
      const result: CodePointResult = {
        ranges: allCharacters,
        totals,
      };
      return () => charH.makeOne(result);
    } else if (typeof charactersOfAny === 'function') {
      return () =>
        (charactersOfAny as (flags?: FlagsHash) => string)(conf.flags);
    }
  }
  // prebuild: use the custom handle when given, otherwise the default charset
  protected prebuild(conf: BuildConfData): string {
    const handle = this.handle || (() => charH.make('.', conf.flags));
    return handle();
  }
}

/**
 * The `\0` part, generates the NUL character.
 */
export class RegexpNull extends RegexpPart {
  public readonly type = 'null';
  constructor() {
    super('\\0');
  }
  protected prebuild(): string {
    return '\x00';
  }
}

/**
 * The `[\b]` part, generates the backspace character.
 */
export class RegexpBackspace extends RegexpPart {
  public readonly type = 'backspace';
  constructor() {
    super('[\\b]');
  }
  protected prebuild(): string {
    return '\u0008';
  }
}

/**
 * The begin marker, generates nothing.
 */
export class RegexpBegin extends RegexpEmpty {
  public readonly type = 'begin';
}

/**
 * A control escape `\cX`.
 */
export class RegexpControl extends RegexpPart {
  public readonly type = 'control';
  constructor(input: string) {
    super(`\\c${input}`);
    // keep the low five bits of the letter's char code
    this.codePoint = parseInt(input.charCodeAt(0).toString(2).slice(-5), 2);
  }
  protected prebuild(): string {
    return String.fromCharCode(this.codePoint);
  }
}

/**
 * A charset escape: \d \w \s \D \W \S, and the word boundaries \b \B.
 */
export class RegexpCharset extends RegexpPart {
  public readonly type = 'charset';
  public readonly charset: CharsetAllType;
  constructor(input: string) {
    super(input);
    // the last character of the input is the charset letter
    this.charset = this.input.slice(-1) as CharsetAllType;
    this.buildForTimes = true;
  }
  protected prebuild(conf: BuildConfData): string {
    const { charset } = this;
    if (charset === 'b' || charset === 'B') {
      // word boundaries produce no text
      // eslint-disable-next-line no-console
      console.warn('The \\b or \\B');
      return '';
    } else {
      // make the charset
      return charH.make(
        charset as CharsetType | CharsetNegatedType,
        conf.flags,
      );
    }
  }
  // charset's maybe character count
  protected getCodePointCount(): number {
    const { parser, charset } = this;
    const { totals } = charH.getCharsetInfo(
      charset as CharsetType | CharsetNegatedType,
      parser.getFlagsHash(),
    );
    return getLastItem(totals);
  }
}

/**
 * An escape whose text is obtained by evaluating the input as the content
 * of a JavaScript string literal.
 */
export class RegexpPrint extends RegexpPart {
  public readonly type = 'print';
  protected prebuild(): string {
    // NOTE(review): this evaluates `input` as code; it is only safe while
    // the parser creates this part for fixed escape sequences - confirm
    // against the call sites before passing any other input.
    return new Function('', `return '${this.input}'`)();
  }
}

/**
 * An anchor. It is ignored when building, a warning is printed on creation.
 */
export class RegexpAnchor extends RegexpEmpty {
  public readonly type = 'anchor';
  public anchor: string;
  constructor(input: string) {
    super(input);
    this.anchor = input;
    // eslint-disable-next-line no-console
    console.warn(`the anchor of "${this.input}" will ignore.`);
  }
}

/**
 * A literal character.
 */
export class RegexpChar extends RegexpOrigin {
  public readonly type = 'char';
  constructor(input: string) {
    super(input);
    this.codePoint = input.codePointAt(0);
  }
}

/**
 * An escaped literal character: the generated text is the last
 * character of the input.
 */
export class RegexpTranslateChar extends RegexpOrigin {
  public readonly type = 'translate';
  constructor(input: string) {
    super(input);
    this.codePoint = input.slice(-1).codePointAt(0);
  }
  protected prebuild(): string {
    return this.input.slice(-1);
  }
}

/**
 * An octal escape: the input without its first character is read as an
 * octal number.
 */
export class RegexpOctal extends RegexpPart {
  public readonly type = 'octal';
  constructor(input: string) {
    super(input);
    this.codePoint = Number(`0o${input.slice(1)}`);
  }
  protected prebuild(): string {
    return String.fromCodePoint(this.codePoint);
  }
}

/**
 * A placeholder for an escape that is either a back reference or a number.
 * It must be parsed again before building, so building it always throws.
 */
export class RegexpRefOrNumber extends RegexpPart {
  public readonly type = 'refornumber';
  constructor(input: string) {
    super(input);
  }
  protected prebuild(): never {
    throw new Error(
      `the "${this.input}" must parse again,either reference or number`,
    );
  }
}

/**
 * Base class of the quantifiers. After a successful `untilEnd`, assigning
 * `target` applies the parsed repeat range to that part.
 */
export abstract class RegexpTimes extends RegexpPart {
  public readonly type = 'times';
  protected readonly maxNum: number = ReRegExp.maxRepeat;
  protected greedy = true;
  protected abstract readonly rule: RegExp;
  protected minRepeat = 0;
  protected maxRepeat = 0;
  constructor() {
    super();
    this.isComplete = false;
  }
  // apply the parsed repeat range to the target part
  set target(target: RegexpPart) {
    target.setRange({
      min: this.minRepeat,
      max: this.maxRepeat,
    });
  }
  /**
   * Try to read a quantifier at the beginning of the context.
   * @returns the count of consumed characters, 0 when nothing matched
   */
  public untilEnd(context: string): number {
    if (this.rule.test(context)) {
      // `parse` reads the remaining groups from the legacy static
      // RegExp.$n properties, so no other regexp may run before it
      const all = RegExp.$1;
      this.isComplete = true;
      this.input = all;
      this.parse();
      return all.length;
    }
    return 0;
  }
  public abstract parse(): void;
}

/**
 * The brace quantifiers: {n} {n,} {n,m} with an optional lazy `?`.
 */
export class RegexpTimesMulti extends RegexpTimes {
  protected rule = /^(\{(\d+)(,(\d*))?}(\??))/;
  /**
   * Read the repeat range from the last match of `rule`.
   * @throws Error when the upper bound is less than the lower bound
   */
  public parse(): void {
    const { $2: min, $3: code, $4: max, $5: optional } = RegExp;
    this.greedy = optional !== '?';
    this.minRepeat = parseInt(min, 10);
    if (max !== '') {
      // an explicit upper bound; test the text, not its numeric value,
      // so that a bound of 0 is respected: {0,0} and {2,0}
      this.maxRepeat = parseInt(max, 10);
    } else if (code) {
      // {n,}: no upper bound, limit it by the configured max repeat
      this.maxRepeat = this.minRepeat + this.maxNum * 2;
    } else {
      // {n}
      this.maxRepeat = this.minRepeat;
    }
    if (this.maxRepeat < this.minRepeat) {
      throw new Error(
        `wrong quantifier: {${this.minRepeat}, ${this.maxRepeat}}`,
      );
    }
  }
}

/**
 * The symbol quantifiers: * + ? and their lazy forms.
 */
export class RegexpTimesQuantifiers extends RegexpTimes {
  protected rule = /^(\*\?|\+\?|\?\?|\*|\+|\?)/;
  constructor(protected readonly maxNum: number = ReRegExp.maxRepeat) {
    super();
  }
  public parse(): void {
    const all = RegExp.$1;
    // a second character is the lazy modifier
    this.greedy = all.length === 1;
    switch (all.charAt(0)) {
      case '*':
        this.maxRepeat = this.maxNum;
        break;
      case '+':
        this.minRepeat = 1;
        this.maxRepeat = this.maxNum;
        break;
      case '?':
        this.maxRepeat = 1;
        break;
    }
  }
}

/**
 * A character class `[...]`, or `[^...]` when `reverse` is true.
 */
export class RegexpSet extends RegexpPart {
  public readonly type = 'set';
  public reverse = false;
  // an empty negated set: [^]
  private isMatchAnything = false;
  // the characters a negated set can generate
  private codePointResult: CodePointResult = null;
  constructor() {
    super();
    this.isComplete = false;
    this.buildForTimes = true;
  }
  // override set parser, the flags are needed to compute a negated set
  set parser(parser: ReRegExp) {
    this.parserInstance = parser;
    this.makeCodePointResult();
  }
  get parser(): ReRegExp {
    return this.parserInstance;
  }
  // isComplete
  get isComplete(): boolean {
    return this.completed;
  }
  set isComplete(value: boolean) {
    this.completed = value;
    if (value === true) {
      const isEmptyQueue = this.queues.length === 0;
      if (isEmptyQueue) {
        // [^] matches anything, [] matches nothing
        if (this.reverse) {
          this.isMatchAnything = true;
        } else {
          this.isMatchNothing = true;
        }
      } else {
        this.makeCodePointResult();
      }
    }
  }
  public getRuleInput(): string {
    return (
      '[' + (this.reverse ? '^' : '') + this.buildRuleInputFromQueues() + ']'
    );
  }
  protected prebuild(conf: BuildConfData): string {
    if (this.isMatchAnything) {
      return new RegexpAny().build(conf);
    }
    const { queues } = this;
    if (this.reverse) {
      return charH.makeOne(this.codePointResult);
    }
    let index: number;
    if (conf.extractSetAverage) {
      // choose a member by the running totals of the members' character
      // counts instead of by member index
      let total = 0;
      const totals = queues.map((queue) => (total = total + queue.count));
      index = getRandomTotalIndex(totals).index;
    } else {
      index = makeRandom(0, queues.length - 1);
    }
    return this.queues[index].build(conf) as string;
  }
  /**
   * Compute the code point ranges a negated set can generate. It only
   * runs when the set is negated, completed and has a parser.
   */
  protected makeCodePointResult(): void {
    if (!this.reverse || !this.parser || !this.isComplete) return;
    // with begin ^, reverse the sets
    const { queues, parser } = this;
    const flags = parser.getFlagsHash();
    if (
      queues.length === 1 &&
      queues[0].type === 'charset' &&
      ['w', 's', 'd'].includes(
        (queues[0] as RegexpCharset).charset.toLowerCase(),
      )
    ) {
      // a lone charset: toggling bit 0x20 swaps the case of the letter,
      // which gives the opposite charset, e.g. [^\d] -> \D, [^\D] -> \d
      const charCode =
        (queues[0] as RegexpCharset).charset.charCodeAt(0) ^ 0b100000;
      const charset = String.fromCharCode(charCode) as
        | CharsetType
        | CharsetNegatedType;
      this.codePointResult = charH.getCharsetInfo(charset, flags);
    } else {
      // collect the ranges of all the members
      const ranges = queues.reduce((res: CodePointRanges, item: RegexpPart) => {
        const { type } = item;
        let cur: CodePointRanges;
        if (type === 'charset') {
          const charset = (item as RegexpCharset).charset as CharsetAllType;
          if (charset === 'b' || charset === 'B') {
            // eslint-disable-next-line no-console
            console.warn('the charset \\b or \\B will ignore');
            cur = [];
          } else {
            cur = charH.getCharsetInfo(charset, flags).ranges.slice(0);
          }
        } else if (type === 'range') {
          cur = [
            (item as RegexpRange).queues.map((e: RegexpPart) => {
              return e.codePoint;
            }) as CodePointRangeItem,
          ];
        } else {
          cur = [[item.codePoint]];
        }
        return res.concat(cur);
      }, []);
      // always exclude the surrogates, and add an end sentinel just
      // after the last allowed code point
      ranges.push([0xd800, 0xdfff], flags.u ? [0x110000] : [0x10000]);
      // ascending by range start; the sweep below tracks the furthest end
      // seen so far, so the order of equal starts does not matter
      ranges.sort((a: number[], b: number[]) => a[0] - b[0]);
      // sweep the sorted ranges and collect the gaps between them
      const negated: CodePointRanges = [];
      let point = 0;
      for (let i = 0, j = ranges.length; i < j; i++) {
        const cur = ranges[i];
        const [start] = cur;
        const end = cur[1] || start;
        if (point < start) {
          negated.push(point + 1 === start ? [point] : [point, start - 1]);
        }
        point = Math.max(end + 1, point);
      }
      if (negated.length === 0) {
        this.isMatchNothing = true;
      } else {
        // running totals of the gap sizes
        let total = 0;
        const totals = negated.map((item: number[]) => {
          if (item.length === 1) {
            total += 1;
          } else {
            total += item[1] - item[0] + 1;
          }
          return total;
        });
        this.codePointResult = { totals, ranges: negated };
      }
    }
  }
}

/**
 * A range inside a set, e.g. a-z. Its queues hold the two bounds.
 */
export class RegexpRange extends RegexpPart {
  public readonly type = 'range';
  constructor() {
    super();
    this.isComplete = false;
  }
  /**
   * Add a bound; the range is completed by the second one.
   * @throws Error when the first bound is greater than the second
   */
  public add(target: RegexpPart): void | never {
    super.add(target);
    if (this.queues.length === 2) {
      this.isComplete = true;
      const [prev, next] = this.queues;
      if (prev.codePoint > next.codePoint) {
        throw new Error(
          `invalid range:${prev.getRuleInput()}-${next.getRuleInput()}`,
        );
      }
    }
  }
  public getRuleInput(): string {
    const [prev, next] = this.queues;
    return prev.getRuleInput() + '-' + next.getRuleInput();
  }
  // pick a code point between the two bounds (both passed to makeRandom)
  protected prebuild(): string {
    const [prev, next] = this.queues;
    const min = prev.codePoint;
    const max = next.codePoint;
    return String.fromCodePoint(makeRandom(min, max));
  }
  // the range's possible character counts
  protected getCodePointCount(): number {
    const [prev, next] = this.queues;
    const min = prev.codePoint;
    const max = next.codePoint;
    return max - min + 1;
  }
}

/**
 * Base class of the hexadecimal escapes. The generated text is the raw
 * input (see RegexpOrigin).
 */
export abstract class RegexpHexCode extends RegexpOrigin {
  public readonly type = 'hexcode';
  protected abstract rule: RegExp;
  protected abstract codeType: string;
  /**
   * Try to read the hex digits at the beginning of the context.
   * @returns the count of consumed characters, 0 when nothing matched
   * @throws Error when the code point is greater than 0x10ffff
   */
  public untilEnd(context: string): number {
    const { rule, codeType } = this;
    if (rule.test(context)) {
      const { $1: all, $2: codePoint } = RegExp;
      // the second group only exists for the brace form
      const lastCode = codePoint || all;
      this.codePoint = Number(`0x${lastCode}`);
      if (this.codePoint > 0x10ffff) {
        throw new Error(
          `invalid unicode code point:\\u{${lastCode}},can not great than 0x10ffff`,
        );
      }
      this.input = `\\${codeType}${all}`;
      return all.length;
    }
    return 0;
  }
}

// \uXXXX
export class RegexpUnicode extends RegexpHexCode {
  protected rule = /^([0-9A-Fa-f]{4})/;
  protected codeType = 'u';
}

// \u{X...} or \uXXXX
export class RegexpUnicodeAll extends RegexpHexCode {
  protected rule = /^({(0*[0-9A-Fa-f]{1,6})}|[0-9A-Fa-f]{4})/;
  protected codeType = 'u';
}

// \xXX
export class RegexpASCII extends RegexpHexCode {
  protected rule = /^([0-9A-Fa-f]{2})/;
  protected codeType = 'x';
}

/**
 * A unicode property escape, \p... or \P... (negated). The characters are
 * produced by the generator created by `ReRegExp.UPCFactory`.
 */
export class RegexpUnicodeCategory extends RegexpPart {
  public type = 'unicode-category';
  protected data: UPCData;
  protected rule = /^([A-Z]|\{(?:(?:([a-zA-Z_]+)=)?([A-Za-z_]+))})/;
  protected generator: UPCInstance;
  // constructor
  public constructor(private readonly symbol: string) {
    super();
  }
  /**
   * Parse until matched: a single letter or the brace form {key=value} /
   * {value}.
   * @returns the count of consumed characters, 0 when nothing matched
   */
  public untilEnd(context: string): number | never {
    if (this.rule.test(context)) {
      const { $1: all, $2: key, $3: value } = RegExp;
      const { symbol } = this;
      const negate = symbol === 'P';
      let data: UPCData;
      if (value) {
        // the brace form
        data = {
          short: false,
          negate,
          value,
        };
        if (key) {
          data.key = key;
        }
      } else {
        // the single letter form
        data = {
          short: true,
          negate,
          value: all,
        };
      }
      this.data = data;
      const factory = ReRegExp.UPCFactory;
      this.generator = factory(data);
      this.input = `\\${symbol}${all}`;
      return all.length;
    }
    return 0;
  }
  // generate a random character
  protected prebuild(): string {
    return this.generator.generate();
  }
}

/**
 * One alternative of a group, i.e. the parts between two `|`.
 */
export class RegexpGroupItem extends RegexpPart {
  public readonly type = 'group-item';
  constructor(public index: number) {
    super();
  }
  /**
   * Get the rule source of this alternative, anchors are left out.
   * @param parseReference replace the resolved back references by the rule
   *  of the group they refer to
   */
  public getRuleInput(parseReference = false): string {
    return this.queues.reduce((res: string, item: RegexpPart) => {
      let cur: string;
      if (
        parseReference &&
        item.type === 'reference' &&
        (item as RegexpReference).ref !== null
      ) {
        cur = (item as RegexpReference).ref.getRuleInput(parseReference);
      } else {
        cur = this.isEndLimitChar(item)
          ? ''
          : item.getRuleInput(parseReference);
      }
      return res + cur;
    }, '');
  }
  // build all the parts in order, the anchors are skipped with a warning
  public prebuild(conf: BuildConfData): string {
    return this.queues.reduce((res, queue: RegexpPart) => {
      let cur: string;
      if (this.isEndLimitChar(queue)) {
        // eslint-disable-next-line no-console
        console.warn('the ^ and $ of the regexp will ignore');
        cur = '';
      } else {
        cur = queue.build(conf);
      }
      return res + cur;
    }, '');
  }
  // check if the part is an anchor
  private isEndLimitChar(target: RegexpPart) {
    return target.type === 'anchor';
  }
}

/**
 * A group. Its queues hold the alternatives (RegexpGroupItem); the root
 * group (`isRoot`) is written without parentheses in the rule input.
 */
export class RegexpGroup extends RegexpPart {
  public readonly type = 'group';
  // 0 means a non capturing group
  public captureIndex = 0;
  public captureName = '';
  public queues: RegexpGroupItem[] = [];
  public isRoot = false;
  // the alternative that receives the newly added parts
  private curGroupItem: RegexpGroupItem = null;
  // cached validation rule, see buildRule
  private curRule: RegExp | null = null;
  constructor() {
    super();
    this.isComplete = false;
    this.buildForTimes = true;
    this.addNewGroup();
  }
  get isComplete(): boolean {
    return this.completed;
  }
  set isComplete(value: boolean) {
    this.completed = value;
    if (value === true) {
      // a group matches nothing only when all its alternatives do
      this.isMatchNothing = this.queues.every((item: RegexpGroupItem) => {
        return item.isMatchNothing;
      });
    }
  }
  // add a new group item and make it the current one
  public addNewGroup(): RegexpGroupItem {
    const { queues } = this;
    const groupItem = new RegexpGroupItem(queues.length);
    this.curGroupItem = groupItem;
    groupItem.parent = this;
    return groupItem;
  }
  // add root group item: attach the parts that have no parent yet to the
  // current alternative, then begin a new alternative
  public addRootItem(target: RegexpPart[]): void {
    target.forEach((item: RegexpPart) => {
      if (item.parent === null) {
        item.parent = this.curGroupItem;
      }
    });
    this.addNewGroup();
  }
  // add a part to the current alternative
  public addItem(target: RegexpPart): void {
    target.parent = this.curGroupItem;
  }
  // override getRuleInput, the group name is not included
  public getRuleInput(parseReference = false): string {
    const { queues: groups, captureIndex, isRoot } = this;
    let result = '';
    const segs = groups.map((groupItem) => {
      return groupItem.getRuleInput(parseReference);
    });
    if (captureIndex === 0 && !isRoot) {
      result = '?:' + result;
    }
    result += segs.join('|');
    return isRoot ? result : `(${result})`;
  }
  /**
   * Build (and cache) the anchored rule used to validate the user
   * supplied values of a named group.
   *
   * `prebuild` also invokes it with a RegexpGroupItem as `this`, so it
   * must only rely on `curRule` and `getRuleInput`.
   */
  protected buildRule(flags: FlagsHash): RegExp | null {
    if (this.curRule) {
      return this.curRule;
    } else {
      const rule = this.getRuleInput(true);
      // the rule is cached and used with `test`: with the 'g' or 'y' flag
      // `test` resumes from `lastIndex`, so every second check of a valid
      // value would fail, drop these flags
      const flag = Object.keys(flags)
        .filter((key: string) => key !== 'g' && key !== 'y')
        .join('');
      return (this.curRule = new RegExp(`^${rule}$`, flag));
    }
  }
  /**
   * Build string: pick one alternative, or one of the values configured in
   * `namedGroupConf` for this named group, then record the result as the
   * capture data of the group.
   *
   * @throws Error when all the alternatives are filtered by the config, or
   *  when a configured value does not match the rule of the group
   */
  protected prebuild(conf: BuildConfData): string {
    const { queues: groups, captureIndex, captureName } = this;
    let result = '';
    const { flags, namedGroupConf } = conf;
    const groupsLen = groups.length;
    const filterGroups: RegexpGroupItem[] = [];
    const overrideGroups: RegexpGroupItem[] = [];
    const overrideValues: string[][] = [];
    let segNamedGroup: RegexpGroupItem;
    let segNamedValue: string[] = [];
    // special build logic, /(?<a_b_c_d>a|b|c|d)\k<a_b_c_d>/
    if (captureName && captureName.includes('_') && namedGroupConf) {
      const segs = captureName.split('_');
      const groupConf = namedGroupConf[captureName];
      // only when the config has an entry for this group; a group without
      // any entry is built as a normal group and must not be reported as
      // "all filtered"
      if (
        segs.length === groupsLen &&
        groupConf &&
        typeof groupConf === 'object'
      ) {
        const segConf = groupConf as NamedGroupConf;
        let hasGroup = false;
        segs.forEach((key: string, index: number) => {
          if (typeof segConf[key] === 'boolean' && segConf[key] === false) {
            // ignore current group
          } else {
            hasGroup = true;
            const groupItem = groups[index];
            if (Array.isArray(segConf[key])) {
              overrideGroups.push(groupItem);
              overrideValues.push(segConf[key] as string[]);
            } else {
              filterGroups.push(groupItem);
            }
          }
        });
        if (!hasGroup) {
          throw new Error(
            `the specified named group '${captureName}' are all filtered by the config.`,
          );
        }
        const overrideItemNum = overrideGroups.length;
        if (overrideItemNum) {
          // use override array: choose among the overridden and the kept
          // alternatives, an index below overrideItemNum selects an
          // overridden one
          const index = makeRandom(
            0,
            overrideItemNum + filterGroups.length - 1,
          );
          if (index < overrideItemNum) {
            segNamedGroup = overrideGroups[index];
            segNamedValue = overrideValues[index];
          }
        }
      }
    }
    if (
      captureName &&
      namedGroupConf &&
      namedGroupConf[captureName] &&
      (Array.isArray(namedGroupConf[captureName]) || segNamedGroup)
    ) {
      // take the value from the config and validate it
      let namedGroup: string[];
      let curRule: RegExp;
      if (!segNamedGroup) {
        namedGroup = namedGroupConf[captureName] as string[];
        curRule = this.buildRule(flags);
      } else {
        namedGroup = segNamedValue;
        // validate against the rule of the chosen alternative only
        curRule = this.buildRule.call(segNamedGroup, flags);
      }
      const index = makeRandom(0, namedGroup.length - 1);
      result = namedGroup[index];
      if (!curRule.test(result)) {
        throw new Error(
          `the namedGroupConf of ${captureName}'s value "${result}" is not match the rule ${curRule.toString()}`,
        );
      }
    } else {
      // build one of the (not filtered) alternatives
      const lastGroups = filterGroups.length ? filterGroups : groups;
      const index = makeRandom(0, lastGroups.length - 1);
      const group = lastGroups[index];
      result = group.build(conf);
    }
    if (captureName) {
      conf.namedGroupData[captureName] = result;
    }
    if (captureIndex) {
      conf.captureGroupData[captureIndex] = result;
    }
    return result;
  }
}